In [1]:
import re
In [2]:
# Validating an Email Address
pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"

# Compiled once, under its own name: `pattern` is rebound by a later cell, so
# the helper below must not look the global up at call time.
_EMAIL_RE = re.compile(pattern)


def is_valid_email(address):
    """Return True when `address` matches the email pattern in its entirety.

    Uses ``fullmatch`` rather than ``match``: with ``match`` the trailing ``$``
    also succeeds just before a final newline, so ``"user@host.com\\n"`` would
    be reported as valid.
    """
    return _EMAIL_RE.fullmatch(address) is not None


email = "example@example.com"
match = _EMAIL_RE.fullmatch(email)
print(bool(match))
True
In [3]:
# Replacing Text
# Substitute every occurrence of `pattern` in `text` with `replacement`.
text = "hello world"
pattern, replacement = r"world", "Python"
new_text = re.compile(pattern).sub(replacement, text)
print(new_text)
hello Python
In [4]:
# Registry of named regular expressions.
#
# Entries written as ^...$ are meant to validate a whole string; the unanchored
# entries are meant for searching inside longer text.
#
# Country-specific phone and postal patterns carry a region prefix (us_, uk_,
# eu_, in_, br_) so that they can coexist in one dict instead of overwriting
# each other under a shared key.
patterns = {
    # --- General ---------------------------------------------------------
    "email": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",  # common email formats (search)
    "email_anchored": r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$",  # whole-string validation
    "url": r"^(https?|ftp)://[^\s/$.?#].[^\s]*$",
    # Dot-separated labels of letters/digits/hyphens (1-63 chars, no leading
    # or trailing hyphen) followed by a 2-6 letter TLD.
    # NOTE(review): TLDs longer than 6 letters are rejected by {2,6}.
    "domain": r"^(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,6}$",
    "tags": r"#\w+",  # extracts hashtags
    "isbn10": r"^\d{9}[\dX]$",  # ISBN-10
    "isbn13": r"^\d{13}$",  # ISBN-13
    "code39": r"^[\dA-Z\-\.\ \$\/\+\%]+$",  # Code 39 barcode

    # --- US --------------------------------------------------------------
    "us_phone": r"^\+?1?\d{10}$",
    "intl_phone": r"^\+?[1-9]\d{1,14}$",
    "ssn": r"^\d{3}-\d{2}-\d{4}$",
    "us_zip": r"^\d{5}(-\d{4})?$",
    "us_vat": r"^\d{2}-\d{7}$",  # US VAT number (EIN format)
    "us_phone_number": r"\(\d{3}\) \d{3}-\d{4}",  # matches (123) 456-7890
    "zip_code": r"\d{5}(-\d{4})?",  # matches 12345 or 12345-6789

    # --- UK --------------------------------------------------------------
    "postcode": r"[A-Z]{1,2}\d[A-Z\d]? \d[A-Z]{2}",  # UK postcodes like SW1A 1AA (search)
    "uk_postcode": r"^[A-Z]{1,2}\d[A-Z\d]? \d[A-Z]{2}$",  # UK postcodes like SW1A 1AA (validation)
    "uk_vat": r"^GB\d{9}$",  # UK VAT number
    "uk_phone_number": r"\+44 \d{4} \d{6}",  # matches +44 1234 567890

    # --- Europe ----------------------------------------------------------
    "eu_phone_number": r"\+\d{2} \d{3} \d{3} \d{3}",  # matches +12 345 678 901
    "eu_vat": r"^[A-Z]{2}\d{8,12}$",  # EU VAT number
    "eu_postal_code": r"\d{4,5}",  # matches postal codes like 1234 or 12345

    # --- India -----------------------------------------------------------
    "in_phone_number": r"\+91-\d{10}",  # matches +91-1234567890
    "in_postal_code": r"\d{6}",  # matches postal codes like 123456

    # --- Brazil ----------------------------------------------------------
    "br_phone_number": r"\(\d{2}\) \d{4,5}-\d{4}",  # matches (12) 34567-8901 or (12) 3456-7890
    "br_postal_code": r"\d{5}-\d{3}",  # matches postal codes like 12345-678

    # --- Healthcare ------------------------------------------------------
    "healthcare_id": r"^[A-Z]{2}\d{6}$",  # example: AB123456
    "npi": r"^\d{10}$",  # National Provider Identifier
    "icd9": r"^\d{3}(\.\d{1,2})?$",  # ICD-9 codes
    "icd10": r"^[A-Z]\d{2}(\.\d{1,4})?$",  # ICD-10 codes
    # NOTE(review): identical to the icd10 pattern -- confirm this is the
    # intended shape for ICD-11 codes.
    "icd11": r"^[A-Z]\d{2}(\.\d{1,4})?$",  # ICD-11 codes
    "cpt": r"^\d{5}$",  # CPT codes
    "ndc": r"^\d{4}-\d{4}-\d{2}$",  # NDC codes
    "surgery_procedure": r"^[A-Z0-9]{3,5}$",  # example: OPCS-4 codes

    # --- Finance ---------------------------------------------------------
    "credit_card": r"^\d{4}-?\d{4}-?\d{4}-?\d{4}$",  # 16 digits, optional hyphens between groups of 4
    "iban": r"^[A-Z]{2}\d{2}[A-Z0-9]{1,30}$",  # matches IBAN
    # 6 letters + 2 location characters, optionally followed by a
    # 3-character branch code (8 or 11 characters in total).
    "swift": r"^[A-Z]{6}[A-Z2-9][A-NP-Z0-9](?:[A-Z0-9]{3})?$",  # matches SWIFT/BIC
    "slips_line_code": r"^\d{6}-\d{6}-\d{6}-\d{6}$",  # example: 123456-123456-123456-123456
    "cheques_line_code": r"^\d{6}-\d{6}-\d{6}-\d{6}$",  # example: 123456-123456-123456-123456
    "micr": r"^\d{9}$",  # MICR code

    # --- IT --------------------------------------------------------------
    # Shape check only: four dot-separated groups of 1-3 digits, so octets
    # above 255 are not rejected.
    "ipv4": r"^(\d{1,3}\.){3}\d{1,3}$",  # matches IPv4 addresses
    # Full eight-group form only; the "::" shorthand is not matched.
    "ipv6": r"^([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}$",  # matches IPv6 addresses
    "mac_address": r"^([0-9A-Fa-f]{2}:){5}[0-9A-Fa-f]{2}$",  # matches MAC addresses
    "uuid": r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$",  # matches UUID
}

# Backward-compatible aliases. Every region used to write to the same generic
# `phone_number` / `postal_code` keys, and the last writer (Brazil) won; keep
# those keys resolving to the same patterns for any existing lookups.
patterns["phone_number"] = patterns["br_phone_number"]
patterns["postal_code"] = patterns["br_postal_code"]