Find Text
Finding patterns in a normal way, without regular expressions.
"""Find text in phone format: 415-555-1234
Not using regex involves a lot of code.
If you want to find a phone number within a larger text ...
you would have to add even more code.
"""
def is_phone_number(text):
    """Return True if text is exactly a phone number shaped like 415-555-1234.

    The string must be 12 characters long: three decimal digits, a dash,
    three decimal digits, a dash, and four decimal digits.
    """
    if len(text) != 12:
        return False
    # Cut the candidate into its five fixed-width pieces.
    area, first_dash, prefix = text[:3], text[3], text[4:7]
    second_dash, line = text[7], text[8:]
    if first_dash != '-' or second_dash != '-':
        return False
    # str.isdecimal() on a non-empty slice is True only when every
    # character in it is a decimal digit.
    return all(part.isdecimal() for part in (area, prefix, line))
# Sanity checks: only the exact ddd-ddd-dddd layout is accepted.
assert not is_phone_number('123456789012')
assert is_phone_number('123-456-7777')
assert not is_phone_number('123-4567777')

text = """Call me at 123-456-7777 or 415-555-1234,
but not at 415-5551234."""

# Slide a 12-character window over the text, one start position at a time,
# and report every window that is a phone number.
for start in range(len(text)):
    window = text[start:start + 12]
    if is_phone_number(window):
        print("Found phone number: " + window)
print("Done")
# Found phone number: 123-456-7777
# Found phone number: 415-555-1234
# Done
Patterns
Regular expressions are descriptions for a pattern of text.
"""Find text in phone format: 415-555-1234
Use regular expression patterns.
The group() method returns the match.
The findall() method returns a list of strings.
"""
from nis import match
import re
# Check if string is phone number
def is_phone_number(text):
    """Return True only if the whole of text is a ddd-ddd-dddd phone number.

    fullmatch() is used rather than search(): search() also succeeds for
    any longer string that merely contains a phone number somewhere inside
    it (for example '00111-222-444455'), which is not what "is this string
    a phone number" means.
    """
    pattern = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
    return pattern.fullmatch(text) is not None
# The regex-based check accepts and rejects these sample strings.
assert not is_phone_number('123456789012')
assert is_phone_number('123-456-7777')
assert not is_phone_number('123-4567777')

# Sample text used by the search examples: two valid numbers, one malformed.
text = 'Call me at 123-456-7777 or 415-555-1234, but not at 415-5551234.'
def search_first_number(text):
    """Return the first ddd-ddd-dddd phone number found in text.

    Returns None when text contains no phone number. search() yields None
    for "no match", and calling group() on it would raise AttributeError.
    """
    pattern = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
    result = pattern.search(text)
    if result is None:
        return None
    return result.group()
def search_all_numbers(text):
    """Return a list with every ddd-ddd-dddd phone number found in text.

    The list is empty when text contains no phone number.
    """
    return re.findall(r'\d\d\d-\d\d\d-\d\d\d\d', text)
# Show the first phone number found in the sample text.
first_number = search_first_number(text)
print("First number: " + first_number)
# First number: 123-456-7777

# Show every phone number found, one per line.
numbers = search_all_numbers(text)  # list of strings
numbers_report = "Numbers: \n" + '\n'.join(numbers)
print(numbers_report)
# Numbers:
# 123-456-7777
# 415-555-1234
# search() scans the whole string, so a match may start in the middle:
# the leading "00" and the trailing "55" are simply left out of it.
pattern = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
result = pattern.search('00111-222-444455')
print(result[0])  # index 0 is the entire match, same as group()
# 111-222-4444
Groups
The groups() method returns a tuple of multiple values.
"""Regex: Grouping with parenthesses
Adding parentheses will create groups in the regex.
"""
import re
text = 'My number is 415-555-1234'
# Two parenthesised groups: the area code, and the rest of the number.
pattern = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
result = pattern.search(text)
# groups() returns one string per group, in order of appearance.
code, number = result.groups()
assert (code, number) == ('415', '555-1234')
# The second group does not include the area code.
assert number != '415-555-1234'
Verbose
You can use triple-quote syntax to spread a regex over multiple lines.
"""Regex on multiple lines
This verbose mode can be enabled with re.VERBOSE
"""
import re
# With re.VERBOSE, whitespace and "#" comments inside the pattern are
# ignored, so every part of the regex can sit on its own annotated line.
pattern = re.compile(r'''
(\d{3}) # area code
(\s|-)? # separator
(
\d{3} # 3 digits
(\s|-) # separator
\d{4} # 4 digits
)
''', flags=re.VERBOSE)
result = pattern.search('My number is 415 555-1234')
groups = result.groups()
assert groups == ('415', ' ', '555-1234', '-')
# Groups are numbered by their opening parenthesis: area code, outer
# separator, local number, and the separator nested in the local number.
code = groups[0]
number = groups[2]
sep = groups[3]
assert code == '415'
assert number == '555-1234'
assert number != '415-555-1234'
Applications (2)
A program to find some pattern in a full page text.
"""Find authors App:
Find all authors @nickname in an amazon page
1) Open https://www.amazon.com/gp/product/1593279922
2) Copy Ctrl-A, Ctrl-C
3) Run python program
4) Paste Ctrl-v
"""
import re, pyperclip
# An author handle is "@" followed by letters, digits, "_" or "-".
pattern = re.compile('@[a-zA-Z0-9_-]+')

# Read the copied page text and pull out every handle it contains.
clipboard = pyperclip.paste()
authors = pattern.findall(clipboard)

# Put the handles back on the clipboard, one per line, then echo what
# the clipboard now holds.
pyperclip.copy('\n'.join(authors))
print(pyperclip.paste())
# @OscarBaruffa
# @Awful_Curious
# @mcapablanca
Use regex to find if a password is strong.
"""Strong password detection:
At least 8 characters long, contains both uppercase and lowercase,
and has at least one digit,
and has at least one non-word character
"""
import re
def password_is_strong(password):
    """Return True if password satisfies every strength rule.

    Rules: at least 8 characters long, with at least one ASCII lowercase
    letter, one ASCII uppercase letter, one digit and one non-word
    character. The underscore is a word character in regex terms, so it
    does not satisfy the last rule.
    """
    # re.DOTALL lets "." match a newline as well, so a password that
    # contains a line break is still judged as a whole instead of line
    # by line.
    pattern = re.compile(r'''
        (?=.*[a-z])   # look ahead: at least one lowercase letter
        (?=.*[A-Z])   # look ahead: at least one uppercase letter
        (?=.*\d)      # look ahead: at least one digit
        (?=.*\W)      # look ahead: at least one non-word character
        .{8,}         # at least 8 characters in total
        ''', re.VERBOSE | re.DOTALL)
    return pattern.fullmatch(password) is not None
# Rejected: too short, or missing a required kind of character.
assert not password_is_strong("abc")
assert not password_is_strong("abcdefgh")
assert not password_is_strong("aBcef1!")
# Accepted: long enough and contains every required kind of character.
assert password_is_strong("Abcd2efgh!")
assert password_is_strong("aB2&bcde")
assert password_is_strong("aBcefg1!")
Last update: 442 days ago