minte9
LearnRemember



Find Text

Finding patterns in a normal way, without regular expressions.
 
"""Find text in phone format: 415-555-1234
Not using regex involves a lot of code.
If you want to find a phone within a larger text ...
you would have to add even more code.
"""

def is_phone_number(text):
    """Return True if text is exactly a phone number such as 415-555-1234.

    The string must be 12 characters long: three decimal digits, a dash,
    three decimal digits, a dash, and four decimal digits.
    """
    if len(text) != 12:
        return False

    # Dashes sit at fixed offsets; every other position must hold a digit.
    dash_positions = (3, 7)
    for position, char in enumerate(text):
        if position in dash_positions:
            if char != '-':
                return False
        elif not char.isdecimal():
            return False

    return True

# Sanity checks: only the exact ddd-ddd-dddd layout is accepted.
assert not is_phone_number('123456789012')
assert is_phone_number('123-456-7777')
assert not is_phone_number('123-4567777')

text = (
    "Call me at 123-456-7777 or 415-555-1234, \n"
    "but not at 415-5551234."
)

# Slide a 12-character window over the text and test every window.
for start in range(len(text)):
    chunk = text[start:start + 12]
    if not is_phone_number(chunk):
        continue
    print(f"Found phone number: {chunk}")
print("Done")

# Found phone number: 123-456-7777
# Found phone number: 415-555-1234
# Done

Patterns

Regular expressions are descriptions for a pattern of text.
 
"""Find text in phone format: 415-555-1234
Use regular expression patterns.
The group() method returns the match.
The findall() method returns a list of strings.
"""
from nis import match
import re


# Check if string is phone number

def is_phone_number(text):
    """Return True if text is exactly a phone number such as 415-555-1234.

    fullmatch() is used rather than search() so that a number embedded in
    a longer string (for example '00111-222-444455') is rejected: the whole
    string has to fit the ddd-ddd-dddd layout.
    """
    pattern = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
    return pattern.fullmatch(text) is not None

assert not is_phone_number('123456789012')
assert is_phone_number('123-456-7777')
assert not is_phone_number('123-4567777')


# Search phone numbers in a text

text = (
    'Call me at 123-456-7777 or 415-555-1234, '
    'but not at 415-5551234.'
)

def search_first_number(text):
    """Return the first ddd-ddd-dddd phone number found in text, or None.

    None is returned when text holds no phone number, instead of failing
    with AttributeError from calling group() on a missing match.
    """
    pattern = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
    result = pattern.search(text)
    if result is None:
        return None
    return result.group()

def search_all_numbers(text):
    """Return every ddd-ddd-dddd phone number in text as a list of strings.

    The list is empty when text contains no phone number.
    """
    return re.findall(r'\d\d\d-\d\d\d-\d\d\d\d', text)

# Show the first match only.
print(f"First number: {search_first_number(text)}")
    # First number: 123-456-7777

# Show every match, one per line under a heading.
numbers = search_all_numbers(text)  # list of strings
print("Numbers: ", '\n'.join(numbers), sep='\n')
    # Numbers: 
    # 123-456-7777
    # 415-555-1234

# search() finds the pattern anywhere, even inside a longer run of digits.
pattern = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
result = re.search(pattern, '00111-222-444455')
print(result.group(0))
    # 111-222-4444

Groups

The groups() method returns a tuple of multiple values.
 
"""Regex: Grouping with parentheses
Adding parentheses will create groups in the regex.
"""
import re

text = 'My number is 415-555-1234'

# Group 1 captures the area code, group 2 the rest of the number.
pattern = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
result = pattern.search(text)
code = result.group(1)
number = result.group(2)

assert (code, number) == ('415', '555-1234')
assert number != '415-555-1234'

Verbose

You can use triple quote syntax to spread regex on multiple lines.
 
"""Regex on multiple lines
This verbose mode can be enabled with re.VERBOSE
"""
import re

pattern = re.compile(r'''
    (\d{3})  # area code
    (\s|-)?    # separator
    (
        \d{3} # 3 digits
        (\s|-)  # separator
        \d{4} # 4 digits
    )
''', re.VERBOSE)

result = pattern.search('My number is 415 555-1234')
groups = result.groups()

# Groups in order: area code, outer separator, local number, inner separator.
# `sep` ends up holding the inner separator (group 4).
code, number, sep = result.group(1, 3, 4)

assert code == '415'
assert number == '555-1234'
assert number != '415-555-1234'
assert groups == ('415', ' ', '555-1234', '-')

Applications (2)

A program to find some pattern in a full page text.
 
"""Find authors App:
Find all authors @nickname in an amazon page

1) Open https://www.amazon.com/gp/product/1593279922
2) Copy Ctrl-A, Ctrl-C
3) Run python program
4) Paste Ctrl-v
"""

import re, pyperclip

# Read whatever text is currently on the clipboard.
clipboard = pyperclip.paste()

# A handle is "@" followed by one or more letters, digits, "_" or "-".
pattern = re.compile('@[a-zA-Z0-9_-]+')
authors = [found.group() for found in pattern.finditer(clipboard)]

# Put the handles back on the clipboard, one per line, then echo the clipboard.
pyperclip.copy('\n'.join(authors))
print(pyperclip.paste())
    # @OscarBaruffa
    # @Awful_Curious
    # @mcapablanca
Use regex to find if a password is strong.
 
"""Strong password detection:
At least 8 characters long, contains both uppercase and lowercase,
and has at least one digit,
and has at least one non-word character
"""
import re

def password_is_strong(password):
    """Return True if password satisfies every strength rule.

    Rules: at least 8 characters long, with at least one lowercase letter,
    one uppercase letter, one digit and one non-word character.
    """
    # re.DOTALL lets "." match newlines too, so the length check and the
    # lookaheads consider the whole password rather than stopping at the
    # first line break.
    pattern = re.compile(r'''
        (?=.*[a-z])     # lookahead: at least one lowercase letter
        (?=.*[A-Z])     # lookahead: at least one uppercase letter
        (?=.*\d)        # lookahead: at least one digit
        (?=.*\W)        # lookahead: at least one non-word character
        .{8,}           # at least 8 characters in total
        ''', re.VERBOSE | re.DOTALL)
    # Anchoring at the start is enough: every lookahead scans the rest of
    # the string, so retrying from later offsets cannot find a new match.
    return pattern.match(password) is not None

# Too short and/or missing required character classes.
assert not password_is_strong("abc")
assert not password_is_strong("abcdefgh")

# Every character class present and long enough.
assert password_is_strong("Abcd2efgh!")
assert password_is_strong("aB2&bcde")

# Length boundary: 8 characters pass, 7 fail.
assert password_is_strong("aBcefg1!")
assert not password_is_strong("aBcef1!")



  Last update: 412 days ago