# minte9 LearnRemember

S R Q

### Find Text

p162 Finding patterns in a normal way, without regular expressions.  """Find text in phone format: 415-555-1234
Not using regex involves a lot of code.
If you want to find a phone within a larger text ...
you would have to add even more code.
"""

def is_phone_number(text):
    """Return True if text is exactly a phone number written as ddd-ddd-dddd.

    Args:
        text: Candidate string. Only strings of exactly 12 characters
            can match.

    Returns:
        True when characters 0-2, 4-6 and 8-11 are decimal digits and
        characters 3 and 7 are hyphens; False otherwise.
    """
    if len(text) != 12:
        return False

    # The separators live at fixed offsets. Compare those single
    # characters, not the whole string (a 12-char string never equals '-').
    if text[3] != '-' or text[7] != '-':
        return False

    # Area code, prefix and line number positions must all be digits.
    digit_positions = (*range(0, 3), *range(4, 7), *range(8, 12))
    return all(text[i].isdecimal() for i in digit_positions)

# Sanity checks for the hand-written matcher: (input, expected result).
for sample, expected in (
    ('123456789012', False),
    ('123-456-7777', True),
    ('123-4567777', False),
):
    assert is_phone_number(sample) == expected

text = """Call me at 123-456-7777 or 415-555-1234,
but not at 415-5551234."""

# Slide a 12-character window over the text, one start offset at a time,
# and report every window that is a well-formed phone number.
for start, _ in enumerate(text):
    chunk = text[start:start + 12]
    if is_phone_number(chunk):
        print("Found phone number: " + chunk)
print("Done")

# Found phone number: 123-456-7777
# Found phone number: 415-555-1234
# Done


### Patterns

p164 Regular expressions are descriptions for a pattern of text.  """Find text in phone format: 415-555-1234
Use regular expression patterns.
The group() method returns the match.
The findall() method returns a list of strings.
"""
from nis import match
import re

# Check if string is phone number

def is_phone_number(text):
    """Return True if text is exactly a phone number written as ddd-ddd-dddd.

    Args:
        text: Candidate string to validate.

    Returns:
        True when the whole string matches the pattern; False otherwise.
    """
    # fullmatch, not search: a number merely embedded in a longer string
    # (e.g. '00111-222-444455') does not make the string a phone number.
    return re.fullmatch(r'\d\d\d-\d\d\d-\d\d\d\d', text) is not None

# Sanity checks for the regex-based matcher: (input, expected result).
for sample, expected in (
    ('123456789012', False),
    ('123-456-7777', True),
    ('123-4567777', False),
):
    assert is_phone_number(sample) == expected

# Search phone numbers in a text

text = 'Call me at 123-456-7777 or 415-555-1234, but not at 415-5551234.'

def search_first_number(text):
    """Return the first ddd-ddd-dddd phone number found in text.

    Args:
        text: String to scan.

    Returns:
        The matched phone number as a string, or None when text contains
        no phone number.
    """
    result = re.search(r'\d\d\d-\d\d\d-\d\d\d\d', text)
    # search() yields None on no match; calling .group() on it would raise
    # AttributeError, so report "not found" explicitly instead.
    if result is None:
        return None
    return result.group()

def search_all_numbers(text):
    """Return every ddd-ddd-dddd phone number in text, in order of appearance.

    The pattern has no capture groups, so findall yields a list of the
    full matched strings (empty when nothing matches).
    """
    return re.findall(r'\d\d\d-\d\d\d-\d\d\d\d', text)

# Only the first match in the sample text.
first_number = search_first_number(text)
print("First number: " + first_number)
# First number: 123-456-7777

# Every match, printed one per line.
numbers = search_all_numbers(text) # list of strings
listing = '\n'.join(numbers)
print("Numbers: \n" + listing)
# Numbers:
# 123-456-7777
# 415-555-1234

# search() finds the pattern anywhere, even inside a longer digit run.
pattern = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
result = pattern.search('00111-222-444455')
print(result.group(0))
# 111-222-4444


### Groups

p166 The groups() method returns a tuple of multiple values.  """Regex: Grouping with parentheses
Adding parentheses will create groups in the regex.
"""
import re

text = 'My number is 415-555-1234'

# Group 1 captures the area code, group 2 the rest of the number.
pattern = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
result = pattern.search(text)
code = result.group(1)
number = result.group(2)

assert (code, number) == ('415', '555-1234')
assert number != '415-555-1234'


### Verbose

p178 You can use triple quote syntax to spread regex on multiple lines.  """Regex on multiple lines
This verbose mode can be enabled with re.VERBOSE
"""
import re

# In VERBOSE mode whitespace and '#' comments inside the pattern are
# ignored, so the regex can be laid out one piece per line.
pattern = re.compile(r'''
(\d{3})  # area code
(\s|-)?    # separator
(
\d{3} # 3 digits
(\s|-)  # separator
\d{4} # 4 digits
)
''', flags=re.VERBOSE)

result = pattern.search('My number is 415 555-1234')
groups = result.groups()

# Groups in order: area code, outer separator, local number, inner separator.
code = groups[0]
number = groups[2]
sep = groups[3]

assert groups == ('415', ' ', '555-1234', '-')
assert (code, number) == ('415', '555-1234')
assert number != '415-555-1234'

Applications (2)

### Applications (2)

A program to find some pattern in a full page text.  """Find authors App:
Find all authors @nickname in an amazon page

1) Open https://www.amazon.com/gp/product/1593279922
2) Copy Ctrl-A, Ctrl-C
3) Run python program
4) Paste Ctrl-v
"""

import re, pyperclip

# An author handle is '@' followed by letters, digits, '_' or '-'.
pattern = re.compile('@[a-zA-Z0-9_-]+')

# Take the copied page text from the clipboard and collect every handle.
clipboard = pyperclip.paste()
authors = [found.group() for found in pattern.finditer(clipboard)]

# Put the handles back on the clipboard, one per line, and echo them.
joined = '\n'.join(authors)
pyperclip.copy(joined)
print(pyperclip.paste())
# @OscarBaruffa
# @Awful_Curious
# @mcapablanca

Use regex to find if a password is strong.  """Strong password: at least 8 characters long,
contains both uppercase and lowercase letters,
has at least one digit,
and has at least one non-word character.
"""
import re

# Strong-password pattern. Each lookahead starts at the same position and
# only peeks ahead, so the four requirements are checked independently
# before `.{8,}` consumes the password itself (captured as group 1).
pattern = r'''(
    (?=.*[a-z])     # positive lookahead: at least one lowercase letter
    (?=.*[A-Z])     # positive lookahead: at least one uppercase letter
    (?=.*\d)        # positive lookahead: at least one digit
    (?=.*\W)        # positive lookahead: at least one non-word character
    .{8,}           # the password itself: at least 8 characters
)'''
pattern = re.compile(pattern, re.VERBOSE)