Python
/
Strings
- 1 Language 9
-
Hello World
-
Variables
-
Functions
-
Conditional
-
Operators
-
While
-
Turtle
-
Script Mode
-
Debugging
- 2 Strings 6
-
Slice
-
Raw Strings
-
Regex
-
Validation
-
Config
-
Escape
- 3 Collections 5
-
Lists
-
Dictionaries
-
Efficiency
-
Tuples
-
References
- 4 Functions 5
-
Recursion
-
Factorial
-
Modulus
-
Reassignment
-
Approximate
- 5 Storage 8
-
Files
-
Databases
-
Pipes
-
With open
-
Shelve
-
Zip
-
Csv
-
Json
- 6 Class 4
-
Definition
-
Attributes
-
Functional
-
Methods
- 7 Goodies 5
-
Conditional Expression
-
List Comprehension
-
Generator
-
Named Tuple
-
Modules
- 8 Applications 5
-
Pythagora
-
Palindrome
-
Binary Search
-
Conway Game
-
Coin Flip
- 9 Scheduler 4
-
Time
-
Multithreading
-
Subprocess
-
Logging
- 10 Packages 2
-
Clipboard
-
Ocr
/
Regex
➟
➟
Last update: 07-12-2021
Find Text
p162 Finding patterns in a normal way, without regular expressions.
"""Find text in phone format: 415-555-1234
Not using regex involves a lot of code.
If you want to find a phone within a larger text ...
you would have to add even more code.
"""
def is_phone_number(text):
    """Check, without regular expressions, that text looks like 415-555-1234.

    The string must be exactly 12 characters long: three decimal digits,
    a dash, three decimal digits, a dash and four decimal digits.

    Returns True when the format matches, False otherwise.
    """
    if len(text) != 12:
        return False
    # Cut the candidate into its five fixed-position pieces.
    first, dash_one, second, dash_two, last = (
        text[:3], text[3], text[4:7], text[7], text[8:]
    )
    dashes_ok = dash_one == '-' and dash_two == '-'
    return dashes_ok and all(
        part.isdecimal() for part in (first, second, last)
    )
# Sanity checks: only the dash-separated 3-3-4 layout is accepted.
assert not is_phone_number('123456789012')
assert is_phone_number('123-456-7777')
assert not is_phone_number('123-4567777')
text = """Call me at 123-456-7777 or 415-555-1234,
but not at 415-5551234."""
# Slide a 12-character window over the text and test each position.
for start in range(len(text)):
    candidate = text[start:start + 12]
    if not is_phone_number(candidate):
        continue
    print("Found phone number: " + candidate)
print("Done")
# Found phone number: 123-456-7777
# Found phone number: 415-555-1234
# Done
Patterns
p164 Regular expressions are descriptions for a pattern of text.
"""Find text in phone format: 415-555-1234
Use regular expression patterns.
The group() method returns the match.
The findall() method returns a list of strings.
"""
import re
# Check if string is phone number
def is_phone_number(text):
    """Return True only when the whole of text is a phone number.

    The expected format is ddd-ddd-dddd, for example 415-555-1234.

    fullmatch() is used instead of search() so that a longer string which
    merely contains a phone number (such as 'Call 415-555-1234') is
    rejected, matching what the function name promises.
    """
    pattern = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
    return pattern.fullmatch(text) is not None
# Sanity checks: only the dash-separated 3-3-4 layout is accepted.
assert not is_phone_number('123456789012')
assert is_phone_number('123-456-7777')
assert not is_phone_number('123-4567777')
# Sample text used to search for phone numbers
text = (
    'Call me at 123-456-7777 or 415-555-1234, '
    'but not at 415-5551234.'
)
def search_first_number(text):
    """Return the first phone number (ddd-ddd-dddd) found in text.

    Returns the matched string, or None when text contains no phone number.
    """
    pattern = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
    result = pattern.search(text)
    # search() gives None when nothing matches; calling .group() on that
    # would raise AttributeError, so report "not found" explicitly.
    if result is None:
        return None
    return result.group()
def search_all_numbers(text):
    """Collect every phone number (ddd-ddd-dddd) that occurs in text.

    Returns a list of the matched strings in the order they appear;
    the list is empty when nothing matches.
    """
    return re.findall(r'\d\d\d-\d\d\d-\d\d\d\d', text)
# Show the first hit on its own line.
first_number = search_first_number(text)
print("First number: " + first_number)
# First number: 123-456-7777
numbers = search_all_numbers(text)  # list of strings
# Then list every hit, one per line.
print("Numbers: \n" + '\n'.join(numbers))
# Numbers:
# 123-456-7777
# 415-555-1234
Groups
p166 The groups() method returns a tuple of multiple values.
"""Regex: Grouping with parentheses
Adding parentheses will create groups in the regex.
"""
import re
text = 'My number is 415-555-1234'
# Each parenthesised part of the pattern becomes its own group.
pattern = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
result = pattern.search(text)
# Pick the two groups out one at a time instead of unpacking groups().
code = result.group(1)
number = result.group(2)
assert (code, number) == ('415', '555-1234')
assert number != '415-555-1234'
Verbose
p178 You can use triple quote syntax to spread regex on multiple lines.
"""Regex on multiple lines
This verbose mode can be enabled with re.VERBOSE
"""
import re
pattern = re.compile(r'''
(\d{3}) # area code
(\s|-)? # separator
(
\d{3} # 3 digits
(\s|-) # separator
\d{4} # 4 digits
)
''', re.VERBOSE)
result = pattern.search('My number is 415 555-1234')
groups = result.groups()
# Four groups come back: the area code, the optional first separator,
# the rest of the number, and the separator nested inside that rest.
code = groups[0]
number = groups[2]
sep = groups[3]  # the name ends up bound to the inner (last) separator
assert groups == ('415', ' ', '555-1234', '-')
assert (code, number) == ('415', '555-1234')
assert number != '415-555-1234'
Find authors (A)
A program to find some pattern in a full page text.
"""Find all authors @nickname in an amazon page.
Page example: https://www.amazon.com/gp/product/1593279922
Run program: Ctrl-A, Ctrl-C, run program, Ctrl-V
"""
import re, pyperclip
# Read the text that was copied to the clipboard.
clipboard = pyperclip.paste()
# A nickname is '@' followed by letters, digits, underscores or dashes.
pattern = re.compile('@[a-zA-Z0-9_-]+')
authors = pattern.findall(clipboard)
# Put the nicknames back on the clipboard, one per line, and show them.
joined_authors = '\n'.join(authors)
pyperclip.copy(joined_authors)
print(pyperclip.paste())
# @OscarBaruffa
# @Awful_Curious
# @mcapablanca
Strong password (A)
Use regex to find if a password is strong.
"""Strong password detection:
At least 8 characters long, contains both uppercase and lowercase letters,
and has at least one digit,
and has at least one non-word character
"""
import re
def password_is_strong(password):
    """Tell whether password satisfies every strength rule.

    A strong password is at least 8 characters long and contains at least
    one lowercase letter, one uppercase letter, one digit and one non-word
    character (anything that is not a letter, digit or underscore).

    Returns True when all rules hold, False otherwise.
    """
    pattern = re.compile(r'''
        (?=.*[a-z])   # lookahead: at least one lowercase letter
        (?=.*[A-Z])   # lookahead: at least one uppercase letter
        (?=.*\d)      # lookahead: at least one digit
        (?=.*\W)      # lookahead: at least one non-word character
        .{8,}         # at least 8 characters in total
        ''', re.VERBOSE | re.DOTALL)
    # DOTALL lets '.' cross newlines, so the rules are applied to the whole
    # password instead of to a single line of it. Anchoring with match() is
    # enough: if the check fails at position 0 it fails everywhere else too.
    return pattern.match(password) is not None
# Too short, or missing one of the required character classes.
assert not password_is_strong("abc")
assert not password_is_strong("abcdefgh")
# Long enough, with lowercase, uppercase, digit and non-word character.
assert password_is_strong("Abcd2efgh!")
assert password_is_strong("aB2&bcde")
assert password_is_strong("aBcefg1!")
# Every character class is present but there are only 7 characters.
assert not password_is_strong("aBcef1!")
➥ Questions github Strings