Skip to content

Commit

Permalink
Pcc42 (pybites#429)
Browse files Browse the repository at this point in the history
* PCC42 rickhehe

* PCC42 passed my own test
  • Loading branch information
rickhehe authored and pybites committed Jan 7, 2019
1 parent 1d2aeb5 commit c0cb343
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 0 deletions.
49 changes: 49 additions & 0 deletions 42/rickhehe/regex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import re


def extract_course_times():
    """Collect every mm:ss timestamp from the course outline.

    The outline is a fixed string built from the section titles below
    (joined without separators). Timestamps are returned as a list of
    strings in the order they appear.
    """
    sections = (
        'Introduction 1 Lecture 01:47',
        'The Basics 4 Lectures 32:03',
        'Getting Technical! 4 Lectures 41:51',
        'Challenge 2 Lectures 27:48',
        'Afterword 1 Lecture 05:02',
    )
    flask_course = ''.join(sections)
    # Two digits, a colon, two digits.
    timestamp = re.compile(r'\d{2}:\d{2}')
    return timestamp.findall(flask_course)

#print(extract_course_times())

def split_on_multiple_chars():
    """Split the sample log line on ';', ',' and '.'.

    Only the first three separators are honoured (maxsplit=3), so the
    trailing '...' stays inside the last item and the resulting list
    has exactly four elements.
    """
    logline = ''.join([
        '2017-11-03T01:00:02;challenge time,regex!.',
        'hope you join ... soon',
    ])
    separators = re.compile(r'[;,.]')
    return separators.split(logline, maxsplit=3)
#print(len(split_on_multiple_chars()))

def get_all_hashtags_and_links():
    """Extract the URL and the two hashtags from the sample tweet.

    Returns a list of the matched strings in order of appearance.
    """
    tweet = ''.join([
        'New PyBites article: Module of the Week - Requests-cache ',
        'for Repeated API Calls - http://pybit.es/requests-cache.html ',
        '#python #APIs',
    ])
    # An equivalent pattern is (?:http|#)\S+ : the group is non-capturing,
    # so findall would still return the complete matched strings.
    link_or_tag = re.compile(r'http\S+|#\S+')
    return link_or_tag.findall(tweet)
#print(get_all_hashtags_and_links())

def match_first_paragraph():
    """Return the text of the first <p> element, without its tags.

    The whole sample document is replaced (re.sub) by the single
    captured group holding the first paragraph's content.
    """
    html = '<p>pybites != greedy</p>' + '<p>not the same can be said REgarding ...</p>'
    # Lazy quantifiers stop at the first <p> and at the first '<' after it;
    # the trailing .+ swallows the rest so only group 1 survives.
    first_paragraph = re.compile(r'.*?<p>(.*?)<.+')
    return first_paragraph.sub(r'\1', html)
#print(match_first_paragraph())


def find_double_words():
    """Find the immediately repeated word in the sample sentence.

    Returns the full matched text (both occurrences and the whitespace
    between them) via re.search(...).group().
    """
    text = 'Spain is so nice in the the spring'
    # \1 refers back to the first captured word; \b keeps the match on
    # whole-word boundaries.
    repeated = re.search(r'\b(\w+)\s+\1\b', text)
    return repeated.group()
#print(find_double_words())

def match_ip_v4_address(ip):
    """Match a dotted-quad IPv4 address using re.match.

    Each of the four octets must consist of one to three ASCII digits;
    numeric ranges (0-255) are deliberately not validated.

    Args:
        ip: The candidate address string.

    Returns:
        The re.Match object when the whole string is a dotted quad,
        otherwise None.
    """
    # {1,3} rather than {,3}: the latter allows empty octets, so strings
    # such as '10.0.0.a' or '...' would be accepted.
    # \Z anchors the end of the string so trailing junk ('1.1.1.1.1',
    # '1.1.1.1x', a trailing newline) is rejected.
    # re.ASCII limits \d to 0-9.
    return re.match(r'\d{1,3}(?:\.\d{1,3}){3}\Z', ip, re.ASCII)
#print(match_ip_v4_address('192.168.0.1'))
40 changes: 40 additions & 0 deletions 42/rickhehe/test_regex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from regex import (extract_course_times, split_on_multiple_chars,
get_all_hashtags_and_links, match_first_paragraph,
find_double_words, match_ip_v4_address)


def test_extract_course_times():
    """extract_course_times yields the five timestamps in outline order."""
    assert extract_course_times() == [
        '01:47', '32:03', '41:51', '27:48', '05:02',
    ]


def test_split_on_multiple_chars():
    """The log line splits into four parts, leaving the final '...' intact."""
    pieces = split_on_multiple_chars()
    assert pieces == [
        '2017-11-03T01:00:02',
        'challenge time',
        'regex!',
        'hope you join ... soon',
    ]


def test_get_all_hashtags_and_links():
    """The tweet yields its URL followed by both hashtags."""
    found = get_all_hashtags_and_links()
    assert found == [
        'http://pybit.es/requests-cache.html',
        '#python',
        '#APIs',
    ]


def test_match_first_paragraph():
    """Only the text of the first paragraph is returned, without tags."""
    assert match_first_paragraph() == 'pybites != greedy'


def test_find_double_words():
    """The repeated word pair in the sample sentence is detected."""
    assert find_double_words() == 'the the'


def test_match_ip_address():
    """Well-formed dotted quads must match; malformed ones must give None."""
    should_match = [
        '1.1.1.1',
        '255.255.255.255',
        '192.168.1.1',
        '10.10.1.1',
        '132.254.111.10',
        '26.10.2.10',
        '127.0.0.1',
    ]
    should_fail = ['10.10.10', '10.10', '10', 'a.a.a.a', '10.0.0.a']

    for address in should_match:
        assert match_ip_v4_address(address)
    for address in should_fail:
        assert match_ip_v4_address(address) is None

0 comments on commit c0cb343

Please sign in to comment.