From c0cb343db425352a1e01c1d0bc097c41d6630d23 Mon Sep 17 00:00:00 2001
From: Rick <37203861+rickhehe@users.noreply.github.com>
Date: Mon, 7 Jan 2019 23:15:23 +1300
Subject: [PATCH] Pcc42 (#429)

* PCC42 rickhehe

* PCC42 passed my own test
---
 42/rickhehe/regex.py      | 49 +++++++++++++++++++++++++++++++++++++++
 42/rickhehe/test_regex.py | 40 ++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+)
 create mode 100644 42/rickhehe/regex.py
 create mode 100644 42/rickhehe/test_regex.py
diff --git a/42/rickhehe/regex.py b/42/rickhehe/regex.py
new file mode 100644
index 000000000..8917b4baa
--- /dev/null
+++ b/42/rickhehe/regex.py
@@ -0,0 +1,49 @@
+import re
+
+
+def extract_course_times():
+    '''Collect every mm:ss timestamp of the course outline with findall.'''
+    course_outline = ('Introduction 1 Lecture 01:47'
+                      'The Basics 4 Lectures 32:03'
+                      'Getting Technical!  4 Lectures 41:51'
+                      'Challenge 2 Lectures 27:48'
+                      'Afterword 1 Lecture 05:02')
+    timestamp = re.compile(r'\d{2}:\d{2}')  # two digits, colon, two digits
+    return timestamp.findall(course_outline)
+
+
+def split_on_multiple_chars():
+    '''Split the log line on ; , . with re.split; maxsplit=3 stops after
+       three cuts, so the trailing ... stays intact and four fields result.'''
+    log_entry = ('2017-11-03T01:00:02;challenge time,regex!.'
+                 'hope you join ... soon')
+    separators = re.compile(r'[;,.]')
+    return separators.split(log_entry, maxsplit=3)
+
+
+def get_all_hashtags_and_links():
+    '''Pull the URL and the two hashtags out of the tweet, in order.'''
+    tweet_text = ('New PyBites article: Module of the Week - Requests-cache '
+                  'for Repeated API Calls - http://pybit.es/requests-cache.html '
+                  '#python #APIs')
+    link_or_tag = re.compile(r'http\S+|#\S+')  # no groups, so findall yields whole matches
+    return link_or_tag.findall(tweet_text)
+
+def match_first_paragraph():
+    '''Reduce the html to the text of its first paragraph (tags excluded) with re.sub.'''
+    markup = ('<p>pybites != greedy</p>'
+              '<p>not the same can be said REgarding ...</p>')
+    first_paragraph = re.compile(r'.*?<p>(.*?)<.+')  # lazy capture ends at first '<'; greedy tail eats the rest
+    return first_paragraph.sub(r'\1', markup)
+
+
+def find_double_words():
+    '''Return the immediately repeated word pair from the sentence via re.search.'''
+    sentence = 'Spain is so nice in the the spring'
+    doubled = re.search(r'\b(\w+)\s+\1\b', sentence)  # backreference requires the same word twice
+    return doubled.group()
+
+def match_ip_v4_address(ip):
+    '''Return a match object when ip is exactly four dot-separated groups of 1-3 digits, else None.
+       Octet values are not range-checked (999.1.1.1 matches); empty octets and trailing text are rejected.'''
+    return re.match(r'\d{1,3}(?:\.\d{1,3}){3}\Z', ip)
diff --git a/42/rickhehe/test_regex.py b/42/rickhehe/test_regex.py
new file mode 100644
index 000000000..35403974f
--- /dev/null
+++ b/42/rickhehe/test_regex.py
@@ -0,0 +1,40 @@
+from regex import (extract_course_times, split_on_multiple_chars,
+                   get_all_hashtags_and_links, match_first_paragraph,
+                   find_double_words, match_ip_v4_address)
+
+
+def test_extract_course_times():
+    timestamps = extract_course_times()
+    assert timestamps == ['01:47', '32:03', '41:51', '27:48', '05:02']
+
+
+def test_split_on_multiple_chars():
+    fields = split_on_multiple_chars()
+    assert fields == ['2017-11-03T01:00:02', 'challenge time',
+                      'regex!', 'hope you join ... soon']
+
+
+def test_get_all_hashtags_and_links():
+    found = get_all_hashtags_and_links()
+    assert found == ['http://pybit.es/requests-cache.html', '#python', '#APIs']
+
+
+def test_match_first_paragraph():
+    paragraph = match_first_paragraph()
+    assert paragraph == 'pybites != greedy'
+
+
+def test_find_double_words():
+    repeated = find_double_words()
+    assert repeated == 'the the'
+
+
+def test_match_ip_address():
+    valid_ips = ['1.1.1.1', '255.255.255.255', '192.168.1.1', '10.10.1.1',
+                 '132.254.111.10', '26.10.2.10', '127.0.0.1']
+    bad_ips = ['10.10.10', '10.10', '10', 'a.a.a.a', '10.0.0.a',
+               '...', '1.1.1.', '1.1.1.1.1', '1.1.1.1x']  # empty octets / trailing text
+    for valid_ip in valid_ips:
+        assert match_ip_v4_address(valid_ip)
+    for bad_ip in bad_ips:
+        assert match_ip_v4_address(bad_ip) is None