v1.0.0, line numbers are now included with unclosed tags.

Signed-off-by: Ryan P. C. McQuen <[email protected]>
ryanpcmcquen · Sep 20, 2018 · 6e5a51c
1 parent 3416abd
commit 6e5a51c
Show file tree

Hide file tree

Showing 3 changed files with 50 additions and 55 deletions.
diff --git a/README.md b/README.md
@@ -1,12 +1,14 @@
 # unclosedTagFinder
+
 [![GitHub license](https://img.shields.io/badge/license-GPLv2-blue.svg)](https://raw.githubusercontent.com/ryanpcmcquen/unclosedTagFinder/master/LICENSE)
-[![GitHub version](https://img.shields.io/badge/version-0.4.0-orange.svg)](https://github.com/ryanpcmcquen/unclosedTagFinder/releases)
+[![GitHub version](https://img.shields.io/badge/version-1.0.0-orange.svg)](https://github.com/ryanpcmcquen/unclosedTagFinder/releases)
 [![GitHub issues](https://img.shields.io/github/issues/ryanpcmcquen/unclosedTagFinder.svg)](https://github.com/ryanpcmcquen/unclosedTagFinder/issues)
 [![Twitter](https://img.shields.io/twitter/url/https/github.com/ryanpcmcquen/unclosedTagFinder.svg?style=social)](https://twitter.com/intent/tweet?text=Hey%2C%20check%20this%20out%3A%20https%3A%2F%2Fgithub.com%2Fryanpcmcquen%2FunclosedTagFinder)
 
 Find unclosed tags in HTML source.
 
 ### Usage:
+
 ```
 $ ./unclosedTagFinder.py -i '<html></html>'
 
@@ -25,27 +27,6 @@ Your HTML is perfectly matched. You're awesome!
 
 ---
 
-### TODO:
-
-**Important**:
-
-- [x] Read file input.
-- [x] Accommodate tag names with attributes.
-- [x] Ignore self closing tags.
-- [x] Read remote files.
-- [x] Accommodate user input.
-- [x] Improve command line interface and output.
-- [ ] Give line numbers of unclosed tags.
-- [ ] Improve error output.
-- [ ] Include unit tests.
-
-**Maybe**:
-- [ ] Add a GUI.
-- [ ] Accommodate multiple files.
-- [ ] Fix code?
-
----
-
 Thanks to the Wingware Pro IDE for making Python development even more fun!
 
 ![Wingware!](https://wingware.com/images/wingware-button-200x89.png)
diff --git a/foo.html b/foo.html
@@ -2,6 +2,9 @@
 <html>
 <div>
     <br>
+    Ignore this line.
+    <!-- Ignore this too! -->
     <span class="bar">Foo
 </div>
-</html>
+
+</html>
diff --git a/unclosedTagFinder.py b/unclosedTagFinder.py
@@ -3,13 +3,14 @@
 import argparse
 import urllib.parse
 import urllib.request
+from collections import OrderedDict
 
-htmlRegex = r'<[^\!][^>]*>'
+htmlRegex = r'\s*<[^\!][^>]*>'
 # Void elements:
 # https://www.w3.org/TR/html/syntax.html#void-elements
-voidElementsRegex = r'</?(?!area|base|br|col|embed|hr|img|input|keygen|link|menuitem|meta|param|source|track|wbr)'
-openingTagRegex = r'<[^/]'
-closingTagRegex = r'</'
+voidElementsRegex = r'\s*</?(?!area|base|br|col|embed|hr|img|input|keygen|link|menuitem|meta|param|source|track|wbr)'
+openingTagRegex = r'\s*<[^/]'
+closingTagRegex = r'\s*</'
 
 parser = argparse.ArgumentParser(
     description='Check HTML source for unclosed tags.'
@@ -37,43 +38,50 @@
     # conversion to a string for
     # remote files, but it doesn't
     # hurt to do it for both.
-    html = str(htmlFile.read())
+    htmlLines = enumerate(str(htmlFile.read()).splitlines(), 1)
     htmlFile.close()
 else:
-    html = args.input
-tags = re.compile(htmlRegex, flags=re.I | re.M)
-tagList = re.findall(tags, html)
+    htmlLines = enumerate(args.input.splitlines(), 1)
 
-devoidedTagList = list(
-    filter(
-        lambda tag: re.match(
-            voidElementsRegex,
-            tag
-        ),
-        tagList
+tags = re.compile(htmlRegex, flags=re.IGNORECASE | re.MULTILINE)
+
+tagDict = {
+    line_number: line for (
+        line_number, line
+    ) in htmlLines
+    if tags.search(line)
+}
+
+devoidedTagDict = {
+    line_number: tag for line_number, tag in tagDict.items()
+    if re.match(
+        voidElementsRegex,
+        tag
     )
-)
+}
 
-openingTagList = list(
-    filter(
-        lambda tag: re.match(openingTagRegex, tag),
-        devoidedTagList
+openingTagDict = {
+    line_number: tag for line_number, tag in devoidedTagDict.items()
+    if re.match(
+        openingTagRegex,
+        tag
     )
-)
+}
 
-closingTagList = list(
-    filter(
-        lambda tag: re.match(closingTagRegex, tag),
-        devoidedTagList
+closingTagDict = {
+    line_number: tag for line_number, tag in devoidedTagDict.items()
+    if re.match(
+        closingTagRegex,
+        tag
     )
-)
+}
 
-numberOfOpeningTags = len(openingTagList)
-numberOfClosingTags = len(closingTagList)
+numberOfOpeningTags = len(openingTagDict.items())
+numberOfClosingTags = len(closingTagDict.items())
 
-filteredClosingTagList = list(
-    map(lambda tag: re.sub('/', '', tag), closingTagList)
-)
+filteredClosingTagDict = {
+    line_number: re.sub('/', '', tag) for line_number, tag in closingTagDict.items()
+}
 
 if numberOfOpeningTags == numberOfClosingTags:
     print()
@@ -84,5 +92,8 @@
     print()
     print('The following tags are \033[1;41munclosed\033[1;m\033[0m:')
     print()
-    print(set(openingTagList).difference(filteredClosingTagList))
+    print({
+        line_number: tag for line_number, tag in openingTagDict.items()
+        if tag not in filteredClosingTagDict.values()
+    })
     print()