Merge pull request #6 from Nishantbhagat57/main

Added chardet to detect the encoding of the content
xnl-h4ck3r · Dec 4, 2023 · a5e94ab · a5e94ab
2 parents 146b894 + 583bb2e
commit a5e94ab
Show file tree

Hide file tree

Showing 5 changed files with 19 additions and 10 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,11 @@
 ## Changelog
 
+- v1.1
+
+- Changed
+
+  - Add support to automatically identify file encoding.
+
 - v1.0
 
 - Changed

diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 <center><img src="https://github.com/xnl-h4ck3r/urless/blob/main/urless/images/title.png"></center>
 
-## About - v1.0
+## About - v1.1
 
 This is a tool used to de-clutter a list of URLs.
 As a starting point, I took the amazing tool [uro](https://github.com/s0md3v/uro/) by Somdev Sangwan, but I wanted to change a few things, make some improvements (such as dealing with GUIDs), and make it more customizable.

diff --git a/setup.py b/setup.py
@@ -27,7 +27,7 @@
     author="@xnl-h4ck3r",
     url="https://github.com/xnl-h4ck3r/urless",
     zip_safe=False,
-    install_requires=["argparse", "pyyaml", "termcolor", "urlparse3"],
+    install_requires=["argparse", "pyyaml", "termcolor", "urlparse3", "chardet"],
     entry_points={
         'console_scripts': [
             'urless = urless.urless:main',

diff --git a/urless/__init__.py b/urless/__init__.py
@@ -1 +1 @@
-__version__="1.0"
+__version__="1.1"
diff --git a/urless/urless.py b/urless/urless.py
@@ -11,6 +11,7 @@
 from typing import Pattern
 import yaml
 import argparse
+import chardet
 from signal import SIGINT, signal
 from urllib.parse import urlparse
 from termcolor import colored
@@ -438,30 +439,32 @@ def processLine(line):
     if args.ignore_querystring:
         line = line.split('?')[0].split('#')[0]
     return line
-                            
+
 def processInput():
-
     global linesOrigCount
     try:
         if not sys.stdin.isatty():
             for line in sys.stdin:
                 processUrl(processLine(line))
         else:
+            with open(os.path.expanduser(args.input), 'rb') as f:
+                result = chardet.detect(f.read())  # or readline if the file is large
+
             try:
-                inFile = open(os.path.expanduser(args.input), 'r')
+                inFile = open(os.path.expanduser(args.input), 'r', encoding=result['encoding'])
                 lines = inFile.readlines()
                 linesOrigCount = len(lines)
                 for line in lines:
                     processUrl(processLine(line))
             except Exception as e:
                 writerr(colored('ERROR processInput 2 ' + str(e), 'red'))    
-            
+
             try:
                 inFile.close()
             except:
-                pass            
+                pass        
     except Exception as e:
-        writerr(colored('ERROR processInput 1: ' + str(e), 'red'))   
+        writerr(colored('ERROR processInput 1: ' + str(e), 'red'))  
 
 def processOutput():
     global linesFinalCount, linesOrigCount, patternsGUID, patternsInt, patternsCustomID, patternsLang
@@ -717,4 +720,4 @@ def main():
 
 if __name__ == '__main__':
     main()
-
+