diff --git a/CHANGELOG.md b/CHANGELOG.md index 8181e83..89f8754 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ ## Changelog +- v1.1 + +- Changed + + - Add support to automatically identify file encoding. + - v1.0 - Changed diff --git a/README.md b/README.md index 80aa665..5bcc8e4 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@
-## About - v1.0 +## About - v1.1 This is a tool used to de-clutter a list of URLs. As a starting point, I took the amazing tool [uro](https://github.com/s0md3v/uro/) by Somdev Sangwan. But I wanted to change a few things, make some improvements (like deal with GUIDs) and make it more customizable. diff --git a/setup.py b/setup.py index 8f34e03..589ed76 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ author="@xnl-h4ck3r", url="https://github.com/xnl-h4ck3r/urless", zip_safe=False, - install_requires=["argparse", "pyyaml", "termcolor", "urlparse3"], + install_requires=["argparse", "pyyaml", "termcolor", "urlparse3", "chardet"], entry_points={ 'console_scripts': [ 'urless = urless.urless:main', diff --git a/urless/__init__.py b/urless/__init__.py index c6ac58e..f11231d 100644 --- a/urless/__init__.py +++ b/urless/__init__.py @@ -1 +1 @@ -__version__="1.0" \ No newline at end of file +__version__="1.1" diff --git a/urless/urless.py b/urless/urless.py index 51eec6c..9d9ac42 100644 --- a/urless/urless.py +++ b/urless/urless.py @@ -11,6 +11,7 @@ from typing import Pattern import yaml import argparse +import chardet from signal import SIGINT, signal from urllib.parse import urlparse from termcolor import colored @@ -438,30 +439,32 @@ def processLine(line): if args.ignore_querystring: line = line.split('?')[0].split('#')[0] return line - + def processInput(): - global linesOrigCount try: if not sys.stdin.isatty(): for line in sys.stdin: processUrl(processLine(line)) else: + with open(os.path.expanduser(args.input), 'rb') as f: + result = chardet.detect(f.read()) # or readline if the file is large + try: - inFile = open(os.path.expanduser(args.input), 'r') + inFile = open(os.path.expanduser(args.input), 'r', encoding=result['encoding']) lines = inFile.readlines() linesOrigCount = len(lines) for line in lines: processUrl(processLine(line)) except Exception as e: writerr(colored('ERROR processInput 2 ' + str(e), 'red')) - + try: inFile.close() except: - pass + pass except Exception as e: - writerr(colored('ERROR processInput 1: ' + str(e), 'red')) + writerr(colored('ERROR processInput 1: ' + str(e), 'red')) def processOutput(): global linesFinalCount, linesOrigCount, patternsGUID, patternsInt, patternsCustomID, patternsLang @@ -717,4 +720,4 @@ def main(): if __name__ == '__main__': main() - \ No newline at end of file +