Skip to content

Commit

Permalink
Merge pull request #6 from Nishantbhagat57/main
Browse files Browse the repository at this point in the history
Added chardet to detect the encoding of the content
  • Loading branch information
xnl-h4ck3r authored Dec 4, 2023
2 parents 146b894 + 583bb2e commit a5e94ab
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 10 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
## Changelog

- v1.1

- Changed

- Add support for automatically detecting the encoding of the input file.

- v1.0

- Changed
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<center><img src="https://github.com/xnl-h4ck3r/urless/blob/main/urless/images/title.png"></center>

## About - v1.0
## About - v1.1

This is a tool used to de-clutter a list of URLs.
As a starting point, I took the amazing tool [uro](https://github.com/s0md3v/uro/) by Somdev Sangwan, but I wanted to change a few things, make some improvements (like dealing with GUIDs), and make it more customizable.
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
author="@xnl-h4ck3r",
url="https://github.com/xnl-h4ck3r/urless",
zip_safe=False,
install_requires=["argparse", "pyyaml", "termcolor", "urlparse3"],
install_requires=["argparse", "pyyaml", "termcolor", "urlparse3", "chardet"],
entry_points={
'console_scripts': [
'urless = urless.urless:main',
Expand Down
2 changes: 1 addition & 1 deletion urless/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__="1.0"
__version__="1.1"
17 changes: 10 additions & 7 deletions urless/urless.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from typing import Pattern
import yaml
import argparse
import chardet
from signal import SIGINT, signal
from urllib.parse import urlparse
from termcolor import colored
Expand Down Expand Up @@ -438,30 +439,32 @@ def processLine(line):
if args.ignore_querystring:
line = line.split('?')[0].split('#')[0]
return line

def processInput():

global linesOrigCount
try:
if not sys.stdin.isatty():
for line in sys.stdin:
processUrl(processLine(line))
else:
with open(os.path.expanduser(args.input), 'rb') as f:
result = chardet.detect(f.read()) # or readline if the file is large

try:
inFile = open(os.path.expanduser(args.input), 'r')
inFile = open(os.path.expanduser(args.input), 'r', encoding=result['encoding'])
lines = inFile.readlines()
linesOrigCount = len(lines)
for line in lines:
processUrl(processLine(line))
except Exception as e:
writerr(colored('ERROR processInput 2 ' + str(e), 'red'))

try:
inFile.close()
except:
pass
pass
except Exception as e:
writerr(colored('ERROR processInput 1: ' + str(e), 'red'))
writerr(colored('ERROR processInput 1: ' + str(e), 'red'))

def processOutput():
global linesFinalCount, linesOrigCount, patternsGUID, patternsInt, patternsCustomID, patternsLang
Expand Down Expand Up @@ -717,4 +720,4 @@ def main():

if __name__ == '__main__':
main()


0 comments on commit a5e94ab

Please sign in to comment.