From 8b9a8c992f31b9eb78cc924fc2b3747856154f85 Mon Sep 17 00:00:00 2001
From: Mennaruuk <52135169+Mennaruuk@users.noreply.github.com>
Date: Fri, 11 Feb 2022 00:50:12 -0500
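Subject: [PATCH] Switch progress bar to tqdm and rework URL collection

Replace the rich.progress track() loop with tqdm and drop the rich
import; import waybackpack. Change the user agent used when obtaining
status codes from DuckDuckBot to bingbot. Deduplicate URLs with set()
instead of dict.fromkeys(), remove the data6 comma filter so data5 is
used for counting and downloading, and move the loop that fills data2
to after the username-length filters. Also print a "Grabbing links"
progress message and show the "No one currently has this handle"
notice on a red background instead of yellow.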

---
 twayback.py | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/twayback.py b/twayback.py
index 5fc8fa5..65ef3d5 100644
--- a/twayback.py
+++ b/twayback.py
@@ -1,11 +1,10 @@
-import requests, re, os, argparse, sys, subprocess, time
+import requests, re, os, argparse, sys, waybackpack, time, subprocess
 from requests import Session
 session = Session()
 from tqdm import tqdm
 import colorama
 from colorama import  Fore, Back, Style
 colorama.init(autoreset=True)
-from rich.progress import track
 os.system('cls')
 
 parser = argparse.ArgumentParser()
@@ -21,9 +20,9 @@
 data1 =f"https://twitter.com/{username}"
 results = []
 headers = {'user-agent':'Mozilla/5.0 (compatible; DuckDuckBot-Https/1.1; https://duckduckgo.com/duckduckbot)'}
+
 response = session.get(data1, headers=headers, allow_redirects=False)
 status_code = response.status_code
-
 if status_code == 200:
     print(Back.GREEN + Fore.WHITE + f"Account is ACTIVE\n")
     time.sleep(1)
@@ -31,19 +30,22 @@
     print(Back.RED + Fore.WHITE + f"Account is SUSPENDED. This means all of {Back.WHITE + Fore.RED + username + Back.RED + Fore.WHITE}'s Tweets will be downloaded.\n")
     time.sleep(3)
 else:
-    print(Back.YELLOW + Fore.WHITE + f"No one currently has this handle. Twayback will search for a history of this handle's Tweets.\n")
+    print(Back.RED + Fore.WHITE + f"No one currently has this handle. Twayback will search for a history of this handle's Tweets.\n")
     time.sleep(2)
 
 stuck = "(Don't worry, Twayback isn't stuck!"
+
 print(f"Please wait. Twayback is searching far and wide for deleted tweets from {username}.\nDrink some delicious coffee while this gets done.\n\n{Back.MAGENTA + stuck + Fore.WHITE}\nDepending on the number of Tweets, this step might take several minutes.)\n")
 
+print(f"Grabbing links for Tweets from the Wayback Machine...\n")
+
 link = f"https://web.archive.org/cdx/search/cdx?url=twitter.com/{username}/status&matchType=prefix&filter=statuscode:200&from={fromdate}&to={todate}"
 data2 = []
-blocklist = []
+
 c = session.get(link).text
 urls = re.findall(r'https?://(?:www\.)?(?:mobile\.)?twitter\.com/(?:#!/)?\w+/status(?:es)?/\d+', c)
+blocklist = []
 blocks = re.findall(r'Blocked', c)
-
 for block in blocks:
     blocklist.append(f"{block}") 
     if any("Blocked" in s for s in blocklist):
@@ -52,9 +54,6 @@
     else:
         pass
 
-for url in urls:
-    data2.append(f"{url}")
-
 username_character_count = len(username)
 if username_character_count == 1:
     data2 = [g for g in data2 if len(str(g)) <= 48]
@@ -88,9 +87,12 @@
     data2 = [g for g in data2 if len(str(g)) <= 62]
 else:
     pass
+ 
+for url in urls:
+    data2.append(f"{url}")
     
 # Remove duplicate URLs
-data3 = list(dict.fromkeys(data2))
+data3 = list(set(data2))
 
 number_of_elements = len(data3)
 if number_of_elements >= 1000:
@@ -100,9 +102,9 @@
 
 # Obtain status codes
 results = []
-headers = {'user-agent':'Mozilla/5.0 (compatible; DuckDuckBot-Https/1.1; https://duckduckgo.com/duckduckbot)'}
+headers = {'user-agent':'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)'}
 
-for url in track(data3):
+for url in tqdm(data3):
     response = session.get(url, headers=headers)
     status_code = response.status_code
     results.append((url, status_code))
@@ -114,10 +116,10 @@
 # Filter for only deleted Tweets
 data4 = [g for g in data3 if " 404" in g]
 data5 = [g.replace(' 404', '') for g in data4]
-data6 = [g for g in data5 if "," not in g ]
+
 os.system('cls')
 
-number_of_elements = len(data6)
+number_of_elements = len(data5)
 
 if number_of_elements == 1:
     answer = input(f"\n{number_of_elements} deleted Tweet has been found.\nWould you like to download it? Type yes or no. Then press Enter. \n")
@@ -126,15 +128,13 @@
     exit()
 else:
     answer = input(f"\n{number_of_elements} deleted Tweets have been found.\nWould you like to download them all? Type yes or no. Then press Enter. \n")
-
 os.system('cls')
-
 # Use waybackpack to download URLs
 if answer.lower() == 'yes':
-    for url in tqdm(data6, position=0, leave=True):
+    for url in tqdm(data5, position=0, leave=True):
         subprocess.run(f"waybackpack -d {username}  --uniques-only --raw {url}", text=True, capture_output=True)
         with open(f'{username}/{username}.txt', 'w') as file:
-            for row in data6:
+            for row in data5:
                 s = "".join(map(str, row))
                 file.write(s+'\n')
     print(f"\nAll Tweets have been successfully downloaded!\nThey can be found as HTML files inside the folder {Back.MAGENTA + Fore.WHITE + username + Back.BLACK + Fore.WHITE}.\nAlso, a text file ({username}.txt) is saved, which lists all URLs for the deleted Tweets.")