Skip to content

Commit

Permalink
Add thumbnail attribute to tweet object (twintproject#889)
Browse files Browse the repository at this point in the history
* Add thumbnail attribute to tweet object

Fetch the video thumbnail of a tweet

* Add Thumbnail support for DB, CSV, JSON and Elasticsearch

* Update panda.py

Add missing datafields: video, photos, urls and thumbnail.
  • Loading branch information
RtiM0 authored and darvell committed Nov 16, 2020
1 parent 2436863 commit 6f385de
Show file tree
Hide file tree
Showing 7 changed files with 25 additions and 1 deletion.
1 change: 1 addition & 0 deletions elasticsearch/index-tweets.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ PUT twinttweets
"nretweets": {"type": "integer"},
"quote_url": {"type": "text"},
"video": {"type": "integer"},
"thumbnail": {"type": "text"},
"search": {"type": "text"},
"near": {"type": "text"},
"geo_near": {"type": "geo_point"},
Expand Down
1 change: 1 addition & 0 deletions twint/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def Tweet(config, t):
output = output.replace("{urls}", ",".join(t.urls))
output = output.replace("{photos}", ",".join(t.photos))
output = output.replace("{video}", str(t.video))
output = output.replace("{thumbnail}", t.thumbnail)
output = output.replace("{tweet}", t.tweet)
output = output.replace("{language}", t.lang)
output = output.replace("{hashtags}", ",".join(t.hashtags))
Expand Down
4 changes: 3 additions & 1 deletion twint/storage/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def init(db):
cashtags text,
urls text,
photos text,
thumbnail text,
quote_url text,
video integer,
geo text,
Expand Down Expand Up @@ -265,6 +266,7 @@ def tweets(conn, Tweet, config):
",".join(Tweet.cashtags),
",".join(Tweet.urls),
",".join(Tweet.photos),
Tweet.thumbnail,
Tweet.quote_url,
Tweet.video,
Tweet.geo,
Expand All @@ -274,7 +276,7 @@ def tweets(conn, Tweet, config):
Tweet.translate,
Tweet.trans_src,
Tweet.trans_dest)
cursor.execute('INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', entry)
cursor.execute('INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', entry)

if config.Favorites:
query = 'INSERT INTO favorites VALUES(?,?)'
Expand Down
3 changes: 3 additions & 0 deletions twint/storage/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def createIndex(config, instance, **scope):
"nretweets": {"type": "integer"},
"quote_url": {"type": "text"},
"video": {"type":"integer"},
"thumbnail": {"type":"text"},
"search": {"type": "text"},
"near": {"type": "text"},
"geo_near": {"type": "geo_point"},
Expand Down Expand Up @@ -256,6 +257,8 @@ def Tweet(Tweet, config):
for photo in Tweet.photos:
_photos.append(photo)
j_data["_source"].update({"photos": _photos})
if Tweet.thumbnail:
j_data["_source"].update({"thumbnail": Tweet.thumbnail})
if Tweet.mentions:
_mentions = []
for mention in Tweet.mentions:
Expand Down
4 changes: 4 additions & 0 deletions twint/storage/panda.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ def update(object, config):
"day": day,
"hour": hour(Tweet.datetime/1000),
"link": Tweet.link,
"urls": Tweet.urls,
"photos": Tweet.photos,
"video": Tweet.video,
"thumbnail": Tweet.thumbnail,
"retweet": Tweet.retweet,
"nlikes": int(Tweet.likes_count),
"nreplies": int(Tweet.replies_count),
Expand Down
2 changes: 2 additions & 0 deletions twint/storage/write_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def tweetData(t):
"retweet": t.retweet,
"quote_url": t.quote_url,
"video": t.video,
"thumbnail": t.thumbnail,
"near": t.near,
"geo": t.geo,
"source": t.source,
Expand Down Expand Up @@ -64,6 +65,7 @@ def tweetFieldnames():
"retweet",
"quote_url",
"video",
"thumbnail",
"near",
"geo",
"source",
Expand Down
11 changes: 11 additions & 0 deletions twint/tweet.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,16 @@ def getRetweet(tw, _config):
return _rt_id, _rt_username
return '', ''

def getThumbnail(tw):
    """Get Thumbnail

    Return the thumbnail URL embedded in the inline ``style`` of the tweet's
    ``div.PlayableMedia-player`` elements, or ``""`` when none is found.

    ``tw`` must provide ``find_all(name, css_class)`` returning elements that
    expose an ``attrs`` mapping (presumably a BeautifulSoup tag -- confirm
    against the caller).

    When several player divs carry a URL, the last one wins, matching the
    previous behaviour of this function.
    """
    thumb = ""
    for div in tw.find_all("div", "PlayableMedia-player"):
        # A player div without a style attribute used to raise KeyError.
        style = div.attrs.get("style") or ""
        # Use the last ``url(`` occurrence, like the former ``split(...)[-1]``.
        _, marker, rest = style.rpartition("url(")
        if not marker:
            # No url(...) at all: skip rather than return the raw style text.
            continue
        rest = rest.strip()
        if rest[:1] in ("'", '"'):
            # Quoted form: take everything up to the matching closing quote,
            # so declarations after the url(...) do not leak into the result.
            url = rest[1:].partition(rest[0])[0]
        else:
            # Unquoted form: the URL ends at the closing parenthesis.
            url = rest.partition(")")[0].strip()
        if url:
            thumb = url
    return thumb

def Tweet(tw, config):
"""Create Tweet object
"""
Expand All @@ -97,6 +107,7 @@ def Tweet(tw, config):
t.urls = [link.attrs["data-expanded-url"] for link in tw.find_all('a',{'class':'twitter-timeline-link'}) if link.has_attr("data-expanded-url")]
t.photos = [photo_node.attrs['data-image-url'] for photo_node in tw.find_all("div", "AdaptiveMedia-photoContainer")]
t.video = 1 if tw.find_all("div", "AdaptiveMedia-video") != [] else 0
t.thumbnail = getThumbnail(tw)
t.tweet = getText(tw)
t.lang = tw.find('p', 'tweet-text')['lang']
t.hashtags = [hashtag.text for hashtag in tw.find_all("a","twitter-hashtag")]
Expand Down

0 comments on commit 6f385de

Please sign in to comment.