diff --git a/elasticsearch/index-tweets.json b/elasticsearch/index-tweets.json
index 82c3244e..fb74c91a 100644
--- a/elasticsearch/index-tweets.json
+++ b/elasticsearch/index-tweets.json
@@ -28,6 +28,7 @@ PUT twinttweets
             "nretweets": {"type": "integer"},
             "quote_url": {"type": "text"},
             "video": {"type": "integer"},
+            "thumbnail": {"type": "text"},
             "search": {"type": "text"},
             "near": {"type": "text"},
             "geo_near": {"type": "geo_point"},
diff --git a/twint/format.py b/twint/format.py
index 0c480076..2ebb6ded 100644
--- a/twint/format.py
+++ b/twint/format.py
@@ -15,6 +15,7 @@ def Tweet(config, t):
         output = output.replace("{urls}", ",".join(t.urls))
         output = output.replace("{photos}", ",".join(t.photos))
         output = output.replace("{video}", str(t.video))
+        output = output.replace("{thumbnail}", t.thumbnail)
         output = output.replace("{tweet}", t.tweet)
         output = output.replace("{language}", t.lang)
         output = output.replace("{hashtags}", ",".join(t.hashtags))
diff --git a/twint/storage/db.py b/twint/storage/db.py
index f4b199ee..1b2d2bcc 100644
--- a/twint/storage/db.py
+++ b/twint/storage/db.py
@@ -76,6 +76,7 @@ def init(db):
                     cashtags text,
                     urls text,
                     photos text,
+                    thumbnail text,
                     quote_url text,
                     video integer,
                     geo text,
@@ -265,6 +266,7 @@ def tweets(conn, Tweet, config):
                     ",".join(Tweet.cashtags),
                     ",".join(Tweet.urls),
                     ",".join(Tweet.photos),
+                    Tweet.thumbnail,
                     Tweet.quote_url,
                     Tweet.video,
                     Tweet.geo,
@@ -274,7 +276,7 @@ def tweets(conn, Tweet, config):
                     Tweet.translate,
                     Tweet.trans_src,
                     Tweet.trans_dest)
-        cursor.execute('INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', entry)
+        cursor.execute('INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', entry)
 
         if config.Favorites:
             query = 'INSERT INTO favorites VALUES(?,?)'
diff --git a/twint/storage/elasticsearch.py b/twint/storage/elasticsearch.py
index a1bc58b3..4351f307 100644
--- a/twint/storage/elasticsearch.py
+++ b/twint/storage/elasticsearch.py
@@ -81,6 +81,7 @@ def createIndex(config, instance, **scope):
                         "nretweets": {"type": "integer"},
                         "quote_url": {"type": "text"},
                         "video": {"type":"integer"},
+                        "thumbnail": {"type":"text"},
                         "search": {"type": "text"},
                         "near": {"type": "text"},
                         "geo_near": {"type": "geo_point"},
@@ -256,6 +257,8 @@ def Tweet(Tweet, config):
         for photo in Tweet.photos:
             _photos.append(photo)
         j_data["_source"].update({"photos": _photos})
+    if Tweet.thumbnail:
+        j_data["_source"].update({"thumbnail": Tweet.thumbnail})
     if Tweet.mentions:
         _mentions = []
         for mention in Tweet.mentions:
diff --git a/twint/storage/panda.py b/twint/storage/panda.py
index 2744899e..e11c3773 100644
--- a/twint/storage/panda.py
+++ b/twint/storage/panda.py
@@ -86,6 +86,10 @@ def update(object, config):
             "day": day,
             "hour": hour(Tweet.datetime/1000),
             "link": Tweet.link,
+            "urls": Tweet.urls,
+            "photos": Tweet.photos,
+            "video": Tweet.video,
+            "thumbnail": Tweet.thumbnail,
             "retweet": Tweet.retweet,
             "nlikes": int(Tweet.likes_count),
             "nreplies": int(Tweet.replies_count),
diff --git a/twint/storage/write_meta.py b/twint/storage/write_meta.py
index 172fb126..c3c189d7 100644
--- a/twint/storage/write_meta.py
+++ b/twint/storage/write_meta.py
@@ -24,6 +24,7 @@ def tweetData(t):
             "retweet": t.retweet,
             "quote_url": t.quote_url,
             "video": t.video,
+            "thumbnail": t.thumbnail,
             "near": t.near,
             "geo": t.geo,
             "source": t.source,
@@ -64,6 +65,7 @@ def tweetFieldnames():
             "retweet",
             "quote_url",
             "video",
+            "thumbnail",
             "near",
             "geo",
             "source",
diff --git a/twint/tweet.py b/twint/tweet.py
index 82b79777..9eada35e 100644
--- a/twint/tweet.py
+++ b/twint/tweet.py
@@ -74,6 +74,31 @@ def getRetweet(tw, _config):
         return _rt_id, _rt_username
     return '', ''
 
+def getThumbnail(tw):
+    """Get Thumbnail
+
+    Return the URL inside the last CSS ``url(...)`` of the inline ``style``
+    of the "PlayableMedia-player" divs found in `tw`, or "" when none of
+    them has one. If several player divs carry a URL, the last one wins.
+    """
+    thumb = ""
+    for div in tw.find_all("div", "PlayableMedia-player"):
+        # A player div may lack an inline style or a background image; skip
+        # it rather than raising KeyError or storing raw CSS as the URL.
+        _, marker, rest = (div.attrs.get("style") or "").rpartition("url(")
+        if not marker:
+            continue
+        rest = rest.lstrip()
+        if rest[:1] in ("'", '"'):
+            # Quoted URL: take everything up to the matching quote.
+            url = rest[1:].partition(rest[0])[0]
+        else:
+            # Unquoted URL: it ends at the closing parenthesis.
+            url = rest.partition(")")[0].strip()
+        if url:
+            thumb = url
+    return thumb
+
 def Tweet(tw, config):
     """Create Tweet object
     """
@@ -97,6 +122,7 @@ def Tweet(tw, config):
     t.urls = [link.attrs["data-expanded-url"] for link in tw.find_all('a',{'class':'twitter-timeline-link'}) if link.has_attr("data-expanded-url")]
     t.photos = [photo_node.attrs['data-image-url'] for photo_node in tw.find_all("div", "AdaptiveMedia-photoContainer")]
     t.video = 1 if tw.find_all("div", "AdaptiveMedia-video") != [] else 0
+    t.thumbnail = getThumbnail(tw)
     t.tweet = getText(tw)
     t.lang = tw.find('p', 'tweet-text')['lang']
     t.hashtags = [hashtag.text for hashtag in tw.find_all("a","twitter-hashtag")]