Skip to content

Commit

Permalink
Fixes Publishing Data to Elasticsearch (twintproject#994)
Browse files — browse the repository at this point in the history
* Fix ES publishing

* Remove hour() from elasticsearch.py
  • Loading branch information
tweedge authored Oct 29, 2020
1 parent 52ee752 commit 2348211
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 21 deletions.
31 changes: 12 additions & 19 deletions twint/storage/elasticsearch.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## TODO - Fix Weekday situation
from elasticsearch import Elasticsearch, helpers
from geopy.geocoders import Nominatim
from time import strftime, localtime
from datetime import datetime
import contextlib
import sys

Expand Down Expand Up @@ -58,7 +58,7 @@ def createIndex(config, instance, **scope):
"properties": {
"id": {"type": "long"},
"conversation_id": {"type": "long"},
"created_at": {"type": "long"},
"created_at": {"type": "text"},
"date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
"timezone": {"type": "keyword"},
"place": {"type": "keyword"},
Expand Down Expand Up @@ -193,25 +193,18 @@ def weekday(day):

return weekdays[day]

def hour(datetime):
    """Return the local-time hour of a Unix timestamp as a two-digit string.

    Args:
        datetime: Seconds since the epoch (int or float), as accepted by
            ``time.localtime``. The parameter name is kept for caller
            compatibility even though it shadows the ``datetime`` name
            imported at module level.

    Returns:
        The hour in 24-hour notation, zero-padded ("00" through "23"),
        expressed in the machine's local timezone.
    """
    # Convert to a local struct_time first, then format only the hour field.
    local_parts = localtime(datetime)
    hour_text = strftime("%H", local_parts)
    return hour_text

def Tweet(Tweet, config):
global _index_tweet_status
global _is_near_def
weekdays = {
"Monday": 1,
"Tuesday": 2,
"Wednesday": 3,
"Thursday": 4,
"Friday": 5,
"Saturday": 6,
"Sunday": 7,
}
day = weekdays[strftime("%A", localtime(Tweet.datetime/1000))]
date_obj = datetime.strptime(Tweet.datetime, "%Y-%m-%d %H:%M:%S %Z")

actions = []

try:
retweet = Tweet.retweet
except AttributeError:
retweet = None

dt = f"{Tweet.datestamp} {Tweet.timestamp}"

j_data = {
Expand All @@ -231,10 +224,10 @@ def Tweet(Tweet, config):
"user_id_str": Tweet.user_id_str,
"username": Tweet.username,
"name": Tweet.name,
"day": day,
"hour": hour(Tweet.datetime/1000),
"day": date_obj.weekday(),
"hour": date_obj.hour,
"link": Tweet.link,
"retweet": Tweet.retweet,
"retweet": retweet,
"essid": config.Essid,
"nlikes": int(Tweet.likes_count),
"nreplies": int(Tweet.replies_count),
Expand All @@ -245,7 +238,7 @@ def Tweet(Tweet, config):
"near": config.Near
}
}
if Tweet.retweet:
if retweet is not None:
j_data["_source"].update({"user_rt_id": Tweet.user_rt_id})
j_data["_source"].update({"user_rt": Tweet.user_rt})
j_data["_source"].update({"retweet_id": Tweet.retweet_id})
Expand Down
3 changes: 1 addition & 2 deletions twint/storage/panda.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import datetime, pandas as pd, warnings
from time import strftime, localtime
from .elasticsearch import hour
from twint.tweet import Tweet_formats

Tweets_df = None
Expand Down Expand Up @@ -85,7 +84,7 @@ def update(object, config):
"username": Tweet.username,
"name": Tweet.name,
"day": day,
"hour": hour(datetime_ms/1000),
"hour": datetime.strptime("%H", localtime(datetime_ms/1000)),
"link": Tweet.link,
"urls": Tweet.urls,
"photos": Tweet.photos,
Expand Down

0 comments on commit 2348211

Please sign in to comment.