Skip to content

Commit

Permalink
Print emojis usage by user (issue #37)
Browse files Browse the repository at this point in the history
  • Loading branch information
mar-muel committed Feb 9, 2020
1 parent 7e076f4 commit 520d8e6
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 0 deletions.
8 changes: 8 additions & 0 deletions visualize.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ def cloud(self):
args = parser.parse_args(sys.argv[2:])
main(args)

def emoji(self):
    """Parse the options of the emoji sub-command and run the emoji visualizer.

    Options are read from ``sys.argv[2:]``: the shared data-loading options
    added by ``add_load_data_args`` plus ``--top-n-users`` and
    ``--top-n-emojis``.
    """
    # Imported inside the method so the visualizer module is only loaded
    # when this sub-command is actually invoked.
    from visualizers.emoji import main as run_emoji_visualizer

    cli = add_load_data_args(ArgParseDefault(description='Visualize emoji usage'))
    # (flag, destination attribute, default value, help text)
    int_options = (
        ('--top-n-users', 'top_n_users', 10, 'Show top n users'),
        ('--top-n-emojis', 'top_n_emojis', 20, 'Show top n emojis per user'),
    )
    for flag, dest, default, help_text in int_options:
        cli.add_argument(flag, dest=dest, type=int, default=default, help=help_text)
    run_emoji_visualizer(cli.parse_args(sys.argv[2:]))

# Script entry point: only runs when the file is executed directly, not on import.
# NOTE(review): ArgParse() presumably dispatches to the sub-command methods — confirm in the class definition.
if __name__ == '__main__':
ArgParse()
47 changes: 47 additions & 0 deletions visualizers/emoji.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import re
import logging
from visualizers.utils import save_fig
from utils import load_data
from collections import Counter
import itertools
import numpy as np

log = logging.getLogger(__name__)
EMOJI_PATTERN = re.compile("["

This comment has been minimized.

Copy link
@KindDragon

KindDragon Feb 19, 2020

You can use the emojis pip package.

from emojis.emojis import RE_EMOJI_TO_TEXT

and use RE_EMOJI_TO_TEXT instead of that

This comment has been minimized.

Copy link
@KindDragon

KindDragon Feb 19, 2020

Your regex returns 8,302 emojis, but the regex from the package returns 21,735 emojis.

This comment has been minimized.

Copy link
@mar-muel

mar-muel Feb 19, 2020

Author Collaborator

Good point! We could probably "steal" the list from here which would avoid adding a dependency.

This comment has been minimized.

Copy link
@KindDragon

KindDragon Feb 19, 2020

But the current code doesn't work well for me with emojis that have skin tones or men/women versions:

    u':person_facepalming:': u'\U0001F926',
    u':person_facepalming_dark_skin_tone:': u'\U0001F926\U0001F3FF',
    u':person_facepalming_light_skin_tone:': u'\U0001F926\U0001F3FB',
    u':person_facepalming_medium-dark_skin_tone:': u'\U0001F926\U0001F3FE',
    u':person_facepalming_medium-light_skin_tone:': u'\U0001F926\U0001F3FC',
    u':person_facepalming_medium_skin_tone:': u'\U0001F926\U0001F3FD',
    u':man_facepalming:': u'\U0001F926\U0000200D\U00002642\U0000FE0F',
    u':man_facepalming_dark_skin_tone:': u'\U0001F926\U0001F3FF\U0000200D\U00002642\U0000FE0F',
    u':man_facepalming_light_skin_tone:': u'\U0001F926\U0001F3FB\U0000200D\U00002642\U0000FE0F',
    u':man_facepalming_medium-dark_skin_tone:': u'\U0001F926\U0001F3FE\U0000200D\U00002642\U0000FE0F',
    u':man_facepalming_medium-light_skin_tone:': u'\U0001F926\U0001F3FC\U0000200D\U00002642\U0000FE0F',
    u':man_facepalming_medium_skin_tone:': u'\U0001F926\U0001F3FD\U0000200D\U00002642\U0000FE0F',

For me, the script prints only the person_facepalming emoji.

u"\U0001F600-\U0001F64F" # emoticons
u"\U0001F300-\U0001F5FF" # symbols & pictographs
u"\U0001F680-\U0001F6FF" # transport & map symbols
u"\U0001F1E0-\U0001F1FF" # flags (iOS)
"]+", flags=re.UNICODE)

def main(args):
    """Load the messages selected by ``args`` and print emoji usage per user.

    ``args`` must carry whatever ``load_data`` needs, plus ``top_n_users``
    and ``top_n_emojis``.
    """
    messages = load_data(args)
    log.info(f'Computing emoji frequencies on {len(messages):,} messages... 😤')
    # Frequencies are computed first, then handed straight to the printer
    print_emoji_freqs(
        compute_emoji_freqs(messages),
        top_n_users=args.top_n_users,
        top_n_emojis=args.top_n_emojis,
    )

def _split_emoji_run(run):
    """Split one run of adjacent emoji code points into individual emojis.

    A skin-tone modifier (U+1F3FB..U+1F3FF) stays attached to the emoji it
    follows, and two consecutive regional indicators (U+1F1E6..U+1F1FF) are
    kept together as one flag. Every other code point is its own emoji.
    """
    emojis = []
    for char in run:
        prev = emojis[-1] if emojis else ''
        is_modifier = '\U0001F3FB' <= char <= '\U0001F3FF'
        # Only a lone regional indicator can be completed into a flag
        completes_flag = (
            len(prev) == 1
            and '\U0001F1E6' <= prev <= '\U0001F1FF'
            and '\U0001F1E6' <= char <= '\U0001F1FF'
        )
        if prev and (is_modifier or completes_flag):
            emojis[-1] = prev + char
        else:
            emojis.append(char)
    return emojis


def compute_emoji_freqs(df):
    """Count how often each sender used each emoji.

    Args:
        df: DataFrame with the columns ``text``, ``senderName`` and
            ``outgoing`` (boolean). It is not modified.

    Returns:
        dict mapping sender name to a ``Counter`` of emoji -> number of uses.
        Senders of rows flagged ``outgoing`` are reported as ``'You'``.

    Only code points matched by ``EMOJI_PATTERN`` are seen here, so sequences
    that rely on characters outside that pattern cannot be reassembled.
    """
    # Work on a private frame: the caller's DataFrame must not gain columns
    # or lose rows as a side effect of counting.
    found = df.loc[:, ['senderName', 'outgoing']].copy()
    found['emojis'] = df['text'].str.findall(EMOJI_PATTERN)
    # findall yields NaN for non-string text; those rows carry no emojis
    found = found.dropna(subset=['emojis'])
    # .copy() makes the filtered frame independent so later assignments are safe
    found = found[found['emojis'].map(len) > 0].copy()
    # Each match is a run of adjacent emojis; split every run separately so
    # code points from different runs are never glued together.
    found['emojis'] = found['emojis'].map(
        lambda runs: [emoji for run in runs for emoji in _split_emoji_run(run)]
    )
    num_emojis = found['emojis'].map(len).sum()
    log.info(f'Found {len(found):,} messages containing {num_emojis:,} emojis! 🤗🤗')
    found.loc[found['outgoing'], 'senderName'] = 'You'
    freqs = {}
    for name, group in found.groupby('senderName'):
        freqs[name] = Counter(itertools.chain.from_iterable(group['emojis']))
    return freqs

def print_emoji_freqs(freqs, top_n_users=10, top_n_emojis=10):
    """Print the favorite emojis of the heaviest emoji users.

    Args:
        freqs: dict mapping user name to a ``Counter`` of emoji -> count.
        top_n_users: number of users to show, ranked by total emoji count.
        top_n_emojis: number of most-used emojis to show for each user.
    """
    # Total emoji count per user, used to rank the users
    totals = Counter({user: sum(counts.values()) for user, counts in freqs.items()})
    for user, _total in totals.most_common(top_n_users):
        print(f'Favorite emojis for {user}')
        favorites = freqs[user].most_common(top_n_emojis)
        print(' '.join(symbol for symbol, _uses in favorites))

0 comments on commit 520d8e6

Please sign in to comment.