From 81ecfbaec45cfebf7c547ed5daae5e59652fcac5 Mon Sep 17 00:00:00 2001
From: Levente Csoke <leventec3@gmail.com>
Date: Wed, 12 Aug 2020 19:58:28 +0200
Subject: [PATCH 1/3] changed how we gather individual people from convos;
 solved and added some todos; 1 test failing only

---
 Conversations.py                              | 116 ---------------
 FacebookData.py                               |  40 -----
 Friends.py                                    |  36 -----
 Individual.py                                 |  47 ------
 People.py                                     |  79 ----------
 __main__.py                                   |   5 +
 Miner.py => miner/App.py                      |  25 ++--
 .../ConversationAnalyzer.py                   |  22 +--
 miner/Conversations.py                        | 139 ++++++++++++++++++
 miner/FacebookData.py                         |  30 ++++
 miner/Friends.py                              |  27 ++++
 Group.py => miner/Group.py                    |   2 +-
 miner/Individual.py                           |  63 ++++++++
 Me.py => miner/Me.py                          |   2 +-
 .../MessagingAnalyzer.py                      |  25 ++--
 miner/People.py                               |  52 +++++++
 Visualizer.py => miner/Visualizer.py          |   6 +-
 tests/TestMessages.py => miner/__init__.py    |   0
 utils.py => miner/utils.py                    |  90 +++++++-----
 tests/TestPeople.py                           |  36 -----
 tests/conftest.py                             |  10 +-
 ...alyzer.py => test_ConversationAnalyzer.py} |  23 ++-
 ...Conversations.py => test_Conversations.py} |  17 ++-
 tests/{TestFriends.py => test_Friends.py}     |   4 +-
 tests/test_Messages.py                        |   0
 ...gAnalyzer.py => test_MessagingAnalyzer.py} |   9 +-
 tests/test_People.py                          |  39 +++++
 tests/test_utils.py                           |   2 +-
 28 files changed, 472 insertions(+), 474 deletions(-)
 delete mode 100644 Conversations.py
 delete mode 100644 FacebookData.py
 delete mode 100644 Friends.py
 delete mode 100644 Individual.py
 delete mode 100644 People.py
 create mode 100644 __main__.py
 rename Miner.py => miner/App.py (63%)
 rename ConversationAnalyzer.py => miner/ConversationAnalyzer.py (84%)
 create mode 100644 miner/Conversations.py
 create mode 100644 miner/FacebookData.py
 create mode 100644 miner/Friends.py
 rename Group.py => miner/Group.py (92%)
 create mode 100644 miner/Individual.py
 rename Me.py => miner/Me.py (78%)
 rename MessagingAnalyzer.py => miner/MessagingAnalyzer.py (93%)
 create mode 100644 miner/People.py
 rename Visualizer.py => miner/Visualizer.py (88%)
 rename tests/TestMessages.py => miner/__init__.py (100%)
 rename utils.py => miner/utils.py (82%)
 delete mode 100644 tests/TestPeople.py
 rename tests/{TestConversationAnalyzer.py => test_ConversationAnalyzer.py} (94%)
 rename tests/{TestConversations.py => test_Conversations.py} (79%)
 rename tests/{TestFriends.py => test_Friends.py} (91%)
 create mode 100644 tests/test_Messages.py
 rename tests/{TestMessagingAnalyzer.py => test_MessagingAnalyzer.py} (98%)
 create mode 100644 tests/test_People.py

diff --git a/Conversations.py b/Conversations.py
deleted file mode 100644
index 3fb1fbd..0000000
--- a/Conversations.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import os
-from FacebookData import FacebookData
-import pandas as pd
-
-from datetime import datetime
-
-MESSAGE_SUBPATH = 'messages/inbox'
-
-
-class Conversations:
-    def __init__(self, data_path):
-        self.data_path = f'{data_path}/{MESSAGE_SUBPATH}'
-
-    def get_people(self):
-        json_paths = self.walk_directory_and_search('.json')
-        return self.extract_names_from_convos(json_paths)
-
-    def walk_directory_and_search(self, extension):
-        paths = []
-        for root, dirs, files in os.walk(self.data_path):
-            for name in files:
-                if name.endswith(extension):
-                    paths.append(os.path.join(root, name))
-        return paths
-
-    # TODO simplify this function!! also this takes very long
-    @staticmethod
-    def extract_names_from_convos(jsons):
-        name_data_map = {}
-        count = 0
-        for file in jsons:
-            msg = Messages(file)
-            for participant in msg.participants:
-                key = participant if msg.ttype == 'Regular' else f'group_{count}'
-                if key == 'Facebook User':  # TODO ?? what to do with this??
-                    continue
-                if name_data_map.get(key) and key.startswith(
-                        'group'):  # making sure run only once even if it is a group
-                    continue
-                if name_data_map.get(key):
-                    dfs = [name_data_map[key]['messages'], msg.df]
-                    name_data_map[key]['messages'] = pd.concat(dfs, ignore_index=False).sort_index()
-                else:
-                    name_data_map[key] = {
-                        'title': msg.title,
-                        'compact_name': msg.compact_names,
-                        # 'participants': msg.participants + ['Levente Csőke'],
-                        'participants': msg.participants,
-                        'messages': msg.df,
-                        'friend': None,
-                        'messages_dir': msg.messages_dir,
-                        'media_dir': msg.media_dir
-                    }
-            if msg.ttype == 'RegularGroup':
-                count += 1
-
-        return name_data_map
-
-
-class Messages(FacebookData):
-    def __init__(self, json_path):
-        super().__init__(json_path)
-        self.to_df()
-        self.set_date_as_index()
-
-    def to_df(self):
-        self._df = pd.DataFrame(self.decoded.get('messages'))
-
-    def set_date_as_index(self):
-        # NOTE maybe not needed; could calculate real time
-        date_series = self._df.timestamp_ms.apply(self.ts_to_date)
-        self._df = self._df.set_index(date_series).iloc[::-1]
-
-    @property
-    def names(self):
-        return pd.DataFrame(self.participants)[0]
-
-    @property
-    def participants(self):
-        participants = self.decoded.get('participants')
-        # TODO I should be IN
-        # but this breaks stuff at TestMessagingAnalyzer
-        return [p.get('name') for p in participants if p.get('name') != 'Levente Csőke']
-        # return [p.get('name') for p in participants if p.get('name')]
-
-    @property
-    def title(self):
-        return self.decoded.get('title')
-
-    @property
-    def ttype(self):
-        return self.decoded.get('thread_type')
-
-    @property
-    def messages_dir(self):
-        thread_path = self.decoded.get('thread_path')
-        if not thread_path.startswith('inbox/'):
-            raise ValueError('Something is not okay.')
-        # TODO here or in the upper function where we extract names
-        return thread_path.split('/')[1].lower()
-
-    @property
-    def media_dir(self):
-        # todo what should the path contain
-        for media in ['photos', 'gifs', 'files', 'videos', 'audio']:
-            if media in self._df.columns:
-                media_in_msg = list(self._df[media][self._df[media].notnull()])
-                # if len(media_in_msg) > 1:  # TODO is this ok. i think it is. think multiple photos sent once
-                #    print('Media in msg is bigger than 1')
-                uri = media_in_msg[0][0].get('uri')
-                return os.path.dirname(os.path.dirname(uri))
-        return None
-
-    @staticmethod
-    def ts_to_date(date):
-        return datetime.fromtimestamp(date / 1000)  # .strftime('%Y-%m-%d')
diff --git a/FacebookData.py b/FacebookData.py
deleted file mode 100644
index a82c896..0000000
--- a/FacebookData.py
+++ /dev/null
@@ -1,40 +0,0 @@
-from utils import read_json, decode_text, accents_map
-
-
-class FacebookData:
-    def __init__(self, json_path):
-        self.json_path = json_path
-        self._df = None
-
-    @property
-    def df(self):
-        return self._df
-
-    @property
-    def decoded(self):
-        return decode_text(self.json)
-
-    @property
-    def json(self):
-        return read_json(self.json_path)
-
-    @property
-    def compact_names(self):
-        # NOTE this is the place where we change pd/np to builtin
-        # do we have to do this?
-        name_list = list(self.without_accent_and_whitespace(self.lower_names(self.names)))
-        return name_list[0] if len(name_list) == 1 else name_list
-
-    @staticmethod
-    def lower_names(col):
-        return col.str.lower()
-
-    @staticmethod
-    def without_accent_and_whitespace(col):
-        def replace_accents(text):
-            for char in accents_map.keys():
-                if char in text:
-                    text = text.replace(char, accents_map[char])
-            return text.replace(' ', '')
-
-        return col.apply(replace_accents)
diff --git a/Friends.py b/Friends.py
deleted file mode 100644
index 6e0e991..0000000
--- a/Friends.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import pandas as pd
-import os
-from FacebookData import FacebookData
-from utils import accents_map
-
-
-class Friends(FacebookData):
-
-    def __init__(self, *args):
-        super().__init__(*args)
-
-        # self.path = 'data/friends'
-        # self.json_path = f'{self.path}/friends.json'
-
-        self.to_df()
-
-    def get_people(self):
-        names = {}
-        for name, compact in zip(self.names, self.compact_names):
-            names[name] = {
-                'title': name,
-                'compact_name': compact,
-                'messages': None,
-                'friend': True,
-                'participants': None,
-                'messages_dir': None,
-                'media_dir': None
-            }
-        return names
-
-    def to_df(self):
-        self._df = pd.DataFrame(self.decoded.get('friends'))
-
-    @property
-    def names(self):
-        return self.df.name
diff --git a/Individual.py b/Individual.py
deleted file mode 100644
index a9f8d03..0000000
--- a/Individual.py
+++ /dev/null
@@ -1,47 +0,0 @@
-class Individual:
-    def __init__(self, name=None, title=None,compact=None, messages=None, friend=None, messages_dir=None, media_dir=None,
-                 member_of=None):
-        self._name = name
-        self._title = title
-        self._compact_name = compact
-        self._messages = messages
-        self._friend = friend
-        self._messages_dir = messages_dir
-        self._media_dir = media_dir
-        self._member_of = member_of
-
-
-    def __repr__(self):
-        return self.name
-
-    @property
-    def name(self):
-        return self._name
-
-    @property
-    def title(self):
-        return self._title
-
-    @property
-    def messages(self):
-        return self._messages
-
-    @property
-    def friend(self):
-        return self._friend
-
-    @property
-    def media_dir(self):
-        return self._media_dir
-
-    @property
-    def messages_dir(self):
-        return self._messages_dir
-
-    @property
-    def compact_name(self):
-        return self._compact_name
-
-    @property
-    def member_of(self):
-        return self._member_of
diff --git a/People.py b/People.py
deleted file mode 100644
index 11d1887..0000000
--- a/People.py
+++ /dev/null
@@ -1,79 +0,0 @@
-from Individual import Individual
-from Conversations import Conversations
-from Friends import Friends
-
-# from Me import Me
-DATA_PATH = '/home/levente/projects/facebook-data-miner/data'
-import time
-from Group import Group
-
-
-# TODO we dont need both data and individuals... or??
-
-class People:
-    def __init__(self, path=None):
-        self.data_path = path if path else DATA_PATH
-        self._names = []
-        self._individuals = {}
-        self._groups = []
-        self._data = self.get_people()  # TODO is this supposed to be here or elsewhere
-        self.to_individuals()  # TODO is this supposed to be here or elsewhere
-
-    @property
-    def data(self):
-        return self._data
-
-    @property
-    def names(self):
-        return self._names
-
-    @property
-    def individuals(self):
-        return self._individuals
-
-    @property
-    def groups(self):
-        return self._groups
-
-    def get_people(self):
-        start = time.time()
-        friends = Friends(self.data_path + '/friends/friends.json')
-        people1 = friends.get_people()
-        print('friends: ', time.time() - start)
-
-        # TODO LATER too slow
-        # takes about 30 secs both
-        # read it once, store it in DB OR?
-        start = time.time()
-        conversations = Conversations(self.data_path)
-        people2 = conversations.get_people()
-        print('convos: ', time.time() - start)
-
-        return self.unify_people(people1, people2)
-
-    def to_individuals(self):  # TODO maybe rather split_convos or differentiate_convos
-        start = time.time()
-        for person, data in self._data.items():
-            if person.startswith('group'):
-                g = Group(name=data.get('name'), title=data.get('title'), messages=data.get('messages'),
-                          compact=data.get('compact_name'), messages_dir=data.get('messages_dir'),
-                          media_dir=data.get('media_dir'), members=None)
-                self._groups.append(g)
-            else:
-                indie = Individual(name=person, title=data.get('title'), messages=data.get('messages'),
-                                   compact=data.get('compact_name'), messages_dir=data.get('messages_dir'),
-                                   media_dir=data.get('media_dir'), member_of=None)
-                self._names.append(person)
-                self._individuals[person] = indie
-        print('indies: ', time.time() - start)
-
-    @staticmethod
-    def unify_people(friends, convos):
-        for person, data in friends.items():
-            if not convos.get(person):
-                convos[person] = data
-            convos[person]['friend'] = True
-        return convos
-
-# if __name__ == '__main__':
-#     p = People()
diff --git a/__main__.py b/__main__.py
new file mode 100644
index 0000000..33f7113
--- /dev/null
+++ b/__main__.py
@@ -0,0 +1,5 @@
+from miner.App import App
+
+if __name__ == '__main__':
+    app = App()
+    app.analyze_messages()
\ No newline at end of file
diff --git a/Miner.py b/miner/App.py
similarity index 63%
rename from Miner.py
rename to miner/App.py
index 3b32806..c2560bd 100644
--- a/Miner.py
+++ b/miner/App.py
@@ -1,11 +1,11 @@
-DATA_PATH = '/home/levente/projects/facebook-data-miner/data'
+from miner.ConversationAnalyzer import ConversationAnalyzer
+from miner.MessagingAnalyzer import MessagingAnalyzer
+from miner.People import People
 
-from People import People
-from ConversationAnalyzer import ConversationAnalyzer
-from MessagingAnalyzer import MessagingAnalyzer
+DATA_PATH = '/home/levente/projects/facebook-data-miner/data'
 
 
-class Miner:
+class App:
     def __init__(self):
         pass
 
@@ -15,8 +15,7 @@ def analyze_messages():
 
         stats = {}
 
-        for name, person in p.individuals.items():
-            #assert name == person.name, 'ERRRRRRROR!!!'
+        for name, person in p.data.items():
             if person.messages is None:
                 stats[person.name] = None
                 continue
@@ -24,7 +23,6 @@ def analyze_messages():
             stats[person.name] = analyzer.stats
             # if stats[person.name].get('message_count').get('me') > 5000:
             #    top[person.name] = stats[person.name]
-        example = stats['Dániel Nagy']
         print()
 
         # print('LEN: ', len(top.keys()))
@@ -33,13 +31,10 @@ def analyze_messages():
 
     @staticmethod
     def analyze_messaging():
-        p = People(path=DATA_PATH)
-
-        msg_analyzer = MessagingAnalyzer(p.names, p.individuals)
-
-        msgs = msg_analyzer.total_number_of_messages()
+        people = People(path=DATA_PATH)
+        msg_analyzer = MessagingAnalyzer(people)
 
 
 if __name__ == '__main__':
-    m = Miner()
-    m.analyze_messages()
+    app = App()
+    app.analyze_messages()
diff --git a/ConversationAnalyzer.py b/miner/ConversationAnalyzer.py
similarity index 84%
rename from ConversationAnalyzer.py
rename to miner/ConversationAnalyzer.py
index cfe1a95..a5928bf 100644
--- a/ConversationAnalyzer.py
+++ b/miner/ConversationAnalyzer.py
@@ -1,5 +1,5 @@
 import pandas as pd
-from utils import date_checker, period_checker, subject_checker, generate_time_series, get_stats_for_intervals
+from miner.utils import date_checker, period_checker, subject_checker, generate_date_series, get_stats_for_intervals
 
 
 class ConversationAnalyzer:
@@ -25,7 +25,7 @@ def get_stats(self, df, subject='all', start=None, end=None, period=None):
         return stats
 
     def get_time_series_data(self, subject='all', **kwargs):
-        time_series = generate_time_series(**kwargs)
+        time_series = generate_date_series(**kwargs)
         return get_stats_for_intervals(self.get_stats, self.df, time_series, subject=subject)
 
     @staticmethod
@@ -58,12 +58,6 @@ class ConversationStats:
     Statistics of conversation with one person.
     """
 
-    # TODO do we need this or not?!?! smh
-    # def __new__(cls, df, *args, **kwargs):
-    #     if not len(df.index):  # This deals with the case if input df is empty
-    #         return None
-    #     return super(ConversationStats, cls).__new__(cls, *args, **kwargs)
-
     def __init__(self, df):
         self.df = df
 
@@ -131,13 +125,13 @@ def char_count(self):
 
     # 10.
     @property
-    def most_used_chars(self):
-        return None  # TODO LATER or not  https://stackoverflow.com/questions/4131123/finding-the-most-frequent-character-in-a-string
-
-    # 11.
-    @property
     def rate_of_media_messages(self):
-        pass  # NOTE what?
+        """
+        TODO LATER
+        search for media messages all 5 of them
+        rate is only the second or third abstraction
+        """
+        pass
 
     def get_words(self):
         token_list = self.messages.str.lower().str.split()
diff --git a/miner/Conversations.py b/miner/Conversations.py
new file mode 100644
index 0000000..4f61ad9
--- /dev/null
+++ b/miner/Conversations.py
@@ -0,0 +1,139 @@
+import os
+from miner.Group import Group
+from miner.Individual import Individual
+from miner.FacebookData import FacebookData
+import pandas as pd
+from miner import utils
+from datetime import datetime
+
+
+class Conversations:
+    def __init__(self, data_path):
+        self.indie_convo_paths = []
+        self.group_convo_paths = []
+        self.deleted_user_convo_paths = []
+
+        self.data_path = f'{data_path}/{utils.MESSAGE_SUBPATH}'
+        self.order_paths()
+
+    def order_paths(self):
+        json_paths = utils.walk_directory_and_search(self.data_path, '.json')
+        self.differentiate_paths(json_paths)
+
+    def differentiate_paths(self, jsons):
+        for file in jsons:
+            msg = Messages(file)
+            if msg.title == 'Facebook User':
+                self.deleted_user_convo_paths.append(file)
+            elif msg.ttype == 'RegularGroup':
+                self.group_convo_paths.append(file)
+            elif msg.ttype == 'Regular':
+                self.indie_convo_paths.append(file)
+            else:
+                raise ValueError('Should not happen!')
+
+    def get_people_from_private_messages(self, name=None, membership=True):
+        """Map each private-conversation title to an Individual, merging files that share a title.
+        When `name` is given, only the paths returned by `filter_by_name` are read."""
+        paths = self.indie_convo_paths if name is None else self.filter_by_name(name)
+        name_data_map = {}
+        for file in paths:
+            messages = Messages(file)
+            title = messages.title
+            if title in name_data_map:
+                dfs = [name_data_map[title].messages, messages.df]
+                name_data_map[title].messages = pd.concat(dfs).sort_index()
+            else:
+                # TODO we may also want to get group messages where name is present
+                name_data_map[title] = self.create_individual(messages, membership=membership)
+        return name_data_map
+
+    def filter_by_name(self, name):
+        """Return private-conversation paths whose directory contains `utils.replace_accents(name.lower())`."""
+        if name is None:  # nothing to filter on; `None in <str>` would raise TypeError
+            return list(self.indie_convo_paths)
+        compact_name = utils.replace_accents(name.lower())
+        return [path for path in self.indie_convo_paths
+                if compact_name in os.path.basename(os.path.dirname(os.path.normpath(path)))]
+
+    def create_individual(self, messages, membership=None):  # builds an Individual from one Messages object
+        return Individual(
+            name=messages.title, title=messages.title,  # TODO deprecate one of (name, title)
+            compact=messages.compact_names,
+            messages=messages.df,
+            messages_dir=messages.messages_dir,
+            media_dir=messages.media_dir,
+            member_of=self.group_membership(messages.title) if membership else None,
+        )
+
+    @staticmethod
+    def fill_data_map(message):
+        return {
+            'title': message.title,
+            'compact_name': message.compact_names,
+            # 'participants': msg.participants + ['Levente Csőke'],
+            'participants': message.participants,
+            'messages': message.df,
+            'friend': None,
+            'messages_dir': message.messages_dir,
+            'media_dir': message.media_dir
+        }
+
+    @staticmethod
+    def group_membership(name):
+        return None
+
+    @staticmethod
+    def json_is_a_group_msg(file):
+        msg = Messages(file)
+        return msg.ttype == 'RegularGroup'
+
+
+class Messages(FacebookData):
+    def __init__(self, json_path):
+        super().__init__(json_path)
+        self.to_df('messages')
+        self.set_date_as_index()
+
+    @property
+    def names(self):
+        return pd.DataFrame(self.participants)[0]
+
+    @property
+    def participants(self):
+        participants = self.decoded.get('participants')
+        # TODO I should be IN
+        # but this breaks stuff at TestMessagingAnalyzer
+        return [p.get('name') for p in participants if p.get('name') != 'Levente Csőke']
+        # return [p.get('name') for p in participants if p.get('name')]
+
+    @property
+    def title(self):
+        return self.decoded.get('title')
+
+    @property
+    def ttype(self):
+        return self.decoded.get('thread_type')
+
+    @property
+    def messages_dir(self):
+        thread_path = self.decoded.get('thread_path')
+        if not thread_path.startswith('inbox/'):
+            raise ValueError('Field `thread_path` should start with `inbox/`.')
+        return thread_path.split('inbox/')[1]
+
+    @property
+    def media_dir(self):
+        for media in utils.MEDIA_DIRS:
+            if media in self._df.columns:
+                media_in_msg = list(self._df[media][self._df[media].notnull()])
+                uri = media_in_msg[0][0].get('uri')
+                return os.path.dirname(os.path.dirname(uri)).split('inbox/')[1]
+
+    def set_date_as_index(self):
+        date_series = self._df.timestamp_ms.apply(self.ts_to_date)
+        self._df = self._df.set_index(date_series).iloc[::-1]
+
+    @staticmethod
+    def ts_to_date(date):
+        return datetime.fromtimestamp(date / 1000)  # .strftime('%Y-%m-%d')
diff --git a/miner/FacebookData.py b/miner/FacebookData.py
new file mode 100644
index 0000000..babe74d
--- /dev/null
+++ b/miner/FacebookData.py
@@ -0,0 +1,30 @@
+from miner import utils
+import pandas as pd
+
+
+class FacebookData:
+    def __init__(self, json_path):
+        self.json_path = json_path
+        self._df = None
+
+    @property
+    def df(self):
+        return self._df
+
+    @property
+    def decoded(self):
+        return utils.decode_text(self.json)
+
+    @property
+    def json(self):
+        return utils.read_json(self.json_path)
+
+    @property
+    def compact_names(self):
+        # NOTE this is the place where we change pd/np to builtin
+        # do we have to do this?
+        name_list = list(utils.without_accent_and_whitespace(utils.lower_names(self.names)))  # should be just fine
+        return name_list[0] if len(name_list) == 1 else name_list
+
+    def to_df(self, field=None):
+        self._df = pd.DataFrame(self.decoded.get(field))
diff --git a/miner/Friends.py b/miner/Friends.py
new file mode 100644
index 0000000..7950155
--- /dev/null
+++ b/miner/Friends.py
@@ -0,0 +1,27 @@
+import pandas as pd
+import os
+from miner.FacebookData import FacebookData
+from miner.Individual import Individual
+
+
+class Friends(FacebookData):
+
+    def __init__(self, *args):
+        super().__init__(*args)
+        self.to_df('friends')
+
+    def get_people(self, name=None):
+        """Map each friend's full name to an `Individual`; keep only `name` when it is given."""
+        compacts = self.compact_names
+        if isinstance(compacts, str):  # compact_names collapses a single-friend list to a bare str,
+            compacts = [compacts]      # which zip() would otherwise iterate character by character
+        return {
+            # TODO deprecate one of (name, title)
+            full_name: Individual(name=full_name, title=full_name, compact=compact, friend=True)
+            for full_name, compact in zip(self.names, compacts)
+            if name is None or name == full_name  # filtering for name
+        }
+
+    @property
+    def names(self):
+        return self.df.name
diff --git a/Group.py b/miner/Group.py
similarity index 92%
rename from Group.py
rename to miner/Group.py
index 2152d77..94e1ed9 100644
--- a/Group.py
+++ b/miner/Group.py
@@ -1,4 +1,4 @@
-# TODO LATER groups should be searched by looking into jsons unfortunately :(
+# NOTE groups should be searched by looking into jsons unfortunately :(
 # because of directory says others
 # maybe we dont use groups right away?
 
diff --git a/miner/Individual.py b/miner/Individual.py
new file mode 100644
index 0000000..84d63b5
--- /dev/null
+++ b/miner/Individual.py
@@ -0,0 +1,63 @@
+class Individual:
+    def __init__(self, name=None, title=None, compact=None, messages=None, friend=None, messages_dir=None,
+                 media_dir=None,
+                 member_of=None):
+        self._name = name
+        self._title = title
+        self._compact_name = compact
+        self._messages = messages
+        self._friend = friend
+        self._messages_dir = messages_dir
+        self._media_dir = media_dir
+        self._member_of = member_of
+
+    def __repr__(self):
+        return f'{self.name}, messages: {self.messages}'
+
+    def __add__(self, other):
+        """Merge two records of the same person; each field comes from `self` unless it is empty."""
+        # `messages` may be None (e.g. a friend with no conversation), so guard before calling len().
+        has_messages = self.messages is not None and len(self.messages)
+        return Individual(
+            name=self.title or other.title, title=self.title or other.title,  # TODO deprecate one of (name, title)
+            friend=self.friend or other.friend, compact=self.compact_name or other.compact_name,
+            messages=self.messages if has_messages else other.messages,
+            messages_dir=self.messages_dir or other.messages_dir, media_dir=self.media_dir or other.media_dir,
+            member_of=self.member_of or other.member_of,
+        )
+
+    @property
+    def name(self):
+        return self._name
+
+    @property
+    def title(self):
+        return self._title
+
+    @property
+    def messages(self):
+        return self._messages
+
+    @messages.setter
+    def messages(self, df):
+        self._messages = df
+
+    @property
+    def friend(self):
+        return self._friend
+
+    @property
+    def media_dir(self):
+        return self._media_dir
+
+    @property
+    def messages_dir(self):
+        return self._messages_dir
+
+    @property
+    def compact_name(self):
+        return self._compact_name
+
+    @property
+    def member_of(self):
+        return self._member_of
diff --git a/Me.py b/miner/Me.py
similarity index 78%
rename from Me.py
rename to miner/Me.py
index 3293bf7..377c3a9 100644
--- a/Me.py
+++ b/miner/Me.py
@@ -1,4 +1,4 @@
-from FacebookData import FacebookData
+from miner.FacebookData import FacebookData
 
 
 class Me(FacebookData):
diff --git a/MessagingAnalyzer.py b/miner/MessagingAnalyzer.py
similarity index 93%
rename from MessagingAnalyzer.py
rename to miner/MessagingAnalyzer.py
index 0619505..8e8ddd4 100644
--- a/MessagingAnalyzer.py
+++ b/miner/MessagingAnalyzer.py
@@ -1,19 +1,13 @@
-from utils import year_converter, month_converter, generate_date_series, get_stats_for_intervals
+from miner.utils import year_converter, month_converter, generate_date_series, get_stats_for_intervals
 from datetime import datetime, date, timedelta
 import pandas as pd
-from ConversationAnalyzer import ConversationAnalyzer
+from miner.ConversationAnalyzer import ConversationAnalyzer
 
 
 class MessagingAnalyzer:
-    def __init__(self, names, people):
-        # TODO input people only. class will know what to do
-        self.names = names
-        self.people = people
-
-    def time_series_analysis_for_all(self, subject=None, **kwargs):
-        time_series = generate_date_series(**kwargs)
-        stacked_df = self.stack_dfs(self.people)
-        interval_stats = get_stats_for_intervals(self.get_stats, stacked_df, time_series, subject=subject)
+    def __init__(self, people):
+        self.names = people.names
+        self.people = people.data
 
     def get_stats(self, df, subject='all', start=None, end=None, period=None):
         # TODO LATER
@@ -97,14 +91,13 @@ def most_used_words_by_partners(self, **kwargs):
 
     # 5. Number of messages sent/got on busiest period (by year/month/day/hour)
     def days_when_most_messages_sent(self):
-        # TODO LATER hard algorithmic problem
         pass
 
     def days_when_most_messages_received(self):
         pass
 
     def hours_when_most_messages_sent(self):
-        # TODO LATER
+        # TODO LATER hard algorithmic problem
         # is this referring to the absolute hour most messages sent??
         # like: 2014.07.25. 15h-16h
         # OR
@@ -119,7 +112,11 @@ def hours_when_most_messages_received(self):
         pass
 
     # 6. Time series: dict of 'year/month/day/hour : number of messages/words/characters (also sent/got) for user/all convos'
-    # TODO
+    def time_series_analysis_for_all(self, subject=None, **kwargs):
+        time_series = generate_date_series(**kwargs)
+        stacked_df = self.stack_dfs(self.people)
+        interval_stats = get_stats_for_intervals(self.get_stats, stacked_df, time_series, subject=subject)
+        # TODO finish this for time series for all (interval_stats is computed but not returned yet)
 
     @staticmethod
     def stack_dfs(people):
diff --git a/miner/People.py b/miner/People.py
new file mode 100644
index 0000000..707c6f5
--- /dev/null
+++ b/miner/People.py
@@ -0,0 +1,52 @@
+import time
+
+from miner.Conversations import Conversations
+from miner.Friends import Friends
+
+# from Me import Me
+
+DATA_PATH = '/home/levente/projects/facebook-data-miner/data'
+
+
+class People:
+    def __init__(self, path=None, name=None):
+        self.data_path = path if path else DATA_PATH
+        self._groups = []
+        self._data = self.get_people(name=name)
+        self._names = self.data.keys()
+
+    @property
+    def data(self):
+        return self._data
+
+    @property
+    def names(self):
+        return self._names
+
+    @property
+    def groups(self):
+        return self._groups
+
+    def get_people(self, name=None):
+        """Merge friends and private-conversation partners; `name` filters both sources."""
+        start = time.time()
+        friend = Friends(self.data_path + '/friends/friends.json')
+        friends = friend.get_people(name=name)
+        print('friends: ', time.time() - start)
+
+        # TODO LATER too slow -> store in file
+        start = time.time()
+        conversations = Conversations(self.data_path)
+        # Forward the filter: it used to reach Friends only, so every conversation was still returned.
+        individuals = conversations.get_people_from_private_messages(name=name)
+        print('convos: ', time.time() - start)
+        return self.unify_people(friends, individuals)
+
+    @staticmethod
+    def unify_people(friends, convo_partners):
+        """Fold `friends` into `convo_partners` (mutated in place and returned), combining shared names."""
+        for person, friend in friends.items():
+            partner = convo_partners.get(person)
+            # A name present in both sources is combined with `+`; otherwise the friend entry is used as is.
+            convo_partners[person] = partner + friend if partner else friend
+        return convo_partners
diff --git a/Visualizer.py b/miner/Visualizer.py
similarity index 88%
rename from Visualizer.py
rename to miner/Visualizer.py
index 052ecb3..440c3e7 100644
--- a/Visualizer.py
+++ b/miner/Visualizer.py
@@ -1,8 +1,8 @@
 import matplotlib.pyplot as plt
 import seaborn as sns
 import pandas as pd
-from People import People
-from ConversationAnalyzer import ConversationAnalyzer
+from miner.People import People
+from miner.ConversationAnalyzer import ConversationAnalyzer
 
 # plt.rcParams.update({'figure.figsize': (10, 7), 'figure.dpi': 120})
 
@@ -22,7 +22,7 @@ def plot_convos(self, names):
 
     @staticmethod
     def set_up_data(people, name, period='y'):
-        analyzer = ConversationAnalyzer(name, people.individuals.get(name).messages)
+        analyzer = ConversationAnalyzer(name, people.data.get(name).messages)
         interval_stats = analyzer.get_time_series_data(subject='all', start=None, end=None, period=period)
         return analyzer.get_plottable_time_series_data(interval_stats, statistic='msg_count')
 
diff --git a/tests/TestMessages.py b/miner/__init__.py
similarity index 100%
rename from tests/TestMessages.py
rename to miner/__init__.py
diff --git a/utils.py b/miner/utils.py
similarity index 82%
rename from utils.py
rename to miner/utils.py
index 2a48624..c80001b 100644
--- a/utils.py
+++ b/miner/utils.py
@@ -1,11 +1,31 @@
+import os
 import json
 import pandas as pd
 import dateparser
 from datetime import datetime, timedelta
 from dateutil.relativedelta import relativedelta
 
+MESSAGE_SUBPATH = 'messages/inbox'
+MEDIA_DIRS = ['photos', 'gifs', 'files', 'videos', 'audio']
 MONTHS = ['january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'october',
           'november', 'december']
+DELTA_MAP = {
+    'y': relativedelta(years=+1),
+    'm': relativedelta(months=+1),
+    'd': timedelta(days=1),
+    'h': timedelta(hours=1)
+}
+ACCENTS_MAP = {
+    "á": "a",
+    "é": "e",
+    "í": "i",
+    "ó": "o",
+    "ö": "o",
+    "ő": "o",
+    "ú": "u",
+    "ü": "u",
+    "ű": "u",
+}
 
 
 def read_json(file):
@@ -22,28 +42,6 @@ def order_list_of_dicts(lst, key='timestamp_ms'):
     return sorted(lst, key=lambda k: k[key])
 
 
-accents_map = {
-    "á": "a",
-    "é": "e",
-    "í": "i",
-    "ó": "o",
-    "ö": "o",
-    "ő": "o",
-    "ú": "u",
-    "ü": "u",
-    "ű": "u",
-    # "Á": "A",
-    # "É": "E",
-    # "Í": "I",
-    # "Ó": "O",
-    # "Ö": "O",
-    # "Ő": "O",
-    # "Ú": "U",
-    # "Ü": "U",
-    # "Ű": "U",
-}
-
-
 #
 
 
@@ -110,14 +108,6 @@ def wrapper(*args, **kwargs):
     return wrapper
 
 
-DELTA_MAP = {
-    'y': relativedelta(years=+1),
-    'm': relativedelta(months=+1),
-    'd': timedelta(days=1),
-    'h': timedelta(hours=1)
-}
-
-
 def period_checker(func):
     def wrapper(*args, **kwargs):
         if kwargs.get('start') is not None and kwargs.get('end') is not None:
@@ -134,7 +124,7 @@ def wrapper(*args, **kwargs):
 def generate_date_series(start=None, end=None, period=None):
     if period is None or DELTA_MAP.get(period) is None:
         raise ValueError('Parameter `period` should be one of {y, m, d, h}')
-    start = start or datetime(year=2009, month=10, day=2, hour=0) # TODO change this to date when user joined FB
+    start = start or datetime(year=2009, month=10, day=2, hour=0)  # TODO LATER change this to date when user joined FB
     end = end or datetime.now()
 
     dates = []
@@ -147,14 +137,12 @@ def generate_date_series(start=None, end=None, period=None):
 
 def get_stats_for_intervals(func, df, time_series, subject='all'):
     data = {}
-    for offset, series in time_series.items():
-        data[offset] = {}
-        for i in range(len(series) - 1):  # only looping len - 1 times
-            start = series[i]
-            # TODO LATER will we miss the last entry? I dont think so (99%), but check and correct hand in hand with the timeseries bug
-            # IT DOES NOT! HOWEVER test it with new data injected/modified at runtime <- this is hard
-            end = series[i + 1]
-            data[offset][start] = func(df, subject=subject, start=start, end=end)
+    for i in range(len(time_series) - 1):  # only looping len - 1 times
+        start = time_series[i]
+        # TODO test it with new data injected/modified at runtime <- this is hard
+        # what is this about actually?
+        end = time_series[i + 1]
+        data[start] = func(df, subject=subject, start=start, end=end)
     return data
 
 
@@ -186,3 +174,27 @@ def decode_text(obj):
         return {key: decode_text(item) for key, item in obj.items()}
 
     return obj
+
+
+def lower_names(col):  # NOTE(review): `col` needs a `.str` accessor, presumably a pandas Series
+    return col.str.lower()
+
+
+def replace_accents(text):
+    """Swap every ACCENTS_MAP key found in `text` for its value, then drop spaces."""
+    for accented, plain in ACCENTS_MAP.items():
+        text = text.replace(accented, plain)
+    return text.replace(' ', '')
+
+
+def without_accent_and_whitespace(col):  # runs replace_accents on each element through col.apply
+    return col.apply(replace_accents)
+
+
+def walk_directory_and_search(path, extension):
+    """Walk `path` recursively and list the files whose name ends with `extension`."""
+    return [
+        os.path.join(root, file_name)
+        for root, _dirs, files in os.walk(path)
+        for file_name in files if file_name.endswith(extension)
+    ]
diff --git a/tests/TestPeople.py b/tests/TestPeople.py
deleted file mode 100644
index 61295d8..0000000
--- a/tests/TestPeople.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import pytest
-
-
-
-@pytest.fixture()
-def people_names():
-    return ['John Doe', 'Donald Duck', 'Szett Droxler', 'Foo Bar', 'Tőke Hal', 'Dér Dénes', 'Teflon Musk', 'Daisy Duck',
-            'Guy Fawkes', 'Benedek Elek']
-
-
-def test_specific_people_has_or_has_not_got_messages(people):
-    # TODO LATER parametrize
-    import pandas as pd
-    assert isinstance(people.data.get('Benedek Elek').get('messages'), pd.DataFrame)
-    assert isinstance(people.data.get('Teflon Musk').get('messages'), pd.DataFrame)
-    assert isinstance(people.data.get('Tőke Hal').get('messages'), pd.DataFrame)
-    assert not isinstance(people.data.get('John Doe').get('messages'), pd.DataFrame)
-    assert not isinstance(people.data.get('Szett Droxler').get('messages'), pd.DataFrame)
-    assert not isinstance(people.data.get('Daisy Duck').get('messages'), pd.DataFrame)
-    assert not isinstance(people.data.get('Guy Fawkes').get('messages'), pd.DataFrame)
-
-
-def test_people_name(people, people_names):
-    people_without_groups = [p for p in people.data.keys() if not p.startswith('group')]
-    assert sorted(people_names) == sorted(people_without_groups)
-
-
-def test_some_convos_are_with_friends(people):
-    assert people.data.get('Teflon Musk').get('friend')
-    assert not people.data.get('Benedek Elek').get('friend')
-
-
-def test_specific_people_has_or_has_not_got_media(people):
-    assert people.data.get('Teflon Musk').get('media_dir')
-
-#TODO LATER test individuals too
\ No newline at end of file
diff --git a/tests/conftest.py b/tests/conftest.py
index 430e923..08e382b 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,10 +1,12 @@
 import pytest
-from People import People
+from miner.People import People
 
 TEST_DATA_PATH = '/home/levente/projects/facebook-data-miner/tests/test_data'
 
 
 @pytest.fixture(scope='session')
-def people():
-    p = People(path=TEST_DATA_PATH)
-    return p
+def get_people():
+    def _get_people(name=None):
+        return People(path=TEST_DATA_PATH, name=name)
+    return _get_people
+
diff --git a/tests/TestConversationAnalyzer.py b/tests/test_ConversationAnalyzer.py
similarity index 94%
rename from tests/TestConversationAnalyzer.py
rename to tests/test_ConversationAnalyzer.py
index 015ac19..f7d679f 100644
--- a/tests/TestConversationAnalyzer.py
+++ b/tests/test_ConversationAnalyzer.py
@@ -1,22 +1,16 @@
 import pytest
-from ConversationAnalyzer import ConversationAnalyzer
-from People import People
-from utils import dt
+from miner.ConversationAnalyzer import ConversationAnalyzer
+from miner.People import People
+from miner.utils import dt
 
 TEST_DATA_PATH = '/home/levente/projects/facebook-data-miner/tests/test_data'
 
 
-# @pytest.mark.parametrize("test_input,expected", [("3+5", 8), ("2+4", 6), ("6*9", 42)])
-# def test_eval(test_input, expected):
-#     assert eval(test_input) == expected
-
-# get\(\'.*\'\)\.
-
-
 @pytest.fixture(scope='session')
-def person(people):
+def person(get_people):
     def _person(name):
-        return people.individuals[name]
+        people = get_people(name)
+        return people.data[name]
 
     return _person
 
@@ -224,11 +218,12 @@ def test_stats_teflon_musk_all_2014_12(statistics):
     assert stats.char_count == 0
     # assert stats.most_used_chars == 0
 
-class TestConversationAnalyzer: # Foo Bar
+
+class TestConversationAnalyzer:  # Foo Bar
     pass
 
 
 def test_time_series_analysis_for_user(analyze):
     analyzer = analyze('Teflon Musk')
-    analyzer.get_time_series_data(subject='all')
+    analyzer.get_time_series_data(subject='all', period='y')
     assert 1
diff --git a/tests/TestConversations.py b/tests/test_Conversations.py
similarity index 79%
rename from tests/TestConversations.py
rename to tests/test_Conversations.py
index e198dc9..d066361 100644
--- a/tests/TestConversations.py
+++ b/tests/test_Conversations.py
@@ -1,6 +1,7 @@
 import pandas as pd
 import pytest
-from Conversations import Conversations
+from miner.Conversations import Conversations
+from miner import utils
 import os
 TEST_DATA_PATH = '/home/levente/projects/facebook-data-miner/tests/test_data'
 
@@ -8,12 +9,12 @@
 @pytest.fixture()
 def convos():
     convo = Conversations(f'{TEST_DATA_PATH}')
-    return convo.get_people()
+    return convo.get_people_from_private_messages()
 
 
 def test_get_all_people_from_convo(convos):
     people = []
-
+    # TODO make this work
     for convo in convos.keys():
         if convo.startswith('group'):
             people += [p for p in convos[convo].get('participants')]
@@ -28,19 +29,19 @@ def test_get_all_people_from_convo(convos):
 
 
 def test_all_convos_have_dir(convos):
-    assert all([data.get('messages_dir') for data in convos.values()])
+    assert all([data.messages_dir for data in convos.values()])
 
 
 def test_all_convos_have_messages_df(convos):
-    assert all([isinstance(data.get('messages'), pd.DataFrame) for data in convos.values()])
+    assert all([isinstance(data.messages, pd.DataFrame) for data in convos.values()])
 
 
 def test_some_convos_as_media_dir(convos):
-    assert convos.get('Teflon Musk').get('media_dir')
-    assert not convos.get('Benedek Elek').get('media_dir')
+    assert convos.get('Teflon Musk').media_dir
+    assert not convos.get('Benedek Elek').media_dir
 
 def test_convo_media_has_one_folder_of_possibles(convos):
-    listed_dir = os.listdir(f"{TEST_DATA_PATH}/{convos.get('Teflon Musk').get('media_dir')}")
+    listed_dir = os.listdir(f"{TEST_DATA_PATH}/{utils.MESSAGE_SUBPATH}/{convos.get('Teflon Musk').media_dir}")
     assert 'files' in listed_dir
     assert 'photos' in listed_dir
     assert 'audio' not in listed_dir
diff --git a/tests/TestFriends.py b/tests/test_Friends.py
similarity index 91%
rename from tests/TestFriends.py
rename to tests/test_Friends.py
index f336609..652b671 100644
--- a/tests/TestFriends.py
+++ b/tests/test_Friends.py
@@ -1,6 +1,6 @@
 import pytest
 
-from Friends import Friends
+from miner.Friends import Friends
 
 TEST_DATA_PATH = '/home/levente/projects/facebook-data-miner/tests/test_data'
 
@@ -31,7 +31,7 @@ def test_get_peoples_names_from_friends(friends, expected_friends):
 def test_get_peoples_compact_name_from_friends(friends, expected_friends):
     expected_compact_names = [value.get('compact_name') for value in expected_friends.values()]
 
-    assert all([p.get('compact_name') in expected_compact_names for p in friends.values()])
+    assert all([p.compact_name in expected_compact_names for p in friends.values()])
 
 
 
diff --git a/tests/test_Messages.py b/tests/test_Messages.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/TestMessagingAnalyzer.py b/tests/test_MessagingAnalyzer.py
similarity index 98%
rename from tests/TestMessagingAnalyzer.py
rename to tests/test_MessagingAnalyzer.py
index b803693..ec03497 100644
--- a/tests/TestMessagingAnalyzer.py
+++ b/tests/test_MessagingAnalyzer.py
@@ -1,10 +1,11 @@
 import pytest
-from MessagingAnalyzer import MessagingAnalyzer
-from utils import dt
+from miner.MessagingAnalyzer import MessagingAnalyzer
+from miner.utils import dt
 
 @pytest.fixture(scope='session')
-def analyzer(people):
-    return MessagingAnalyzer(people.names, people.individuals)
+def analyzer(get_people):
+    people = get_people()
+    return MessagingAnalyzer(people)
 
 
 def test_total_number_of_messages(analyzer):
diff --git a/tests/test_People.py b/tests/test_People.py
new file mode 100644
index 0000000..e9f270e
--- /dev/null
+++ b/tests/test_People.py
@@ -0,0 +1,39 @@
+import pytest
+
+
+
+@pytest.fixture()
+def people_names():  # names compared against the non-group keys of `people.data` below
+    return ['John Doe', 'Donald Duck', 'Szett Droxler', 'Foo Bar', 'Tőke Hal', 'Dér Dénes', 'Teflon Musk', 'Daisy Duck',
+            'Guy Fawkes', 'Benedek Elek']
+
+@pytest.fixture
+def people(get_people):  # `get_people` is a factory fixture; it is called without a name here
+    return get_people()
+
+def test_specific_people_has_or_has_not_got_messages(people):
+    """The first group must expose a DataFrame as `messages`; the second must not."""
+    # TODO LATER parametrize
+    import pandas as pd
+    with_messages = ('Benedek Elek', 'Teflon Musk', 'Tőke Hal')
+    without_messages = ('John Doe', 'Szett Droxler', 'Daisy Duck', 'Guy Fawkes')
+    for name in with_messages:
+        assert isinstance(people.data.get(name).messages, pd.DataFrame)
+    for name in without_messages:
+        assert not isinstance(people.data.get(name).messages, pd.DataFrame)
+
+
+def test_people_name(people, people_names):
+    individuals = [key for key in people.data if not key.startswith('group')]
+    assert sorted(individuals) == sorted(people_names)
+
+
+def test_some_convos_are_with_friends(people):
+    teflon, benedek = (people.data.get(name) for name in ('Teflon Musk', 'Benedek Elek'))
+    assert teflon.friend and not benedek.friend
+
+
+def test_specific_people_has_or_has_not_got_media(people):  # only the "has media" case is asserted
+    assert people.data.get('Teflon Musk').media_dir
+
+#TODO LATER test individuals too
\ No newline at end of file
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 579569c..4d11263 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,5 +1,5 @@
 import unittest
-from utils import *
+from miner.utils import *
 from pathlib import Path
 import reusables
 from reusables.cli import *

From b2f725eda6da5500aa2effcd351494a401e79347 Mon Sep 17 00:00:00 2001
From: Levente Csoke <leventec3@gmail.com>
Date: Sat, 15 Aug 2020 01:01:19 +0200
Subject: [PATCH 2/3] refactored MsgA and ConvoA into Analyzer; added lot of
 functionalities for gathering data for plotting

---
 .gitignore                         |  21 +-
 README.md                          |   8 +-
 miner/Analyzer.py                  | 133 ++++++++++++
 miner/App.py                       |  26 +--
 miner/ConversationAnalyzer.py      | 146 -------------
 miner/ConversationStats.py         |  89 ++++++++
 miner/Conversations.py             | 155 ++++++--------
 miner/FacebookData.py              |   2 -
 miner/Friends.py                   |   4 +-
 miner/Individual.py                |  10 +-
 miner/Me.py                        |   2 +-
 miner/Messages.py                  |  64 ++++++
 miner/MessagingAnalyzer.py         | 127 -----------
 miner/People.py                    |  11 +-
 miner/requirements.txt             |   9 +
 miner/utils.py                     |  68 ++++--
 requirements.txt                   |   5 +
 tests/test_ConversationAnalyzer.py |  33 ++-
 tests/test_Conversations.py        |  92 ++++----
 tests/test_MessagingAnalyzer.py    | 333 +++++++++++++++--------------
 tests/test_utils.py                |   9 +-
 21 files changed, 670 insertions(+), 677 deletions(-)
 create mode 100644 miner/Analyzer.py
 delete mode 100644 miner/ConversationAnalyzer.py
 create mode 100644 miner/ConversationStats.py
 create mode 100644 miner/Messages.py
 delete mode 100644 miner/MessagingAnalyzer.py
 create mode 100644 miner/requirements.txt
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
index 5b8858a..4755795 100644
--- a/.gitignore
+++ b/.gitignore
@@ -228,24 +228,19 @@ dmypy.json
 
 # End of https://www.toptal.com/developers/gitignore/api/python,jupyternotebook,pycharm,visualstudiocode
 
+# ignoring data folder
+data
 
-# ignoring data
-data 
+# ignoring jupyter notebook
+tests/playground.py
 
-# ignoring todo 
+# ignoring various files created during development
+plots
+*.png
 todo.md
-
-
-# ignoring trash file
 trash.py
-
-
-# ignoring jupyter notebook
 explore.ipynb
-
-
-# ignoring jupyter notebook
-tests/playground.py
+tests/test_data/messages/inbox/private_messages.json
 
 tests/.pytest_cache
 .pytest_cache
\ No newline at end of file
diff --git a/README.md b/README.md
index 943a123..c4b3b6a 100644
--- a/README.md
+++ b/README.md
@@ -11,4 +11,10 @@ More info soon...
 ## Contribution
 Help is more than welcome. If somebody feel the urge to contribute, I would share my plans with them.
 
-Ideas are welcome too. Feel free to open a new issue.
\ No newline at end of file
+Ideas are welcome too. Feel free to open a new issue.
+
+
+For running the Visualizer CLI:
+```shell script
+export PYTHONPATH="$PWD"
+```
diff --git a/miner/Analyzer.py b/miner/Analyzer.py
new file mode 100644
index 0000000..b0e61bc
--- /dev/null
+++ b/miner/Analyzer.py
@@ -0,0 +1,133 @@
+from miner.ConversationStats import ConversationStats
+from miner import utils
+import pandas as pd
+
+
+class Analyzer:
+    """Compute message statistics for the people held by a `People`-like object.
+
+    With more than one person the message frames are stacked into one
+    index-sorted frame; otherwise the single person's frame is used directly.
+    """
+    # TODO do we need to override __subclasscheck__ ?
+
+    def __init__(self, people):
+        self.people = people
+        self.people_data = people.data
+        self.names = people.names
+        self.multi = len(self.people_data) > 1
+
+        if self.multi:
+            self.df = self.stack_dfs()
+        else:
+            # TODO solve this hand in hand with the __new__ method. too ugly
+            self.df = self.people_data.get(list(self.names)[0]).messages
+
+    def get_stats_for_intervals(self, time_series, subject='all'):
+        """Return {interval start: stats} for each consecutive pair in `time_series`."""
+        data = {}
+        for i in range(len(time_series) - 1):  # only looping len - 1 times
+            start = time_series[i]
+            end = time_series[i + 1]
+            data[start] = self.get_stats(self.df, subject=subject, start=start, end=end)
+        return data
+
+    def get_stats(self, df=None, subject='all', start=None, end=None, period=None):
+        """Filter `df` (default: `self.df`) by the inputs and wrap it in ConversationStats."""
+        df = self.df if df is None else df
+        df = self.filter_by_input(df, subject=subject, start=start, end=end, period=period)
+        return ConversationStats(df)
+
+    @staticmethod
+    def get_plottable_time_series_data(interval_stats, statistic):
+        """Swap, in place, every ConversationStats value for its `statistic` attribute."""
+        for k, v in interval_stats.items():
+            if isinstance(v, ConversationStats):
+                interval_stats[k] = getattr(v, statistic)
+        return interval_stats
+
+    @property
+    def stats(self):
+        return self.get_stats()
+
+    def __str__(self):
+        # __str__ must return a str, and `names` may be a non-indexable keys view
+        names = list(self.names)
+        if self.multi:
+            return ', '.join(map(str, names))
+        return f'{names[0]}: {list(self.df.index)}'
+
+    def stack_dfs(self):
+        """Concatenate every person's non-None `messages` frame and sort by index."""
+        dfs = [data.messages for data in self.people_data.values() if data.messages is not None]
+        return pd.concat(dfs).sort_index()
+
+    # 1. Total count of messages/words/characters (also by year/month/day/hour)
+    # 2. Total count of messages/words/characters sent (also by year/month/day/hour)
+    # 3. Total count of messages/words/characters received (also by year/month)
+    def get_count(self, attribute, subject='all', start=None, end=None, period=None):
+        stats = self.get_stats(subject=subject, start=start, end=end, period=period)
+        return getattr(stats, attribute)
+
+    # 4. Most used messages/words in convos by me/partner (also by year/month/day/hour)
+    def most_used_messages_(self, **kwargs):
+        """
+        >>> s1 = pd.Series([3, 1, 2, 3, 4, 1, 1])
+        >>> s2 = pd.Series([3, 2, 1, 1])
+        >>> s1_vc = s1.value_counts()
+        >>> s2_vc = s2.value_counts()
+        TODO LATER most used is already a problem:
+          - because its a series of all the unique messages/words ever used in a convo
+          - it contains strings like ':d', ':p' and 'xd'
+          - from all the convos the result of value_counts has to be cleared
+          and has to be truncated (that is not use the 200th most used word, only top10 let's say)
+          - then these series has to be merged in a way that the same string's counts are added up
+          - what about typos????!
+        """
+        pass
+
+    # 5. Number of messages sent/got on busiest period (by year/month/day/hour)
+    def stat_per_period(self, period, attribute, **kwargs):
+        interval_stats = self.get_time_series_data(period, **kwargs)
+        # TODO attribute is one of (msg, word, char)
+        time_series_data = self.get_plottable_time_series_data(interval_stats, statistic=attribute)
+        return utils.count_stat_for_period(time_series_data, period)
+
+    # 6. Time series: dict of 'year/month/day/hour : number of messages/words/characters (also sent/got) for user/all convos'
+    def get_time_series_data(self, period, subject='all', **kwargs):
+        time_series = utils.generate_date_series(period, **kwargs)
+        # get_stats_for_intervals reads self.df on its own; it accepts only the series
+        return self.get_stats_for_intervals(time_series, subject=subject)
+
+    # # 7. Ranking of friends by messages by y/m/d/h, by different stats, by sent/got
+    def get_ranking_of_friends_by_messages(self, attribute='msg_count', subject='all', start=None, end=None,
+                                           period=None):
+        """Collect `attribute` per name via utils.fill_dict, sorted by value, highest first."""
+        # TODO almost the same function as get_count
+        count_dict = {}
+        for name in self.names:
+            df = self.df[self.df.partner == name]
+            stats = self.get_stats(df=df, subject=subject, start=start, end=end, period=period)
+            if stats is not None:
+                count_dict = utils.fill_dict(count_dict, name, getattr(stats, attribute))
+
+        count_dict = {key: value for key, value in sorted(count_dict.items(), key=lambda item: item[1], reverse=True)}
+        return count_dict
+
+    @staticmethod
+    @utils.subject_checker
+    @utils.date_checker
+    @utils.period_checker
+    def filter_by_input(df, subject='all', start=None, end=None, period=None):
+        """Keep the rows of `df` matching the `subject` side and the start/end/period window."""
+        if subject == 'me':
+            df = df[df.sender_name == 'Levente Csőke']
+        elif subject == 'partner':
+            df = df[df.sender_name != 'Levente Csőke']
+        if start and end:
+            df = df.loc[start:end]
+        elif start and not end:
+            df = df.loc[start:start + period]
+        elif not start and end:
+            df = df.loc[end - period:end]
+        return df
diff --git a/miner/App.py b/miner/App.py
index c2560bd..4989e8b 100644
--- a/miner/App.py
+++ b/miner/App.py
@@ -1,5 +1,5 @@
-from miner.ConversationAnalyzer import ConversationAnalyzer
-from miner.MessagingAnalyzer import MessagingAnalyzer
+from miner.Analyzer import Analyzer
+
 from miner.People import People
 
 DATA_PATH = '/home/levente/projects/facebook-data-miner/data'
@@ -13,26 +13,8 @@ def __init__(self):
     def analyze_messages():
         p = People(path=DATA_PATH)
 
-        stats = {}
-
-        for name, person in p.data.items():
-            if person.messages is None:
-                stats[person.name] = None
-                continue
-            analyzer = ConversationAnalyzer(person.name, person.messages)
-            stats[person.name] = analyzer.stats
-            # if stats[person.name].get('message_count').get('me') > 5000:
-            #    top[person.name] = stats[person.name]
-        print()
-
-        # print('LEN: ', len(top.keys()))
-        # top_all = {name: data.get('message_count').get('all') for name, data in top.items()}
-        # analyzer.visualize_stats(top)
-
-    @staticmethod
-    def analyze_messaging():
-        people = People(path=DATA_PATH)
-        msg_analyzer = MessagingAnalyzer(people)
+        analyzer = Analyzer(p)
+        rank = analyzer.get_ranking_of_friends_by_messages(attribute='char_count')
 
 
 if __name__ == '__main__':
diff --git a/miner/ConversationAnalyzer.py b/miner/ConversationAnalyzer.py
deleted file mode 100644
index a5928bf..0000000
--- a/miner/ConversationAnalyzer.py
+++ /dev/null
@@ -1,146 +0,0 @@
-import pandas as pd
-from miner.utils import date_checker, period_checker, subject_checker, generate_date_series, get_stats_for_intervals
-
-
-class ConversationAnalyzer:
-    def __new__(cls, name, messages, *args, **kwargs):
-        if messages is None:  # This deals with the case if no messages
-            return None
-        return super(ConversationAnalyzer, cls).__new__(cls, *args, **kwargs)
-
-    def __init__(self, name, messages):
-        self.name = name
-        self.df = messages
-
-    def __str__(self):
-        return f'{self.name}: {list(self.df.index)}'
-
-    @property
-    def stats(self):
-        return self.get_stats(self.df)
-
-    def get_stats(self, df, subject='all', start=None, end=None, period=None):
-        df = self.filter_by_input(df, subject=subject, start=start, end=end, period=period)
-        stats = ConversationStats(df)
-        return stats
-
-    def get_time_series_data(self, subject='all', **kwargs):
-        time_series = generate_date_series(**kwargs)
-        return get_stats_for_intervals(self.get_stats, self.df, time_series, subject=subject)
-
-    @staticmethod
-    def get_plottable_time_series_data(interval_stats, statistic):
-        for k, v in interval_stats.items():
-            if isinstance(v, ConversationStats):
-                interval_stats[k] = getattr(v, statistic)
-        return interval_stats
-
-    @staticmethod
-    @subject_checker
-    @date_checker
-    @period_checker
-    def filter_by_input(df, subject='all', start=None, end=None, period=None):
-        if subject == 'me':
-            df = df[df.sender_name == 'Levente Csőke']
-        elif subject == 'partner':
-            df = df[df.sender_name != 'Levente Csőke']
-        if start and end:
-            df = df.loc[start:end]
-        elif start and not end:
-            df = df.loc[start:start + period]
-        elif not start and end:
-            df = df.loc[end - period:end]
-        return df
-
-
-class ConversationStats:
-    """
-    Statistics of conversation with one person.
-    """
-
-    def __init__(self, df):
-        self.df = df
-
-    def __repr__(self):
-        return f'{self.msg_count}'
-
-    @property
-    def messages(self):
-        return self.df.content.dropna()
-
-    @property
-    def words(self):
-        return self.get_words()
-
-    # 1.
-    @property
-    def msg_count(self):
-        return len(self.df)
-
-    # 2.
-    @property
-    def unique_msg_count(self):
-        return len(self.messages.unique())
-
-    # 3.
-    @property
-    def most_used_msgs(self):
-        # TODO LATER first few (1-10) messages
-        return self.messages.value_counts()
-
-    # 4.
-    @property
-    def msg_frequency(self):
-        # NOTE this has been most likely depracated OR?
-        pass
-
-    # 5.
-    @property
-    def word_count(self):
-        return len(self.words)
-
-    # 6.
-    @property
-    def unique_word_count(self):
-        return len(set(self.words))
-
-    # 7.
-    @property
-    def most_used_words(self):
-        s = pd.Series(self.words)
-        return s.value_counts()
-
-    # 8.
-    @property
-    def word_frequency(self):
-        pass
-
-    # 9.
-    @property
-    def char_count(self):
-        char_count = 0
-        for word in self.words:
-            char_count += len(word)
-        return char_count
-
-    # 10.
-    @property
-    def rate_of_media_messages(self):
-        """
-        TODO LATER
-        search for media messages all 5 of them
-        rate is only the second or third abstraction
-        """
-        pass
-
-    def get_words(self):
-        token_list = self.messages.str.lower().str.split()
-        words = []
-        for tokens in token_list:
-            # print(tokens)
-            if not isinstance(tokens, list):
-                print('WARNING! Not a list!')
-                continue  # TODO ??? check this
-            for token in tokens:
-                words.append(token)
-        return words
diff --git a/miner/ConversationStats.py b/miner/ConversationStats.py
new file mode 100644
index 0000000..040bd17
--- /dev/null
+++ b/miner/ConversationStats.py
@@ -0,0 +1,89 @@
+
+class ConversationStats:
+    """
+    Statistics of conversation with one person.
+    """
+
+    def __init__(self, df):
+        self.df = df
+
+    def __repr__(self):
+        return f'{self.msg_count}'
+
+    @property
+    def messages(self):
+        """The `content` column with missing values dropped."""
+        return self.df.content.dropna()
+
+    @property
+    def words(self):
+        return self.get_words()
+
+    # 1.
+    @property
+    def msg_count(self):
+        return len(self.df)
+
+    # 2.
+    @property
+    def unique_msg_count(self):
+        return len(self.messages.unique())
+
+    # 3.
+    @property
+    def most_used_msgs(self):
+        return self.messages.value_counts()
+
+    # 4.
+    @property
+    def msg_frequency(self):
+        # NOTE this has been most likely deprecated OR?
+        pass
+
+    # 5.
+    @property
+    def word_count(self):
+        return len(self.words)
+
+    # 6.
+    @property
+    def unique_word_count(self):
+        return len(set(self.words))
+
+    # 7.
+    @property
+    def most_used_words(self):
+        # local import: nothing in this module brings `pd` into scope otherwise
+        import pandas as pd
+        return pd.Series(self.words).value_counts()
+
+    # 8.
+    @property
+    def word_frequency(self):
+        pass
+
+    # 9.
+    @property
+    def char_count(self):
+        # sums token lengths, so whitespace between words is not counted
+        return sum(len(word) for word in self.words)
+
+    # 10.
+    @property
+    def rate_of_media_messages(self):
+        """
+        TODO LATER
+        search for media messages all 5 of them
+        rate is only the second or third abstraction
+        """
+        pass
+
+    def get_words(self):
+        """Return every lower-cased, whitespace-split token of all messages as a list."""
+        words = []
+        for tokens in self.messages.str.lower().str.split():
+            if not isinstance(tokens, list):  # non-string content does not split into a list
+                print('WARNING! Not a list!')
+                continue
+            words.extend(tokens)
+        return words
diff --git a/miner/Conversations.py b/miner/Conversations.py
index 4f61ad9..7a373d4 100644
--- a/miner/Conversations.py
+++ b/miner/Conversations.py
@@ -1,64 +1,93 @@
+import pandas as pd
 import os
-from miner.Group import Group
+
+
+from miner.Messages import Messages
 from miner.Individual import Individual
-from miner.FacebookData import FacebookData
-import pandas as pd
+
 from miner import utils
-from datetime import datetime
 
 
 class Conversations:
     def __init__(self, data_path):
-        self.indie_convo_paths = []
-        self.group_convo_paths = []
-        self.deleted_user_convo_paths = []
+        self.private_convo_paths = {}
+        self.group_convo_paths = {} # TODO fill this as well
+        self.deleted_user_convo_paths = []  # NOTE these are collected but not yet used
 
         self.data_path = f'{data_path}/{utils.MESSAGE_SUBPATH}'
         self.order_paths()
 
     def order_paths(self):
-        json_paths = utils.walk_directory_and_search(self.data_path, '.json')
+        paths_map = f'{self.data_path}/private_messages.json'
+        if os.path.isfile(paths_map):
+            self.read_paths(paths_map)
+            return
+        json_paths = utils.walk_directory_and_search(self.data_path, '.json', contains_string='message_')
         self.differentiate_paths(json_paths)
+        self.register_paths()
 
     def differentiate_paths(self, jsons):
         for file in jsons:
             msg = Messages(file)
             if msg.title == 'Facebook User':
-                self.deleted_user_convo_paths.append(file)
+                self.deleted_user_convo_paths.append(msg.messages_dir)
             elif msg.ttype == 'RegularGroup':
-                self.group_convo_paths.append(file)
+                self.map_group_convo_files(msg, file)
             elif msg.ttype == 'Regular':
-                self.indie_convo_paths.append(file)
+                # self.private_convo_paths[msg.title] = msg.messages_dir
+                self.map_private_convo_files(msg, file)
             else:
                 raise ValueError('Should not happen!')
 
+    def register_paths(self):
+        utils.dump_to_json(self.private_convo_paths, f'{self.data_path}/private_messages.json')
+        
+    def read_paths(self, file):
+        """Load the cached name -> message-file-paths map from the JSON file `file`."""
+        self.private_convo_paths = utils.read_json(file)
+
+    def map_private_convo_files(self, msg, file):
+        name = msg.title
+        if self.private_convo_paths.get(name):
+            self.private_convo_paths[name].append(file)
+        else:
+            self.private_convo_paths[name] = [file]
+
+    def map_group_convo_files(self, msg, file):
+        for participant in msg.participants:
+            if self.group_convo_paths.get(file):
+                self.group_convo_paths[file].append(participant)
+            else:
+                self.group_convo_paths[file] = [participant]
+
     def get_people_from_private_messages(self, name=None, membership=True):
         name_data_map = {}
-        paths = self.indie_convo_paths
-        if name is not None:
-            paths = self.filter_by_name(name)
-        for file in paths:
-            messages = Messages(file)
-            name = messages.title
-            if name_data_map.get(name) is not None:
-                dfs = [name_data_map[name].messages, messages.df]
-                name_data_map[name].messages = pd.concat(dfs).sort_index()
-            else:
-                # TODO we may also want to get group messages where name is present
-                name_data_map[name] = self.create_individual(messages, membership=membership)
+        convo_path_map = self.filter_by_name(name) if name is not None else self.private_convo_paths.values()
+        for paths in convo_path_map:
+            for file in paths:
+                messages = Messages(file)
+                name = messages.title
+                if name_data_map.get(name) is not None:
+                    dfs = [name_data_map[name].messages, messages.df]
+                    name_data_map[name].messages = pd.concat(dfs).sort_index()
+                else:
+                    name_data_map[name] = self.create_individual(messages, membership=membership)
         return name_data_map
 
     def filter_by_name(self, name):
         filtered_paths = []
-        compact_name = None if name is None else utils.replace_accents(name.lower())
-        for path in self.indie_convo_paths:
-            if compact_name in os.path.basename(os.path.dirname(os.path.normpath(path))):
-                filtered_paths.append(path)
+        names = []
+        if isinstance(name, str):
+            names = [name]
+        elif isinstance(name, list):
+            names = name
+        for name in names:
+            filtered_paths.append(self.private_convo_paths.get(name, []))  # unknown name -> no files, not None
         return filtered_paths
 
     def create_individual(self, messages, membership=None):
         return Individual(
-            name=messages.title, title=messages.title,  # TODO depracate one of (name, title)
+            name=messages.title,
             compact=messages.compact_names,
             messages=messages.df,
             messages_dir=messages.messages_dir,
@@ -66,74 +95,12 @@ def create_individual(self, messages, membership=None):
             member_of=self.group_membership(messages.title) if membership else None,
         )
 
-    @staticmethod
-    def fill_data_map(message):
-        return {
-            'title': message.title,
-            'compact_name': message.compact_names,
-            # 'participants': msg.participants + ['Levente Csőke'],
-            'participants': message.participants,
-            'messages': message.df,
-            'friend': None,
-            'messages_dir': message.messages_dir,
-            'media_dir': message.media_dir
-        }
-
     @staticmethod
     def group_membership(name):
         return None
 
-    @staticmethod
-    def json_is_a_group_msg(file):
-        msg = Messages(file)
-        return msg.ttype == 'RegularGroup'
-
-
-class Messages(FacebookData):
-    def __init__(self, json_path):
-        super().__init__(json_path)
-        self.to_df('messages')
-        self.set_date_as_index()
-
-    @property
-    def names(self):
-        return pd.DataFrame(self.participants)[0]
-
-    @property
-    def participants(self):
-        participants = self.decoded.get('participants')
-        # TODO I should be IN
-        # but this breaks stuff at TestMessagingAnalyzer
-        return [p.get('name') for p in participants if p.get('name') != 'Levente Csőke']
-        # return [p.get('name') for p in participants if p.get('name')]
-
-    @property
-    def title(self):
-        return self.decoded.get('title')
-
-    @property
-    def ttype(self):
-        return self.decoded.get('thread_type')
-
-    @property
-    def messages_dir(self):
-        thread_path = self.decoded.get('thread_path')
-        if not thread_path.startswith('inbox/'):
-            raise ValueError('Field `thread_path` should start with `inbox/`.')
-        return thread_path.split('inbox/')[1]
-
-    @property
-    def media_dir(self):
-        for media in utils.MEDIA_DIRS:
-            if media in self._df.columns:
-                media_in_msg = list(self._df[media][self._df[media].notnull()])
-                uri = media_in_msg[0][0].get('uri')
-                return os.path.dirname(os.path.dirname(uri)).split('inbox/')[1]
-
-    def set_date_as_index(self):
-        date_series = self._df.timestamp_ms.apply(self.ts_to_date)
-        self._df = self._df.set_index(date_series).iloc[::-1]
+    def get_people_from_group_messages(self):
+        pass  # TODO for v0.0.4
+
+
 
-    @staticmethod
-    def ts_to_date(date):
-        return datetime.fromtimestamp(date / 1000)  # .strftime('%Y-%m-%d')
diff --git a/miner/FacebookData.py b/miner/FacebookData.py
index babe74d..81b946f 100644
--- a/miner/FacebookData.py
+++ b/miner/FacebookData.py
@@ -21,8 +21,6 @@ def json(self):
 
     @property
     def compact_names(self):
-        # NOTE this is the place where we change pd/np to builtin
-        # do we have to do this?
         name_list = list(utils.without_accent_and_whitespace(utils.lower_names(self.names)))  # should be just fine
         return name_list[0] if len(name_list) == 1 else name_list
 
diff --git a/miner/Friends.py b/miner/Friends.py
index 7950155..98d995f 100644
--- a/miner/Friends.py
+++ b/miner/Friends.py
@@ -1,5 +1,3 @@
-import pandas as pd
-import os
 from miner.FacebookData import FacebookData
 from miner.Individual import Individual
 
@@ -16,7 +14,7 @@ def get_people(self, name=None):
             if name is not None and name != full_name:  # filtering for name
                 continue
             names[full_name] = Individual(
-                name=full_name, title=full_name,  # TODO depracate one of (name, title)
+                name=full_name,
                 compact=compact,
                 friend=True,
             )
diff --git a/miner/Individual.py b/miner/Individual.py
index 84d63b5..4518a5f 100644
--- a/miner/Individual.py
+++ b/miner/Individual.py
@@ -1,9 +1,8 @@
 class Individual:
-    def __init__(self, name=None, title=None, compact=None, messages=None, friend=None, messages_dir=None,
+    def __init__(self, name=None, compact=None, messages=None, friend=None, messages_dir=None,
                  media_dir=None,
                  member_of=None):
         self._name = name
-        self._title = title
         self._compact_name = compact
         self._messages = messages
         self._friend = friend
@@ -16,8 +15,7 @@ def __repr__(self):
 
     def __add__(self, other):
         return Individual(
-            name=self.title if self.title else other.title,
-            title=self.title if self.title else other.title,  # TODO depracate one of (name, title)
+            name=self.name if self.name else other.name,
             friend=self.friend if self.friend else other.friend,
             compact=self.compact_name if self.compact_name else other.compact_name,
             messages=self.messages if len(self.messages) else other.messages,
@@ -30,10 +28,6 @@ def __add__(self, other):
     def name(self):
         return self._name
 
-    @property
-    def title(self):
-        return self._title
-
     @property
     def messages(self):
         return self._messages
diff --git a/miner/Me.py b/miner/Me.py
index 377c3a9..b10356d 100644
--- a/miner/Me.py
+++ b/miner/Me.py
@@ -7,4 +7,4 @@ def __init__(self, *args):
 
     @property
     def name(self):
-        return 'Levente Csőke'
+        return ''
diff --git a/miner/Messages.py b/miner/Messages.py
new file mode 100644
index 0000000..6fbc9d3
--- /dev/null
+++ b/miner/Messages.py
@@ -0,0 +1,64 @@
+from datetime import datetime
+import pandas as pd
+import os
+
+from miner.FacebookData import FacebookData
+from miner import utils
+
+
+class Messages(FacebookData):
+    def __init__(self, json_path):
+        super().__init__(json_path)
+        self.to_df('messages')
+        self.set_date_as_index()
+        self.add_partner_column()
+
+    @property
+    def names(self):
+        # TODO ugly
+        try:
+            return pd.DataFrame(self.participants)[0]
+        except KeyError:
+            return pd.Series({0: 'Facebook User'})
+
+    @property
+    def participants(self):
+        participants = self.decoded.get('participants')
+        # TODO I should be IN
+        # but this breaks stuff at TestMessagingAnalyzer
+        return [p.get('name') for p in participants if p.get('name') != 'Levente Csőke']
+        # return [p.get('name') for p in participants if p.get('name')]
+
+    @property
+    def title(self):
+        return self.decoded.get('title')
+
+    @property
+    def ttype(self):
+        return self.decoded.get('thread_type')
+
+    @property
+    def messages_dir(self):
+        thread_path = self.decoded.get('thread_path')
+        if not thread_path.startswith('inbox/'):
+            raise ValueError('Field `thread_path` should start with `inbox/`.')
+        return thread_path.split('inbox/')[1]
+
+    @property
+    def media_dir(self):
+        for media in utils.MEDIA_DIRS:
+            if media in self._df.columns:
+                media_in_msg = list(self._df[media][self._df[media].notnull()])
+                uri = media_in_msg[0][0].get('uri')
+                return os.path.dirname(os.path.dirname(uri)).split('inbox/')[1]
+
+    def set_date_as_index(self):
+        date_series = self._df.timestamp_ms.apply(self.ts_to_date)
+        self._df = self._df.set_index(date_series).iloc[::-1]
+
+    def add_partner_column(self):
+        self._df['partner'] = self.title
+
+    @staticmethod
+    def ts_to_date(date):
+        return datetime.fromtimestamp(date / 1000)
diff --git a/miner/MessagingAnalyzer.py b/miner/MessagingAnalyzer.py
deleted file mode 100644
index 8e8ddd4..0000000
--- a/miner/MessagingAnalyzer.py
+++ /dev/null
@@ -1,127 +0,0 @@
-from miner.utils import year_converter, month_converter, generate_date_series, get_stats_for_intervals
-from datetime import datetime, date, timedelta
-import pandas as pd
-from miner.ConversationAnalyzer import ConversationAnalyzer
-
-
-class MessagingAnalyzer:
-    def __init__(self, people):
-        self.names = people.names
-        self.people = people.data
-
-    def get_stats(self, df, subject='all', start=None, end=None, period=None):
-        # TODO LATER
-        # here you have to do something with it
-        pass
-
-    def get_count(self, attribute, subject='all', start=None, end=None, period=None):
-        count = 0
-        # we have a list of names we want to iterate over
-        for name in self.names:
-            stats = self.get_conversation_stats(name=name, subject=subject, start=start, end=end, period=period)
-            if stats is not None:
-                count += getattr(stats, attribute)
-        return count
-
-    def get_conversation_stats(self, name, subject='all', start=None, end=None, period=None):
-        messages = self.people.get(name).messages
-        analyzer = ConversationAnalyzer(name, messages)
-        if analyzer is None:
-            return None
-        return analyzer.get_stats(messages, subject=subject, start=start, end=end, period=period)
-
-    def total_number_of_(self, attribute, subject='all', **kwargs):
-        return self.get_count(attribute=attribute, subject=subject, **kwargs)
-
-    # 1. Ranking of friends by total count of messages/words/characters (also by year/month/day/hour)
-    def total_number_of_messages(self, **kwargs):
-        return self.total_number_of_(attribute='msg_count', **kwargs)
-
-    def total_number_of_words(self, **kwargs):
-        return self.total_number_of_(attribute='word_count', **kwargs)
-
-    def total_number_of_characters(self, **kwargs):
-        return self.total_number_of_(attribute='char_count', **kwargs)
-
-    # 2. Ranking of friends who I sent the most messages/words/characters (also by year/month/day/hour)
-    def total_number_of_messages_sent(self, **kwargs):
-        return self.total_number_of_(attribute='msg_count', subject='me', **kwargs)
-
-    def total_number_of_words_sent(self, **kwargs):
-        return self.total_number_of_(attribute='word_count', subject='me', **kwargs)
-
-    def total_number_of_characters_sent(self, **kwargs):
-        return self.total_number_of_(attribute='char_count', subject='me', **kwargs)
-
-    # 3. Ranking of friends who sent the most messages/words/characters (also by year/month)
-    def total_number_of_messages_received(self, **kwargs):
-        return self.total_number_of_(attribute='msg_count', subject='partner', **kwargs)
-
-    def total_number_of_words_received(self, **kwargs):
-        return self.total_number_of_(attribute='word_count', subject='partner', **kwargs)
-
-    def total_number_of_characters_received(self, **kwargs):
-        return self.total_number_of_(attribute='char_count', subject='partner', **kwargs)
-
-    # 4. Most used messages/words in convos by me/partner (also by year/month/day/hour)
-    def most_used_messages_by_me(self, **kwargs):
-        """
-        >>> s1 = pd.Series([3, 1, 2, 3, 4, 1, 1])
-        >>> s2 = pd.Series([3, 2, 1, 1])
-        >>> s1_vc = s1.value_counts()
-        >>> s2_vc = s2.value_counts()
-        TODO LATER most used is already a problem:
-          - because its a series of all the unique messages/words ever used in a convo
-          - it contains strings like ':d', ':p' and 'xd'
-          - from all the convos the result of value_counts has to be cleared
-          and has to be truncated (that is not use the 200th most used word, only top10 let's say)
-          - then these series has to be merged in a way that the same string's counts are added up
-          - what about typos????!
-        """
-        pass
-
-    def most_used_messages_by_partners(self, **kwargs):
-        pass
-
-    def most_used_words_by_me(self, **kwargs):
-        pass
-
-    def most_used_words_by_partners(self, **kwargs):
-        pass
-
-    # 5. Number of messages sent/got on busiest period (by year/month/day/hour)
-    def days_when_most_messages_sent(self):
-        pass
-
-    def days_when_most_messages_received(self):
-        pass
-
-    def hours_when_most_messages_sent(self):
-        # TODO LATER hard algorithmic problem
-        # is this referring to the absolute hour most messages sent??
-        # like: 2014.07.25. 15h-16h
-        # OR
-        # the pattern of most messages sent between this and this hours
-        # like: 20h-21h
-        # ACTUALLY BOTH
-        # for years/months/days/hours
-        # BUT this comes from the time series analysis
-        pass
-
-    def hours_when_most_messages_received(self):
-        pass
-
-    # 6. Time series: dict of 'year/month/day/hour : number of messages/words/characters (also sent/got) for user/all convos'
-    def time_series_analysis_for_all(self, subject=None, **kwargs):
-        time_series = generate_date_series(**kwargs)
-        stacked_df = self.stack_dfs(self.people)
-        interval_stats = get_stats_for_intervals(self.get_stats, stacked_df, time_series, subject=subject)
-        # TODO finsh this for time series for all
-
-    @staticmethod
-    def stack_dfs(people):
-        dfs = []
-        for data in people.values():
-            if data.messages is not None:
-                dfs.append(data.messages)
-        return pd.concat(dfs).sort_index()
diff --git a/miner/People.py b/miner/People.py
index 707c6f5..2970390 100644
--- a/miner/People.py
+++ b/miner/People.py
@@ -3,8 +3,6 @@
 from miner.Conversations import Conversations
 from miner.Friends import Friends
 
-# from Me import Me
-
 DATA_PATH = '/home/levente/projects/facebook-data-miner/data'
 
 
@@ -21,7 +19,7 @@ def data(self):
 
     @property
     def names(self):
-        return self._names
+        return self._names #if len(self._names) > 1 else self._names[0]
 
     @property
     def groups(self):
@@ -36,9 +34,10 @@ def get_people(self, name=None):
         # TODO LATER too slow -> store in file
         start = time.time()
         conversations = Conversations(self.data_path)
-        individuals = conversations.get_people_from_private_messages()
-
-        print('convos: ', time.time() - start)
+        print('convos1: ', time.time() - start)
+        start = time.time()
+        individuals = conversations.get_people_from_private_messages(name=name)
+        print('convos2: ', time.time() - start)
 
         return self.unify_people(friends, individuals)
 
diff --git a/miner/requirements.txt b/miner/requirements.txt
new file mode 100644
index 0000000..1262ec9
--- /dev/null
+++ b/miner/requirements.txt
@@ -0,0 +1,9 @@
+numpy==1.18.1
+pandas==1.0.3
+dateparser==0.7.6
+seaborn==0.10.1
+matplotlib==3.2.1
+plotly==4.8.2
+miner==0.0.0
+Pillow==7.2.0
+python_dateutil==2.8.1
diff --git a/miner/utils.py b/miner/utils.py
index c80001b..cfa7644 100644
--- a/miner/utils.py
+++ b/miner/utils.py
@@ -1,6 +1,5 @@
 import os
 import json
-import pandas as pd
 import dateparser
 from datetime import datetime, timedelta
 from dateutil.relativedelta import relativedelta
@@ -9,6 +8,13 @@
 MEDIA_DIRS = ['photos', 'gifs', 'files', 'videos', 'audio']
 MONTHS = ['january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'october',
           'november', 'december']
+WEEKDAYS = ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"]
+PERIOD_MAP = {
+    'y': None,
+    'm': MONTHS,
+    'd': WEEKDAYS,
+    'h': None,
+}
 DELTA_MAP = {
     'y': relativedelta(years=+1),
     'm': relativedelta(months=+1),
@@ -34,17 +40,14 @@ def read_json(file):
 
 
 def dump_to_json(data=None, file=None):
-    with open(file, 'w') as f:
-        json.dump(data, f)
+    with open(file, 'w', encoding='utf8') as f:
+        json.dump(data, f, ensure_ascii=False)
 
 
 def order_list_of_dicts(lst, key='timestamp_ms'):
     return sorted(lst, key=lambda k: k[key])
 
 
-#
-
-
 def year_converter(func):
     """
     Higher-order function that converts @year param passed to @func into numeric version.
@@ -121,31 +124,22 @@ def wrapper(*args, **kwargs):
     return wrapper
 
 
-def generate_date_series(start=None, end=None, period=None):
+def generate_date_series(period, start=None, end=None):
     if period is None or DELTA_MAP.get(period) is None:
         raise ValueError('Parameter `period` should be one of {y, m, d, h}')
     start = start or datetime(year=2009, month=10, day=2, hour=0)  # TODO LATER change this to date when user joined FB
     end = end or datetime.now()
 
+    # TODO THIS HAS A PROBLEM. msgs happened in 2020 getting assigned to 2019 because: 2019 + 1 year + start.month + start.day < now()
+    # TODO serious problem!
     dates = []
     intermediate = start
-    while intermediate <= end:
+    while intermediate <= (end + DELTA_MAP.get(period)):  # means that we want to have the end in it as well
         dates.append(intermediate)
         intermediate = intermediate + DELTA_MAP.get(period)
     return dates
 
 
-def get_stats_for_intervals(func, df, time_series, subject='all'):
-    data = {}
-    for i in range(len(time_series) - 1):  # only looping len - 1 times
-        start = time_series[i]
-        # TODO test it with new data injected/modified at runtime <- this is hard
-        # what is this about actually?
-        end = time_series[i + 1]
-        data[start] = func(df, subject=subject, start=start, end=end)
-    return data
-
-
 def dt(year: int = 2004, month: int = 1, day: int = 1, hour: int = 0):
     return datetime(year=year, month=month, day=day, hour=hour)
 
@@ -191,10 +185,42 @@ def without_accent_and_whitespace(col):
     return col.apply(replace_accents)
 
 
-def walk_directory_and_search(path, extension):
+def walk_directory_and_search(path, extension, contains_string=None):
     paths = []
     for root, dirs, files in os.walk(path):
         for file_name in files:
             if file_name.endswith(extension):
-                paths.append(os.path.join(root, file_name))
+                if contains_string is None or contains_string in file_name:  # None -> no file-name filter
+                    paths.append(os.path.join(root, file_name))
     return paths
+
+
+def fill_dict(dictionary, key, value):
+    if dictionary.get(key) is not None:
+        dictionary[key] += value
+    else:
+        dictionary[key] = value
+    return dictionary
+
+
+def month_sorter(x):
+    return MONTHS.index(x[0])
+
+
+def count_stat_for_period(data, period):
+    # TODO sort by lists
+    periods = {}
+    for key, value in data.items():
+        if period == 'y':
+            periods = fill_dict(periods, key.year, value)
+            periods = dict(sorted(periods.items()))
+        elif period == 'm':
+            periods = fill_dict(periods, MONTHS[key.month - 1], value)
+            periods = dict(sorted(periods.items(), key=lambda x: MONTHS.index(x[0])))
+        elif period == 'd':
+            periods = fill_dict(periods, WEEKDAYS[key.weekday()], value)
+            periods = dict(sorted(periods.items(), key=lambda x: WEEKDAYS.index(x[0])))
+        elif period == 'h':
+            periods = fill_dict(periods, key.hour, value)
+            periods = dict(sorted(periods.items()))
+    return periods
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..76f8eef
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+seaborn==0.10.1
+dateparser==0.7.6
+pandas==1.0.3
+matplotlib==3.2.1
+python_dateutil==2.8.1
diff --git a/tests/test_ConversationAnalyzer.py b/tests/test_ConversationAnalyzer.py
index f7d679f..9d11e46 100644
--- a/tests/test_ConversationAnalyzer.py
+++ b/tests/test_ConversationAnalyzer.py
@@ -1,36 +1,35 @@
 import pytest
-from miner.ConversationAnalyzer import ConversationAnalyzer
-from miner.People import People
+
+from miner.Analyzer import Analyzer
 from miner.utils import dt
 
 TEST_DATA_PATH = '/home/levente/projects/facebook-data-miner/tests/test_data'
 
 
-@pytest.fixture(scope='session')
-def person(get_people):
-    def _person(name):
-        people = get_people(name)
-        return people.data[name]
-
-    return _person
+# @pytest.fixture(scope='session')
+# def person(get_people):
+#     def _person(name):
+#         people = get_people(name)
+#         return people.data[name]
+#
+#     return _person
 
 
 @pytest.fixture(scope='session')
-def analyze(person):
+def analyze(get_people):
     def _analyze(name):
-        individual = person(name)
-        return ConversationAnalyzer(name, individual.messages)
+        people = get_people(name)
+        return Analyzer(people)
 
     return _analyze
 
 
 @pytest.fixture(scope='session')
-def statistics(person, analyze):
+def statistics(analyze):
     def _stats(name, **kwargs):
-        individual = person(name)
         analyzer = analyze(name)
         if 'subject' in kwargs or 'start' in kwargs or 'end' in kwargs:  # and others
-            return analyzer.get_stats(individual.messages, **kwargs)
+            return analyzer.get_stats(**kwargs)
         else:
             return analyzer.stats
 
@@ -219,10 +218,6 @@ def test_stats_teflon_musk_all_2014_12(statistics):
     # assert stats.most_used_chars == 0
 
 
-class TestConversationAnalyzer:  # Foo Bar
-    pass
-
-
 def test_time_series_analysis_for_user(analyze):
     analyzer = analyze('Teflon Musk')
     analyzer.get_time_series_data(subject='all', period='y')
diff --git a/tests/test_Conversations.py b/tests/test_Conversations.py
index d066361..ef9fdc3 100644
--- a/tests/test_Conversations.py
+++ b/tests/test_Conversations.py
@@ -1,72 +1,76 @@
 import pandas as pd
 import pytest
 from miner.Conversations import Conversations
+from miner.Individual import Individual
 from miner import utils
 import os
+
 TEST_DATA_PATH = '/home/levente/projects/facebook-data-miner/tests/test_data'
 
 
 @pytest.fixture()
-def convos():
-    convo = Conversations(f'{TEST_DATA_PATH}')
-    return convo.get_people_from_private_messages()
+def conversations():
+    return Conversations(f'{TEST_DATA_PATH}')
+
+
+@pytest.fixture
+def people_from_private_convos(conversations):
+    return conversations.get_people_from_private_messages()
+
+
+def test_if_paths_are_registered(conversations):
+    assert len(conversations.private_convo_paths) == 4
+    assert len(conversations.group_convo_paths) == 3
+    assert len(conversations.deleted_user_convo_paths) == 0
 
 
-def test_get_all_people_from_convo(convos):
+def test_get_all_people_from_private_messages(people_from_private_convos):
+    people = list(people_from_private_convos.keys())
+    expected = ['Foo Bar', 'Teflon Musk', 'Benedek Elek', 'Tőke Hal']
+    assert sorted(people) == sorted(expected)
+
+
+def test_get_all_people_from_convo(conversations):
     people = []
-    # TODO make this work
-    for convo in convos.keys():
-        if convo.startswith('group'):
-            people += [p for p in convos[convo].get('participants')]
-        else:
-            people.append(convo)
-    people = list(set(people))
+    # indie
+    people += list(conversations.private_convo_paths.keys())
+    # group
+    people_from_groups = [p for people in conversations.group_convo_paths.values() for p in people]
+
+    people += people_from_groups
 
     expected = ['Dér Dénes', 'Facebook User', 'Foo Bar', 'John Doe', 'Teflon Musk', 'Benedek Elek', 'Donald Duck',
                 'Tőke Hal']
-    # TODO LATER what to do with Facebook User??????
-    assert sorted(people) == sorted(expected)
 
+    assert sorted(list(set(people))) == sorted(expected)
+
+
+def test_people_are_individual_instances(people_from_private_convos):
+    assert all([isinstance(person, Individual) for person in people_from_private_convos.values()])
+
+
+def test_all_individual_have_messages_df(people_from_private_convos):
+    assert all([isinstance(data.messages, pd.DataFrame) for data in people_from_private_convos.values()])
 
-def test_all_convos_have_dir(convos):
-    assert all([data.messages_dir for data in convos.values()])
 
+def test_all_individual_have_dir(people_from_private_convos):
+    assert all([data.messages_dir for data in people_from_private_convos.values()])
 
-def test_all_convos_have_messages_df(convos):
-    assert all([isinstance(data.messages, pd.DataFrame) for data in convos.values()])
 
+def test_some_individual_as_media_dir(people_from_private_convos):
+    assert people_from_private_convos.get('Teflon Musk').media_dir
+    assert not people_from_private_convos.get('Benedek Elek').media_dir
 
-def test_some_convos_as_media_dir(convos):
-    assert convos.get('Teflon Musk').media_dir
-    assert not convos.get('Benedek Elek').media_dir
 
-def test_convo_media_has_one_folder_of_possibles(convos):
-    listed_dir = os.listdir(f"{TEST_DATA_PATH}/{utils.MESSAGE_SUBPATH}/{convos.get('Teflon Musk').media_dir}")
+def test_individual_media_has_one_folder_of_possibles(people_from_private_convos):
+    listed_dir = os.listdir(
+        f"{TEST_DATA_PATH}/{utils.MESSAGE_SUBPATH}/{people_from_private_convos.get('Teflon Musk').media_dir}")
     assert 'files' in listed_dir
     assert 'photos' in listed_dir
     assert 'audio' not in listed_dir
 
-def test_groups_have_more_than_two_participates(convos):
-    groups = {convo: data for convo, data in convos.items() if convo.startswith('group')}
+
+def test_groups_have_more_than_two_participates(people_from_private_convos):
+    groups = {convo: data for convo, data in people_from_private_convos.items() if convo.startswith('group')}
     # TODO participants should contain the user itself as well
     assert all([len(data.get('participants')) > 2 for data in groups.values()])
-
-
-
-"""
-testcases:
-- individual convos contain all names, compact_names, message folders and media folders
-  - media folders are a big question. how do you get it? actually once you have the thread_path then from that you can guess,
-  OR better off use the uri in the messages... fuck seems complicated
-- friends contain all names and compact names,
-- convos and friends has a common set, and the set is identical
-- people gets assigned with all the unique friends and individual/group convos
-
-gonna test:
-- assigning messages to friends,
-- deal with multiple directories, IF there are multiple directories,
-- 
-concerns:
-- what to do with non-friends,
-- I assume multiple directories are because of files sent,
-"""
diff --git a/tests/test_MessagingAnalyzer.py b/tests/test_MessagingAnalyzer.py
index ec03497..969dc5d 100644
--- a/tests/test_MessagingAnalyzer.py
+++ b/tests/test_MessagingAnalyzer.py
@@ -1,244 +1,245 @@
 import pytest
-from miner.MessagingAnalyzer import MessagingAnalyzer
+
+from miner.Analyzer import Analyzer
 from miner.utils import dt
 
 @pytest.fixture(scope='session')
 def analyzer(get_people):
     people = get_people()
-    return MessagingAnalyzer(people)
+    return Analyzer(people)
 
 
 def test_total_number_of_messages(analyzer):
-    assert analyzer.total_number_of_messages() == 29
+    assert analyzer.get_count(attribute='msg_count', ) == 29
 
-    assert analyzer.total_number_of_messages(start=dt(year=2000), period='y') == 0
-    assert analyzer.total_number_of_messages(start=dt(year=2014), period='y') == 11
-    assert analyzer.total_number_of_messages(start=dt(year=2018), period='y') == 3
-    assert analyzer.total_number_of_messages(start=dt(year=2020), period='y') == 15
+    assert analyzer.get_count(attribute='msg_count', start=dt(year=2000), period='y') == 0
+    assert analyzer.get_count(attribute='msg_count', start=dt(year=2014), period='y') == 11
+    assert analyzer.get_count(attribute='msg_count', start=dt(year=2018), period='y') == 3
+    assert analyzer.get_count(attribute='msg_count', start=dt(year=2020), period='y') == 15
 
-    assert analyzer.total_number_of_messages(start=dt(year=2011, month=11), period='m') == 0
-    assert analyzer.total_number_of_messages(start=dt(year=2014, month=9), period='m') == 1
-    assert analyzer.total_number_of_messages(start=dt(year=2014, month=11), period='m') == 8
-    assert analyzer.total_number_of_messages(start=dt(year=2014, month=12), period='m') == 2
+    assert analyzer.get_count(attribute='msg_count', start=dt(year=2011, month=11), period='m') == 0
+    assert analyzer.get_count(attribute='msg_count', start=dt(year=2014, month=9), period='m') == 1
+    assert analyzer.get_count(attribute='msg_count', start=dt(year=2014, month=11), period='m') == 8
+    assert analyzer.get_count(attribute='msg_count', start=dt(year=2014, month=12), period='m') == 2
 
-    assert analyzer.total_number_of_messages(start=dt(year=2018, month=1), period='m') == 3
-    assert analyzer.total_number_of_messages(start=dt(year=2018, month=5), period='m') == 0
+    assert analyzer.get_count(attribute='msg_count', start=dt(year=2018, month=1), period='m') == 3
+    assert analyzer.get_count(attribute='msg_count', start=dt(year=2018, month=5), period='m') == 0
 
-    assert analyzer.total_number_of_messages(start=dt(year=2020, month=2), period='m') == 10
-    assert analyzer.total_number_of_messages(start=dt(year=2020, month=3), period='m') == 1  # jpg
-    assert analyzer.total_number_of_messages(start=dt(year=2020, month=4), period='m') == 2
-    assert analyzer.total_number_of_messages(start=dt(year=2020, month=5), period='m') == 1
-    assert analyzer.total_number_of_messages(start=dt(year=2020, month=6), period='m') == 0
-    assert analyzer.total_number_of_messages(start=dt(year=2020, month=8), period='m') == 1
+    assert analyzer.get_count(attribute='msg_count', start=dt(year=2020, month=2), period='m') == 10
+    assert analyzer.get_count(attribute='msg_count', start=dt(year=2020, month=3), period='m') == 1  # jpg
+    assert analyzer.get_count(attribute='msg_count', start=dt(year=2020, month=4), period='m') == 2
+    assert analyzer.get_count(attribute='msg_count', start=dt(year=2020, month=5), period='m') == 1
+    assert analyzer.get_count(attribute='msg_count', start=dt(year=2020, month=6), period='m') == 0
+    assert analyzer.get_count(attribute='msg_count', start=dt(year=2020, month=8), period='m') == 1
 
-    assert analyzer.total_number_of_messages(start=dt(year=2020, month=2, day=13), period='d') == 2
-    assert analyzer.total_number_of_messages(start=dt(year=2020, month=2, day=13, hour=6), period='h') == 2
+    assert analyzer.get_count(attribute='msg_count', start=dt(year=2020, month=2, day=13), period='d') == 2
+    assert analyzer.get_count(attribute='msg_count', start=dt(year=2020, month=2, day=13, hour=6), period='h') == 2
 
-    assert analyzer.total_number_of_messages(start=dt(year=2020, month=2, day=13, hour=6), period='d') == 4
+    assert analyzer.get_count(attribute='msg_count', start=dt(year=2020, month=2, day=13, hour=6), period='d') == 4
 
 
 def test_total_number_of_words(analyzer):
-    assert analyzer.total_number_of_words() == 86
+    assert analyzer.get_count(attribute='word_count', ) == 86
 
-    assert analyzer.total_number_of_words(start=dt(year=2000), period='y') == 0
-    assert analyzer.total_number_of_words(start=dt(year=2014), period='y') == 20
-    assert analyzer.total_number_of_words(start=dt(year=2018), period='y') == 32
-    assert analyzer.total_number_of_words(start=dt(year=2020), period='y') == 34
+    assert analyzer.get_count(attribute='word_count', start=dt(year=2000), period='y') == 0
+    assert analyzer.get_count(attribute='word_count', start=dt(year=2014), period='y') == 20
+    assert analyzer.get_count(attribute='word_count', start=dt(year=2018), period='y') == 32
+    assert analyzer.get_count(attribute='word_count', start=dt(year=2020), period='y') == 34
 
-    assert analyzer.total_number_of_words(start=dt(year=2014, month=9), period='m') == 6
-    assert analyzer.total_number_of_words(start=dt(year=2014, month=11), period='m') == 13
-    assert analyzer.total_number_of_words(start=dt(year=2014, month=12), period='m') == 1
+    assert analyzer.get_count(attribute='word_count', start=dt(year=2014, month=9), period='m') == 6
+    assert analyzer.get_count(attribute='word_count', start=dt(year=2014, month=11), period='m') == 13
+    assert analyzer.get_count(attribute='word_count', start=dt(year=2014, month=12), period='m') == 1
 
-    assert analyzer.total_number_of_words(start=dt(year=2018, month=1), period='m') == 32
-    assert analyzer.total_number_of_words(start=dt(year=2018, month=2), period='m') == 0
+    assert analyzer.get_count(attribute='word_count', start=dt(year=2018, month=1), period='m') == 32
+    assert analyzer.get_count(attribute='word_count', start=dt(year=2018, month=2), period='m') == 0
 
-    assert analyzer.total_number_of_words(start=dt(year=2020, month=2), period='m') == 27
-    assert analyzer.total_number_of_words(start=dt(year=2020, month=3), period='m') == 0
-    assert analyzer.total_number_of_words(start=dt(year=2020, month=4), period='m') == 4
-    assert analyzer.total_number_of_words(start=dt(year=2020, month=5), period='m') == 1
-    assert analyzer.total_number_of_words(start=dt(year=2020, month=6), period='m') == 0
-    assert analyzer.total_number_of_words(start=dt(year=2020, month=8), period='m') == 2
+    assert analyzer.get_count(attribute='word_count', start=dt(year=2020, month=2), period='m') == 27
+    assert analyzer.get_count(attribute='word_count', start=dt(year=2020, month=3), period='m') == 0
+    assert analyzer.get_count(attribute='word_count', start=dt(year=2020, month=4), period='m') == 4
+    assert analyzer.get_count(attribute='word_count', start=dt(year=2020, month=5), period='m') == 1
+    assert analyzer.get_count(attribute='word_count', start=dt(year=2020, month=6), period='m') == 0
+    assert analyzer.get_count(attribute='word_count', start=dt(year=2020, month=8), period='m') == 2
 
-    assert analyzer.total_number_of_words(start=dt(year=2020, month=2, day=13), period='d') == 14
-    assert analyzer.total_number_of_words(start=dt(year=2020, month=2, day=13, hour=5), period='d') == 14
+    assert analyzer.get_count(attribute='word_count', start=dt(year=2020, month=2, day=13), period='d') == 14
+    assert analyzer.get_count(attribute='word_count', start=dt(year=2020, month=2, day=13, hour=5), period='d') == 14
 
 
 def test_total_number_of_characters(analyzer):
-    assert analyzer.total_number_of_characters() == 379
+    assert analyzer.get_count(attribute='char_count', ) == 379
 
-    assert analyzer.total_number_of_characters(start=dt(year=2000), period='y') == 0
-    assert analyzer.total_number_of_characters(start=dt(year=2014), period='y') == 69
-    assert analyzer.total_number_of_characters(start=dt(year=2018), period='y') == 170
-    assert analyzer.total_number_of_characters(start=dt(year=2020), period='y') == 140
+    assert analyzer.get_count(attribute='char_count', start=dt(year=2000), period='y') == 0
+    assert analyzer.get_count(attribute='char_count', start=dt(year=2014), period='y') == 69
+    assert analyzer.get_count(attribute='char_count', start=dt(year=2018), period='y') == 170
+    assert analyzer.get_count(attribute='char_count', start=dt(year=2020), period='y') == 140
 
-    assert analyzer.total_number_of_characters(start=dt(year=2014, month=9), period='m') == 24
-    assert analyzer.total_number_of_characters(start=dt(year=2014, month=11), period='m') == 42
-    assert analyzer.total_number_of_characters(start=dt(year=2014, month=12), period='m') == 3
+    assert analyzer.get_count(attribute='char_count', start=dt(year=2014, month=9), period='m') == 24
+    assert analyzer.get_count(attribute='char_count', start=dt(year=2014, month=11), period='m') == 42
+    assert analyzer.get_count(attribute='char_count', start=dt(year=2014, month=12), period='m') == 3
 
-    assert analyzer.total_number_of_characters(start=dt(year=2018, month=1), period='m') == 170
-    assert analyzer.total_number_of_characters(start=dt(year=2018, month=2), period='m') == 0
+    assert analyzer.get_count(attribute='char_count', start=dt(year=2018, month=1), period='m') == 170
+    assert analyzer.get_count(attribute='char_count', start=dt(year=2018, month=2), period='m') == 0
 
-    assert analyzer.total_number_of_characters(start=dt(year=2020, month=2), period='m') == 114
-    assert analyzer.total_number_of_characters(start=dt(year=2020, month=3), period='m') == 0
-    assert analyzer.total_number_of_characters(start=dt(year=2020, month=4), period='m') == 17
-    assert analyzer.total_number_of_characters(start=dt(year=2020, month=5), period='m') == 4
-    assert analyzer.total_number_of_characters(start=dt(year=2020, month=6), period='m') == 0
-    assert analyzer.total_number_of_characters(start=dt(year=2020, month=8), period='m') == 5
+    assert analyzer.get_count(attribute='char_count', start=dt(year=2020, month=2), period='m') == 114
+    assert analyzer.get_count(attribute='char_count', start=dt(year=2020, month=3), period='m') == 0
+    assert analyzer.get_count(attribute='char_count', start=dt(year=2020, month=4), period='m') == 17
+    assert analyzer.get_count(attribute='char_count', start=dt(year=2020, month=5), period='m') == 4
+    assert analyzer.get_count(attribute='char_count', start=dt(year=2020, month=6), period='m') == 0
+    assert analyzer.get_count(attribute='char_count', start=dt(year=2020, month=8), period='m') == 5
 
 
 def test_total_number_of_messages_sent(analyzer):
-    assert analyzer.total_number_of_messages_sent() == 17
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2014), period='y') == 6
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2018), period='y') == 2
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2020), period='y') == 9
+    assert analyzer.get_count(attribute='msg_count', subject='me', ) == 17
+    assert analyzer.get_count(attribute='msg_count', subject='me', start=dt(year=2014), period='y') == 6
+    assert analyzer.get_count(attribute='msg_count', subject='me', start=dt(year=2018), period='y') == 2
+    assert analyzer.get_count(attribute='msg_count', subject='me', start=dt(year=2020), period='y') == 9
 
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2014, month=9), period='m') == 1
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2014, month=11), period='m') == 4
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2014, month=12), period='m') == 1
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2018, month=1), period='m') == 2
+    assert analyzer.get_count(attribute='msg_count', subject='me', start=dt(year=2014, month=9), period='m') == 1
+    assert analyzer.get_count(attribute='msg_count', subject='me', start=dt(year=2014, month=11), period='m') == 4
+    assert analyzer.get_count(attribute='msg_count', subject='me', start=dt(year=2014, month=12), period='m') == 1
+    assert analyzer.get_count(attribute='msg_count', subject='me', start=dt(year=2018, month=1), period='m') == 2
 
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2000), period='y') == 0
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2011, month=11), period='m') == 0
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2018, month=5), period='m') == 0
+    assert analyzer.get_count(attribute='msg_count', subject='me', start=dt(year=2000), period='y') == 0
+    assert analyzer.get_count(attribute='msg_count', subject='me', start=dt(year=2011, month=11), period='m') == 0
+    assert analyzer.get_count(attribute='msg_count', subject='me', start=dt(year=2018, month=5), period='m') == 0
 
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2020, month=2), period='m') == 6
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2020, month=3), period='m') == 0
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2020, month=4), period='m') == 2
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2020, month=5), period='m') == 0
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2020, month=6), period='m') == 0
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2020, month=8), period='m') == 1
+    assert analyzer.get_count(attribute='msg_count', subject='me', start=dt(year=2020, month=2), period='m') == 6
+    assert analyzer.get_count(attribute='msg_count', subject='me', start=dt(year=2020, month=3), period='m') == 0
+    assert analyzer.get_count(attribute='msg_count', subject='me', start=dt(year=2020, month=4), period='m') == 2
+    assert analyzer.get_count(attribute='msg_count', subject='me', start=dt(year=2020, month=5), period='m') == 0
+    assert analyzer.get_count(attribute='msg_count', subject='me', start=dt(year=2020, month=6), period='m') == 0
+    assert analyzer.get_count(attribute='msg_count', subject='me', start=dt(year=2020, month=8), period='m') == 1
 
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2020, month=2, day=13), period='d') == 1
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2020, month=2, day=13, hour=6), period='h') == 1
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2020, month=2, day=13, hour=18), period='h') == 0
+    assert analyzer.get_count(attribute='msg_count', subject='me', start=dt(year=2020, month=2, day=13), period='d') == 1
+    assert analyzer.get_count(attribute='msg_count', subject='me', start=dt(year=2020, month=2, day=13, hour=6), period='h') == 1
+    assert analyzer.get_count(attribute='msg_count', subject='me', start=dt(year=2020, month=2, day=13, hour=18), period='h') == 0
 
 
 def test_total_number_of_words_sent(analyzer):
-    assert analyzer.total_number_of_words_sent() == 69
+    assert analyzer.get_count(attribute='word_count', subject='me', ) == 69
 
-    assert analyzer.total_number_of_words_sent(start=dt(year=2000), period='y') == 0
-    assert analyzer.total_number_of_words_sent(start=dt(year=2014), period='y') == 16
-    assert analyzer.total_number_of_words_sent(start=dt(year=2018), period='y') == 31
-    assert analyzer.total_number_of_words_sent(start=dt(year=2020), period='y') == 22
+    assert analyzer.get_count(attribute='word_count', subject='me', start=dt(year=2000), period='y') == 0
+    assert analyzer.get_count(attribute='word_count', subject='me', start=dt(year=2014), period='y') == 16
+    assert analyzer.get_count(attribute='word_count', subject='me', start=dt(year=2018), period='y') == 31
+    assert analyzer.get_count(attribute='word_count', subject='me', start=dt(year=2020), period='y') == 22
 
-    assert analyzer.total_number_of_words_sent(start=dt(year=2014, month=9), period='m') == 6
-    assert analyzer.total_number_of_words_sent(start=dt(year=2014, month=11), period='m') == 9
-    assert analyzer.total_number_of_words_sent(start=dt(year=2014, month=12), period='m') == 1
+    assert analyzer.get_count(attribute='word_count', subject='me', start=dt(year=2014, month=9), period='m') == 6
+    assert analyzer.get_count(attribute='word_count', subject='me', start=dt(year=2014, month=11), period='m') == 9
+    assert analyzer.get_count(attribute='word_count', subject='me', start=dt(year=2014, month=12), period='m') == 1
 
-    assert analyzer.total_number_of_words_sent(start=dt(year=2018, month=1), period='m') == 31
-    assert analyzer.total_number_of_words_sent(start=dt(year=2018, month=2), period='m') == 0
+    assert analyzer.get_count(attribute='word_count', subject='me', start=dt(year=2018, month=1), period='m') == 31
+    assert analyzer.get_count(attribute='word_count', subject='me', start=dt(year=2018, month=2), period='m') == 0
 
-    assert analyzer.total_number_of_words_sent(start=dt(year=2020, month=2), period='m') == 16
-    assert analyzer.total_number_of_words_sent(start=dt(year=2020, month=3), period='m') == 0
-    assert analyzer.total_number_of_words_sent(start=dt(year=2020, month=4), period='m') == 4
-    assert analyzer.total_number_of_words_sent(start=dt(year=2020, month=5), period='m') == 0
-    assert analyzer.total_number_of_words_sent(start=dt(year=2020, month=6), period='m') == 0
-    assert analyzer.total_number_of_words_sent(start=dt(year=2020, month=8), period='m') == 2
+    assert analyzer.get_count(attribute='word_count', subject='me', start=dt(year=2020, month=2), period='m') == 16
+    assert analyzer.get_count(attribute='word_count', subject='me', start=dt(year=2020, month=3), period='m') == 0
+    assert analyzer.get_count(attribute='word_count', subject='me', start=dt(year=2020, month=4), period='m') == 4
+    assert analyzer.get_count(attribute='word_count', subject='me', start=dt(year=2020, month=5), period='m') == 0
+    assert analyzer.get_count(attribute='word_count', subject='me', start=dt(year=2020, month=6), period='m') == 0
+    assert analyzer.get_count(attribute='word_count', subject='me', start=dt(year=2020, month=8), period='m') == 2
 
-    assert analyzer.total_number_of_words_sent(start=dt(year=2020, month=2, day=13), period='d') == 5
-    assert analyzer.total_number_of_words_sent(start=dt(year=2020, month=2, day=13, hour=6), period='h') == 5
-    assert analyzer.total_number_of_words_sent(start=dt(year=2020, month=2, day=13, hour=7), period='h') == 0
+    assert analyzer.get_count(attribute='word_count', subject='me', start=dt(year=2020, month=2, day=13), period='d') == 5
+    assert analyzer.get_count(attribute='word_count', subject='me', start=dt(year=2020, month=2, day=13, hour=6), period='h') == 5
+    assert analyzer.get_count(attribute='word_count', subject='me', start=dt(year=2020, month=2, day=13, hour=7), period='h') == 0
 
 
 def test_total_number_of_characters_sent(analyzer):
-    assert analyzer.total_number_of_characters_sent() == 311
+    assert analyzer.get_count(attribute='char_count', subject='me', ) == 311
 
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2000), period='y') == 0
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2014), period='y') == 60
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2018), period='y') == 167
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020), period='y') == 84
+    assert analyzer.get_count(attribute='char_count', subject='me', start=dt(year=2000), period='y') == 0
+    assert analyzer.get_count(attribute='char_count', subject='me', start=dt(year=2014), period='y') == 60
+    assert analyzer.get_count(attribute='char_count', subject='me', start=dt(year=2018), period='y') == 167
+    assert analyzer.get_count(attribute='char_count', subject='me', start=dt(year=2020), period='y') == 84
 
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2014, month=9), period='m') == 24
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2014, month=11), period='m') == 33
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2014, month=12), period='m') == 3
+    assert analyzer.get_count(attribute='char_count', subject='me', start=dt(year=2014, month=9), period='m') == 24
+    assert analyzer.get_count(attribute='char_count', subject='me', start=dt(year=2014, month=11), period='m') == 33
+    assert analyzer.get_count(attribute='char_count', subject='me', start=dt(year=2014, month=12), period='m') == 3
 
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2018, month=1), period='m') == 167
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2018, month=2), period='m') == 0
+    assert analyzer.get_count(attribute='char_count', subject='me', start=dt(year=2018, month=1), period='m') == 167
+    assert analyzer.get_count(attribute='char_count', subject='me', start=dt(year=2018, month=2), period='m') == 0
 
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020, month=2), period='m') == 62
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020, month=3), period='m') == 0
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020, month=4), period='m') == 17
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020, month=5), period='m') == 0
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020, month=6), period='m') == 0
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020, month=8), period='m') == 5
+    assert analyzer.get_count(attribute='char_count', subject='me', start=dt(year=2020, month=2), period='m') == 62
+    assert analyzer.get_count(attribute='char_count', subject='me', start=dt(year=2020, month=3), period='m') == 0
+    assert analyzer.get_count(attribute='char_count', subject='me', start=dt(year=2020, month=4), period='m') == 17
+    assert analyzer.get_count(attribute='char_count', subject='me', start=dt(year=2020, month=5), period='m') == 0
+    assert analyzer.get_count(attribute='char_count', subject='me', start=dt(year=2020, month=6), period='m') == 0
+    assert analyzer.get_count(attribute='char_count', subject='me', start=dt(year=2020, month=8), period='m') == 5
 
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020, month=2, day=13, hour=6), period='d') == 21
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020, month=2, day=13, hour=7), period='d') == 0
+    assert analyzer.get_count(attribute='char_count', subject='me', start=dt(year=2020, month=2, day=13, hour=6), period='d') == 21
+    assert analyzer.get_count(attribute='char_count', subject='me', start=dt(year=2020, month=2, day=13, hour=7), period='d') == 0
 
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020, month=2, day=13, hour=6), period='h') == 21
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020, month=2, day=13, hour=7), period='h') == 0
+    assert analyzer.get_count(attribute='char_count', subject='me', start=dt(year=2020, month=2, day=13, hour=6), period='h') == 21
+    assert analyzer.get_count(attribute='char_count', subject='me', start=dt(year=2020, month=2, day=13, hour=7), period='h') == 0
 
 
 def test_total_number_of_messages_received(analyzer):
-    assert analyzer.total_number_of_messages_received() == 12
-    assert analyzer.total_number_of_messages_received(start=dt(year=2000), period='y') == 0
-    assert analyzer.total_number_of_messages_received(start=dt(year=2014), period='y') == 5
-    assert analyzer.total_number_of_messages_received(start=dt(year=2018), period='y') == 1
-    assert analyzer.total_number_of_messages_received(start=dt(year=2020), period='y') == 6
+    assert analyzer.get_count(attribute='msg_count', subject='partner', ) == 12
+    assert analyzer.get_count(attribute='msg_count', subject='partner', start=dt(year=2000), period='y') == 0
+    assert analyzer.get_count(attribute='msg_count', subject='partner', start=dt(year=2014), period='y') == 5
+    assert analyzer.get_count(attribute='msg_count', subject='partner', start=dt(year=2018), period='y') == 1
+    assert analyzer.get_count(attribute='msg_count', subject='partner', start=dt(year=2020), period='y') == 6
 
-    assert analyzer.total_number_of_messages_received(start=dt(year=2011, month=11), period='m') == 0
+    assert analyzer.get_count(attribute='msg_count', subject='partner', start=dt(year=2011, month=11), period='m') == 0
 
-    assert analyzer.total_number_of_messages_received(start=dt(year=2014, month=9), period='m') == 0
-    assert analyzer.total_number_of_messages_received(start=dt(year=2014, month=11), period='m') == 4
-    assert analyzer.total_number_of_messages_received(start=dt(year=2014, month=12), period='m') == 1
+    assert analyzer.get_count(attribute='msg_count', subject='partner', start=dt(year=2014, month=9), period='m') == 0
+    assert analyzer.get_count(attribute='msg_count', subject='partner', start=dt(year=2014, month=11), period='m') == 4
+    assert analyzer.get_count(attribute='msg_count', subject='partner', start=dt(year=2014, month=12), period='m') == 1
 
-    assert analyzer.total_number_of_messages_received(start=dt(year=2018, month=1), period='m') == 1
-    assert analyzer.total_number_of_messages_received(start=dt(year=2018, month=5), period='m') == 0
+    assert analyzer.get_count(attribute='msg_count', subject='partner', start=dt(year=2018, month=1), period='m') == 1
+    assert analyzer.get_count(attribute='msg_count', subject='partner', start=dt(year=2018, month=5), period='m') == 0
 
-    assert analyzer.total_number_of_messages_received(start=dt(year=2020, month=2), period='m') == 4
-    assert analyzer.total_number_of_messages_received(start=dt(year=2020, month=3), period='m') == 1
-    assert analyzer.total_number_of_messages_received(start=dt(year=2020, month=4), period='m') == 0
-    assert analyzer.total_number_of_messages_received(start=dt(year=2020, month=5), period='m') == 1
-    assert analyzer.total_number_of_messages_received(start=dt(year=2020, month=8), period='m') == 0
+    assert analyzer.get_count(attribute='msg_count', subject='partner', start=dt(year=2020, month=2), period='m') == 4
+    assert analyzer.get_count(attribute='msg_count', subject='partner', start=dt(year=2020, month=3), period='m') == 1
+    assert analyzer.get_count(attribute='msg_count', subject='partner', start=dt(year=2020, month=4), period='m') == 0
+    assert analyzer.get_count(attribute='msg_count', subject='partner', start=dt(year=2020, month=5), period='m') == 1
+    assert analyzer.get_count(attribute='msg_count', subject='partner', start=dt(year=2020, month=8), period='m') == 0
 
-    assert analyzer.total_number_of_messages_received(start=dt(year=2020, month=2, day=13), period='d') == 1
-    assert analyzer.total_number_of_messages_received(start=dt(year=2020, month=2, day=14), period='d') == 2
-    assert analyzer.total_number_of_messages_received(start=dt(year=2020, month=2, day=18), period='d') == 1
+    assert analyzer.get_count(attribute='msg_count', subject='partner', start=dt(year=2020, month=2, day=13), period='d') == 1
+    assert analyzer.get_count(attribute='msg_count', subject='partner', start=dt(year=2020, month=2, day=14), period='d') == 2
+    assert analyzer.get_count(attribute='msg_count', subject='partner', start=dt(year=2020, month=2, day=18), period='d') == 1
 
 
 def test_total_number_of_words_received(analyzer):
-    assert analyzer.total_number_of_words_received() == 17
+    assert analyzer.get_count(attribute='word_count', subject='partner', ) == 17
 
-    assert analyzer.total_number_of_words_received(start=dt(year=2000), period='y') == 0
-    assert analyzer.total_number_of_words_received(start=dt(year=2014), period='y') == 4
-    assert analyzer.total_number_of_words_received(start=dt(year=2018), period='y') == 1
-    assert analyzer.total_number_of_words_received(start=dt(year=2020), period='y') == 12
+    assert analyzer.get_count(attribute='word_count', subject='partner', start=dt(year=2000), period='y') == 0
+    assert analyzer.get_count(attribute='word_count', subject='partner', start=dt(year=2014), period='y') == 4
+    assert analyzer.get_count(attribute='word_count', subject='partner', start=dt(year=2018), period='y') == 1
+    assert analyzer.get_count(attribute='word_count', subject='partner', start=dt(year=2020), period='y') == 12
 
-    assert analyzer.total_number_of_words_received(start=dt(year=2014, month=9), period='m') == 0
-    assert analyzer.total_number_of_words_received(start=dt(year=2014, month=11), period='m') == 4
-    assert analyzer.total_number_of_words_received(start=dt(year=2014, month=12), period='m') == 0
+    assert analyzer.get_count(attribute='word_count', subject='partner', start=dt(year=2014, month=9), period='m') == 0
+    assert analyzer.get_count(attribute='word_count', subject='partner', start=dt(year=2014, month=11), period='m') == 4
+    assert analyzer.get_count(attribute='word_count', subject='partner', start=dt(year=2014, month=12), period='m') == 0
 
-    assert analyzer.total_number_of_words_received(start=dt(year=2018, month=1), period='m') == 1
-    assert analyzer.total_number_of_words_received(start=dt(year=2018, month=2), period='m') == 0
+    assert analyzer.get_count(attribute='word_count', subject='partner', start=dt(year=2018, month=1), period='m') == 1
+    assert analyzer.get_count(attribute='word_count', subject='partner', start=dt(year=2018, month=2), period='m') == 0
 
-    assert analyzer.total_number_of_words_received(start=dt(year=2020, month=2), period='m') == 11
-    assert analyzer.total_number_of_words_received(start=dt(year=2020, month=3), period='m') == 0
-    assert analyzer.total_number_of_words_received(start=dt(year=2020, month=5), period='m') == 1
+    assert analyzer.get_count(attribute='word_count', subject='partner', start=dt(year=2020, month=2), period='m') == 11
+    assert analyzer.get_count(attribute='word_count', subject='partner', start=dt(year=2020, month=3), period='m') == 0
+    assert analyzer.get_count(attribute='word_count', subject='partner', start=dt(year=2020, month=5), period='m') == 1
 
-    assert analyzer.total_number_of_words_received(start=dt(year=2020, month=2, day=13), period='d') == 9
-    assert analyzer.total_number_of_words_received(start=dt(year=2020, month=2, day=14), period='d') == 2
-    assert analyzer.total_number_of_words_received(start=dt(year=2020, month=2, day=18), period='d') == 0
+    assert analyzer.get_count(attribute='word_count', subject='partner', start=dt(year=2020, month=2, day=13), period='d') == 9
+    assert analyzer.get_count(attribute='word_count', subject='partner', start=dt(year=2020, month=2, day=14), period='d') == 2
+    assert analyzer.get_count(attribute='word_count', subject='partner', start=dt(year=2020, month=2, day=18), period='d') == 0
 
 
 def test_total_number_of_characters_received(analyzer):
-    assert analyzer.total_number_of_characters_received() == 68
+    assert analyzer.get_count(attribute='char_count', subject='partner', ) == 68
 
-    assert analyzer.total_number_of_characters_received(start=dt(year=2000), period='y') == 0
-    assert analyzer.total_number_of_characters_received(start=dt(year=2014), period='y') == 9
-    assert analyzer.total_number_of_characters_received(start=dt(year=2018), period='y') == 3
-    assert analyzer.total_number_of_characters_received(start=dt(year=2020), period='y') == 56
+    assert analyzer.get_count(attribute='char_count', subject='partner', start=dt(year=2000), period='y') == 0
+    assert analyzer.get_count(attribute='char_count', subject='partner', start=dt(year=2014), period='y') == 9
+    assert analyzer.get_count(attribute='char_count', subject='partner', start=dt(year=2018), period='y') == 3
+    assert analyzer.get_count(attribute='char_count', subject='partner', start=dt(year=2020), period='y') == 56
 
-    assert analyzer.total_number_of_characters_received(start=dt(year=2014, month=9), period='m') == 0
-    assert analyzer.total_number_of_characters_received(start=dt(year=2014, month=11), period='m') == 9
-    assert analyzer.total_number_of_characters_received(start=dt(year=2014, month=12), period='m') == 0
+    assert analyzer.get_count(attribute='char_count', subject='partner', start=dt(year=2014, month=9), period='m') == 0
+    assert analyzer.get_count(attribute='char_count', subject='partner', start=dt(year=2014, month=11), period='m') == 9
+    assert analyzer.get_count(attribute='char_count', subject='partner', start=dt(year=2014, month=12), period='m') == 0
 
-    assert analyzer.total_number_of_characters_received(start=dt(year=2018, month=1), period='m') == 3
-    assert analyzer.total_number_of_characters_received(start=dt(year=2018, month=2), period='m') == 0
+    assert analyzer.get_count(attribute='char_count', subject='partner', start=dt(year=2018, month=1), period='m') == 3
+    assert analyzer.get_count(attribute='char_count', subject='partner', start=dt(year=2018, month=2), period='m') == 0
 
-    assert analyzer.total_number_of_characters_received(start=dt(year=2020, month=2), period='m') == 52
-    assert analyzer.total_number_of_characters_received(start=dt(year=2020, month=3), period='m') == 0
-    assert analyzer.total_number_of_characters_received(start=dt(year=2020, month=5), period='m') == 4
+    assert analyzer.get_count(attribute='char_count', subject='partner', start=dt(year=2020, month=2), period='m') == 52
+    assert analyzer.get_count(attribute='char_count', subject='partner', start=dt(year=2020, month=3), period='m') == 0
+    assert analyzer.get_count(attribute='char_count', subject='partner', start=dt(year=2020, month=5), period='m') == 4
 
-    assert analyzer.total_number_of_characters_received(start=dt(year=2020, month=2, day=13), period='d') == 30
-    assert analyzer.total_number_of_characters_received(start=dt(year=2020, month=2, day=14), period='d') == 22
-    assert analyzer.total_number_of_characters_received(start=dt(year=2020, month=2, day=18), period='d') == 0
+    assert analyzer.get_count(attribute='char_count', subject='partner', start=dt(year=2020, month=2, day=13), period='d') == 30
+    assert analyzer.get_count(attribute='char_count', subject='partner', start=dt(year=2020, month=2, day=14), period='d') == 22
+    assert analyzer.get_count(attribute='char_count', subject='partner', start=dt(year=2020, month=2, day=18), period='d') == 0
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 4d11263..3e57e17 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -65,19 +65,20 @@
 
 
 def test_generate_date_series():
+    # TODO resolve
     start = datetime(2020, 1, 1, 0, 0)
     end = datetime(2021, 1, 1, 0, 0)
 
-    date_range_year = generate_date_series(start, end, 'y')
+    date_range_year = generate_date_series('y', start, end)
     assert len(date_range_year) == 1 + 1
 
-    date_range_month = generate_date_series(start, end, 'm')
+    date_range_month = generate_date_series('m', start, end)
     assert len(date_range_month) == 12 + 1
 
-    date_range_day = generate_date_series(start, end, 'd')
+    date_range_day = generate_date_series('d', start, end)
     assert len(date_range_day) == 366 + 1
 
-    date_range_hour = generate_date_series(start, end, 'h')
+    date_range_hour = generate_date_series('h', start, end)
     assert len(date_range_hour) == (366 * 24) + 1
 
     for day in date_range_day:

From 55ba0d0f3f21a96bb7a18315ae7d7b8f596e1863 Mon Sep 17 00:00:00 2001
From: Levente Csoke <leventec3@gmail.com>
Date: Sun, 16 Aug 2020 14:35:48 +0200
Subject: [PATCH 3/3] refactored analyzer; added some documentation; resolved
 TODOs

---
 miner/Analyzer.py                  | 108 ++++++++++++++---------------
 miner/App.py                       |  11 ++-
 miner/Conversations.py             |  16 +++--
 miner/FacebookData.py              |   6 +-
 miner/Friends.py                   |   3 +
 miner/Group.py                     |   8 +--
 miner/Individual.py                |   4 ++
 miner/Me.py                        |   4 ++
 miner/Messages.py                  |  10 +--
 miner/People.py                    |  11 ++-
 miner/requirements.txt             |   4 --
 miner/utils.py                     |  67 ++++++++++++++----
 tests/conftest.py                  |   4 +-
 tests/test_ConversationAnalyzer.py |   7 +-
 tests/test_Conversations.py        |   3 +-
 tests/test_Friends.py              |   3 +-
 tests/test_utils.py                |  10 +--
 17 files changed, 166 insertions(+), 113 deletions(-)

diff --git a/miner/Analyzer.py b/miner/Analyzer.py
index b0e61bc..17d9f68 100644
--- a/miner/Analyzer.py
+++ b/miner/Analyzer.py
@@ -1,34 +1,45 @@
+import pandas as pd
+
 from miner.ConversationStats import ConversationStats
 from miner import utils
-import pandas as pd
 
 
 class Analyzer:
-    # TODO do we need to override __subclasscheck__ ?
+    """
+    Analyzer for analyzing specific and/or all conversations
 
-    # def __new__(cls, name, messages, *args, **kwargs):
-    #     if messages is None:  # This deals with the case if no messages
-    #         return None
-    #     return super(Analyzer, cls).__new__(cls, *args, **kwargs)
+    """
 
     def __init__(self, people):
         self.people = people
         self.people_data = people.data
-        self.names = people.names
+        self.names = list(people.names)
         self.multi = len(self.people_data) > 1
 
         if self.multi:
-            self.df = self.stack_dfs()
+            self.df = self.stack_dfs(self.people_data)
+        else:
+            self.df = self.people_data.get(self.names[0]).messages
+
+    def __str__(self):
+        if self.multi:
+            return str(self.names)  # __str__ must return a str; self.names is a list
         else:
-            # TODO solve this hand in hand with the __new__ method. too ugly
-            self.df = self.people_data.get(list(self.names)[0]).messages
+            return f'{self.names[0]}: {list(self.df.index)}'
 
-    def get_stats_for_intervals(self, time_series, subject='all'):
+    @property
+    def stats(self):
+        return self.get_stats()
+
+    def get_stats_for_intervals(self, time_series, period, subject='all'):
         data = {}
-        for i in range(len(time_series) - 1):  # only looping len - 1 times
+        for i in range(len(time_series)):
             start = time_series[i]
-            end = time_series[i + 1]
-            data[start] = self.get_stats(self.df, subject=subject, start=start, end=end)
+            try:  # with this solution we will have data for the very last moments until datetime.now()
+                end = time_series[i + 1]
+            except IndexError:
+                end = None
+            data[start] = self.get_stats(df=self.df, subject=subject, start=start, end=end, period=period)
         return data
 
     def get_stats(self, df=None, subject='all', start=None, end=None, period=None):
@@ -37,30 +48,6 @@ def get_stats(self, df=None, subject='all', start=None, end=None, period=None):
         stats = ConversationStats(df)
         return stats
 
-    @staticmethod
-    def get_plottable_time_series_data(interval_stats, statistic):
-        for k, v in interval_stats.items():
-            if isinstance(v, ConversationStats):
-                interval_stats[k] = getattr(v, statistic)
-        return interval_stats
-
-    @property
-    def stats(self):
-        return self.get_stats()
-
-    def __str__(self):
-        if self.multi:
-            return self.names
-        else:
-            return f'{self.names[0]}: {list(self.df.index)}'
-
-    def stack_dfs(self):
-        dfs = []
-        for data in self.people_data.values():
-            if data.messages is not None:
-                dfs.append(data.messages)
-        return pd.concat(dfs).sort_index()
-
     # 1. Total count of messages/words/characters (also by year/month/day/hour)
     # 2. Total count of messages/words/characters sent (also by year/month/day/hour)
     # 3. Total count of messages/words/characters received (also by year/month)
@@ -68,8 +55,6 @@ def get_count(self, attribute, subject='all', start=None, end=None, period=None)
         stats = self.get_stats(subject=subject, start=start, end=end, period=period)
         return getattr(stats, attribute)
 
-    #################
-
     # 4. Most used messages/words in convos by me/partner (also by year/month/day/hour)
     def most_used_messages_(self, **kwargs):
         """
@@ -88,38 +73,47 @@ def most_used_messages_(self, **kwargs):
         pass
 
     # 5. Number of messages sent/got on busiest period (by year/month/day/hour)
-    def stat_per_period(self, period, attribute, **kwargs):
+    def stat_per_period(self, period, statistic, **kwargs):
         interval_stats = self.get_time_series_data(period, **kwargs)
-        # TODO attribute is one of (msg, word, char)
-        time_series_data = self.get_plottable_time_series_data(interval_stats, statistic=attribute)
+        time_series_data = self.get_stat_count(interval_stats, statistic=statistic)
         return utils.count_stat_for_period(time_series_data, period)
 
-    # 6. Time series: dict of 'year/month/day/hour : number of messages/words/characters (also sent/got) for user/all convos'
+    # 6. Time series: dict of 'y/m/d/h : number of messages/words/characters (also sent/got) for user/all convos'
     def get_time_series_data(self, period, subject='all', **kwargs):
-        time_series = utils.generate_date_series(period, **kwargs)
-        return self.get_stats_for_intervals(self.df, time_series, subject=subject)
+        time_series = utils.generate_date_series(period=period, **kwargs)
+        return self.get_stats_for_intervals(time_series, period, subject=subject)
 
-    # # 7. Ranking of friends by messages by y/m/d/h, by different stats, by sent/got
-    def get_ranking_of_friends_by_messages(self, attribute='msg_count', subject='all', start=None, end=None,
-                                           period=None):
-        # TODO almost the same function as get_count
+    # 7. Ranking of partners by messages by y/m/d/h, by different stats, by sent/got
+    def get_ranking_of_partners_by_messages(self, statistic='msg_count', **kwargs):
         count_dict = {}
         for name in self.names:
-            # analyzer = Analyzer({name: self.people.get(name)}) # this has to be a people instance?! OR?
-            # analyzer = Analyzer(People(self.people.data_path, name=name))  # this has to be a people instance?! OR?
             df = self.df[self.df.partner == name]
-            stats = self.get_stats(df=df, subject=subject, start=start, end=end, period=period)
+            stats = self.get_stats(df=df, **kwargs)
             if stats is not None:
-                count_dict = utils.fill_dict(count_dict, name, getattr(stats, attribute))
-
-        count_dict = {key: value for key, value in sorted(count_dict.items(), key=lambda item: item[1], reverse=True)}
+                count_dict = utils.fill_dict(count_dict, name, getattr(stats, statistic))
         return count_dict
 
+    @staticmethod
+    def stack_dfs(people_data):
+        dfs = []
+        for data in people_data.values():
+            if data.messages is not None:
+                dfs.append(data.messages)
+        return pd.concat(dfs).sort_index()
+
+    @staticmethod
+    @utils.attribute_checker
+    def get_stat_count(interval_stats, statistic='msg_count'):
+        for k, v in interval_stats.items():
+            interval_stats[k] = getattr(v, statistic)
+        return interval_stats
+
     @staticmethod
     @utils.subject_checker
     @utils.date_checker
-    @utils.period_checker
+    @utils.start_end_period_checker
     def filter_by_input(df, subject='all', start=None, end=None, period=None):
+
         if subject == 'me':
             df = df[df.sender_name == 'Levente Csőke']
         elif subject == 'partner':
diff --git a/miner/App.py b/miner/App.py
index 4989e8b..7813f9e 100644
--- a/miner/App.py
+++ b/miner/App.py
@@ -1,11 +1,16 @@
-from miner.Analyzer import Analyzer
+import os
 
+from miner.Analyzer import Analyzer
 from miner.People import People
 
-DATA_PATH = '/home/levente/projects/facebook-data-miner/data'
+DATA_PATH = f'{os.getcwd()}/data'
 
 
 class App:
+    """
+    Entrypoint. Not yet used extensively.
+    # TODO LATER turn it into a cli
+    """
     def __init__(self):
         pass
 
@@ -14,7 +19,7 @@ def analyze_messages():
         p = People(path=DATA_PATH)
 
         analyzer = Analyzer(p)
-        rank = analyzer.get_ranking_of_friends_by_messages(attribute='char_count')
+        rank = analyzer.get_ranking_of_partners_by_messages(statistic='char_count')
 
 
 if __name__ == '__main__':
diff --git a/miner/Conversations.py b/miner/Conversations.py
index 7a373d4..b4a4381 100644
--- a/miner/Conversations.py
+++ b/miner/Conversations.py
@@ -1,7 +1,6 @@
 import pandas as pd
 import os
 
-
 from miner.Messages import Messages
 from miner.Individual import Individual
 
@@ -9,9 +8,13 @@
 
 
 class Conversations:
+    """
+    Class for managing and parsing conversations
+    """
+
     def __init__(self, data_path):
         self.private_convo_paths = {}
-        self.group_convo_paths = {} # TODO fill this as well
+        self.group_convo_paths = {}  # TODO LATER fill this as well
         self.deleted_user_convo_paths = []  # NOTE these are collected but not yet used
 
         self.data_path = f'{data_path}/{utils.MESSAGE_SUBPATH}'
@@ -41,7 +44,7 @@ def differentiate_paths(self, jsons):
 
     def register_paths(self):
         utils.dump_to_json(self.private_convo_paths, f'{self.data_path}/private_messages.json')
-        
+
     def read_paths(self, file):
         self.private_convo_paths = utils.read_json(file)
         print()
@@ -55,6 +58,8 @@ def map_private_convo_files(self, msg, file):
 
     def map_group_convo_files(self, msg, file):
         for participant in msg.participants:
+            if participant == 'Levente Csőke':
+                continue
             if self.group_convo_paths.get(file):
                 self.group_convo_paths[file].append(participant)
             else:
@@ -100,7 +105,4 @@ def group_membership(name):
         return None
 
     def get_people_from_group_messages(self):
-        pass  # TODO for v0.0.4
-
-
-
+        pass
diff --git a/miner/FacebookData.py b/miner/FacebookData.py
index 81b946f..ef5ba78 100644
--- a/miner/FacebookData.py
+++ b/miner/FacebookData.py
@@ -3,6 +3,10 @@
 
 
 class FacebookData:
+    """
+    Base class for reading in tabular data from JSONs.
+    """
+
     def __init__(self, json_path):
         self.json_path = json_path
         self._df = None
@@ -21,7 +25,7 @@ def json(self):
 
     @property
     def compact_names(self):
-        name_list = list(utils.without_accent_and_whitespace(utils.lower_names(self.names)))  # should be just fine
+        name_list = list(utils.without_accent_and_whitespace(utils.lower_names(self.names)))
         return name_list[0] if len(name_list) == 1 else name_list
 
     def to_df(self, field=None):
diff --git a/miner/Friends.py b/miner/Friends.py
index 98d995f..5acc1be 100644
--- a/miner/Friends.py
+++ b/miner/Friends.py
@@ -3,6 +3,9 @@
 
 
 class Friends(FacebookData):
+    """
+    Class for storing data in friends.json
+    """
 
     def __init__(self, *args):
         super().__init__(*args)
diff --git a/miner/Group.py b/miner/Group.py
index 94e1ed9..3a1b131 100644
--- a/miner/Group.py
+++ b/miner/Group.py
@@ -4,6 +4,10 @@
 
 
 class Group:
+    """
+    Class for holding a group-message's data
+    """
+
     def __init__(self, name=None, title=None, messages=None, compact=None, messages_dir=None, media_dir=None,
                  members=None):
         self._name = name
@@ -26,10 +30,6 @@ def title(self):
     def messages(self):
         return self._messages
 
-    # @property
-    # def get_message_jsons(self):
-    #     return self._messages
-
     @property
     def media_dir(self):
         return self._media_dir
diff --git a/miner/Individual.py b/miner/Individual.py
index 4518a5f..6f818ab 100644
--- a/miner/Individual.py
+++ b/miner/Individual.py
@@ -1,4 +1,8 @@
 class Individual:
+    """
+    Class for holding the data of a person the user has ever interacted with
+    """
+
     def __init__(self, name=None, compact=None, messages=None, friend=None, messages_dir=None,
                  media_dir=None,
                  member_of=None):
diff --git a/miner/Me.py b/miner/Me.py
index b10356d..ef2179e 100644
--- a/miner/Me.py
+++ b/miner/Me.py
@@ -2,6 +2,10 @@
 
 
 class Me(FacebookData):
+    """
+    Class for storing basic data about the user
+    """
+
     def __init__(self, *args):
         super().__init__(*args)
 
diff --git a/miner/Messages.py b/miner/Messages.py
index 6fbc9d3..ebdaadf 100644
--- a/miner/Messages.py
+++ b/miner/Messages.py
@@ -7,6 +7,10 @@
 
 
 class Messages(FacebookData):
+    """
+    Class for representing data of all the messages with a user or a group
+    """
+
     def __init__(self, json_path):
         super().__init__(json_path)
         self.to_df('messages')
@@ -15,7 +19,6 @@ def __init__(self, json_path):
 
     @property
     def names(self):
-        # TODO ugly
         try:
             return pd.DataFrame(self.participants)[0]
         except KeyError:
@@ -24,10 +27,7 @@ def names(self):
     @property
     def participants(self):
         participants = self.decoded.get('participants')
-        # TODO I should be IN
-        # but this breaks stuff at TestMessagingAnalyzer
-        return [p.get('name') for p in participants if p.get('name') != 'Levente Csőke']
-        # return [p.get('name') for p in participants if p.get('name')]
+        return [p.get('name') for p in participants if p.get('name')]
 
     @property
     def title(self):
diff --git a/miner/People.py b/miner/People.py
index 2970390..b7852e8 100644
--- a/miner/People.py
+++ b/miner/People.py
@@ -1,12 +1,18 @@
 import time
+import os
 
 from miner.Conversations import Conversations
 from miner.Friends import Friends
 
-DATA_PATH = '/home/levente/projects/facebook-data-miner/data'
+DATA_PATH = f'{os.getcwd()}/data'
 
 
 class People:
+    """
+    Class that manages and represents people from different kinds of interactions
+    # TODO LATER abstraction flaw?! people? person? indie?
+    """
+
     def __init__(self, path=None, name=None):
         self.data_path = path if path else DATA_PATH
         self._groups = []
@@ -19,7 +25,7 @@ def data(self):
 
     @property
     def names(self):
-        return self._names #if len(self._names) > 1 else self._names[0]
+        return self._names  # if len(self._names) > 1 else self._names[0]
 
     @property
     def groups(self):
@@ -31,7 +37,6 @@ def get_people(self, name=None):
         friends = friend.get_people(name=name)
         print('friends: ', time.time() - start)
 
-        # TODO LATER too slow -> store in file
         start = time.time()
         conversations = Conversations(self.data_path)
         print('convos1: ', time.time() - start)
diff --git a/miner/requirements.txt b/miner/requirements.txt
index 1262ec9..8ee3351 100644
--- a/miner/requirements.txt
+++ b/miner/requirements.txt
@@ -1,9 +1,5 @@
 numpy==1.18.1
 pandas==1.0.3
 dateparser==0.7.6
-seaborn==0.10.1
 matplotlib==3.2.1
-plotly==4.8.2
-miner==0.0.0
-Pillow==7.2.0
 python_dateutil==2.8.1
diff --git a/miner/utils.py b/miner/utils.py
index cfa7644..6f6f565 100644
--- a/miner/utils.py
+++ b/miner/utils.py
@@ -1,8 +1,9 @@
-import os
-import json
-import dateparser
-from datetime import datetime, timedelta
 from dateutil.relativedelta import relativedelta
+from datetime import datetime, timedelta
+import dateparser
+from itertools import islice
+import json
+import os
 
 MESSAGE_SUBPATH = 'messages/inbox'
 MEDIA_DIRS = ['photos', 'gifs', 'files', 'videos', 'audio']
@@ -111,11 +112,10 @@ def wrapper(*args, **kwargs):
     return wrapper
 
 
-def period_checker(func):
+def start_end_period_checker(func):
     def wrapper(*args, **kwargs):
         if kwargs.get('start') is not None and kwargs.get('end') is not None:
             return func(*args, **kwargs)
-
         if not kwargs.get('period') or DELTA_MAP[kwargs.get('period')] is None:
             raise ValueError('Parameter `period` should be one of {y, m, d, h}')
         kwargs['period'] = DELTA_MAP[kwargs.get('period')]
@@ -124,17 +124,33 @@ def wrapper(*args, **kwargs):
     return wrapper
 
 
-def generate_date_series(period, start=None, end=None):
-    if period is None or DELTA_MAP.get(period) is None:
-        raise ValueError('Parameter `period` should be one of {y, m, d, h}')
-    start = start or datetime(year=2009, month=10, day=2, hour=0)  # TODO LATER change this to date when user joined FB
-    end = end or datetime.now()
+def period_checker(func):
+    def wrapper(*args, **kwargs):
+        if not kwargs.get('period') or DELTA_MAP.get(kwargs.get('period')) is None:  # .get: unknown period -> ValueError, not KeyError
+            raise ValueError('Parameter `period` should be one of {y, m, d, h}')
+        return func(*args, **kwargs)
+
+    return wrapper
 
-    # TODO THIS HAS A PROBLEM. msgs happened in 2020 getting assigned to 2019 because: 2019 + 1 year + start.month + start.day < now()
-    # TODO serious problem!
+
+def get_start_based_on_period(join_date, period):
+    if period == 'y':
+        return datetime(join_date.year, 1, 1)
+    elif period == 'm':
+        return datetime(join_date.year, join_date.month, 1)
+    return join_date
+
+
+@period_checker
+def generate_date_series(period='y', start=None, end=None):
     dates = []
+
+    join_date = datetime(year=2009, month=10, day=2)  # TODO later get this from somewhere
+    start = start or get_start_based_on_period(join_date, period)
+    end = end or datetime.now()
+
     intermediate = start
-    while intermediate <= (end + DELTA_MAP.get(period)):  # means that we want to have the end in it as well
+    while intermediate <= end:  # means that we want to have the end in it as well
         dates.append(intermediate)
         intermediate = intermediate + DELTA_MAP.get(period)
     return dates
@@ -224,3 +240,26 @@ def count_stat_for_period(data, period):
             periods = fill_dict(periods, key.hour, value)
             periods = dict(sorted(periods.items()))
     return periods
+
+
+def sort_dict(dictionary, func=lambda x: x, reverse=False):
+    return {key: value for key, value in sorted(dictionary.items(), key=func, reverse=reverse)}
+
+
+def remove_items_where_value_is_falsible(dictionary):
+    return {k: v for k, v in dictionary.items() if v}
+
+
+# keep only the first n entries
+def slice_dict(dictionary, n):
+    return dict(islice(dictionary.items(), n))
+
+
+def attribute_checker(func):
+    def wrapper(*args, **kwargs):
+        statistic = kwargs.get('statistic')
+        if not statistic or statistic not in ('msg_count', 'word_count', 'char_count'):
+            raise ValueError('Parameter `statistic` should be one of {msg_count, word_count, char_count}')
+        return func(*args, **kwargs)
+
+    return wrapper
diff --git a/tests/conftest.py b/tests/conftest.py
index 08e382b..ebdc35e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,7 +1,9 @@
 import pytest
+import os
+
 from miner.People import People
 
-TEST_DATA_PATH = '/home/levente/projects/facebook-data-miner/tests/test_data'
+TEST_DATA_PATH = f'{os.path.dirname(os.path.abspath(__file__))}/test_data'  # anchored to this file, not the cwd
 
 
 @pytest.fixture(scope='session')
diff --git a/tests/test_ConversationAnalyzer.py b/tests/test_ConversationAnalyzer.py
index 9d11e46..61ab388 100644
--- a/tests/test_ConversationAnalyzer.py
+++ b/tests/test_ConversationAnalyzer.py
@@ -3,8 +3,6 @@
 from miner.Analyzer import Analyzer
 from miner.utils import dt
 
-TEST_DATA_PATH = '/home/levente/projects/facebook-data-miner/tests/test_data'
-
 
 # @pytest.fixture(scope='session')
 # def person(get_people):
@@ -218,7 +216,4 @@ def test_stats_teflon_musk_all_2014_12(statistics):
     # assert stats.most_used_chars == 0
 
 
-def test_time_series_analysis_for_user(analyze):
-    analyzer = analyze('Teflon Musk')
-    analyzer.get_time_series_data(subject='all', period='y')
-    assert 1
+
diff --git a/tests/test_Conversations.py b/tests/test_Conversations.py
index ef9fdc3..5a7cb58 100644
--- a/tests/test_Conversations.py
+++ b/tests/test_Conversations.py
@@ -5,7 +5,7 @@
 from miner import utils
 import os
 
-TEST_DATA_PATH = '/home/levente/projects/facebook-data-miner/tests/test_data'
+TEST_DATA_PATH = f'{os.path.dirname(os.path.abspath(__file__))}/test_data'  # anchored to this file, not the cwd
 
 
 @pytest.fixture()
@@ -72,5 +72,4 @@ def test_individual_media_has_one_folder_of_possibles(people_from_private_convos
 
 def test_groups_have_more_than_two_participates(people_from_private_convos):
     groups = {convo: data for convo, data in people_from_private_convos.items() if convo.startswith('group')}
-    # TODO participants should contain the user itself as well
     assert all([len(data.get('participants')) > 2 for data in groups.values()])
diff --git a/tests/test_Friends.py b/tests/test_Friends.py
index 652b671..c6abfea 100644
--- a/tests/test_Friends.py
+++ b/tests/test_Friends.py
@@ -1,8 +1,9 @@
 import pytest
+import os
 
 from miner.Friends import Friends
 
-TEST_DATA_PATH = '/home/levente/projects/facebook-data-miner/tests/test_data'
+TEST_DATA_PATH = f'{os.path.dirname(os.path.abspath(__file__))}/test_data'  # anchored to this file, not the cwd
 
 
 @pytest.fixture()
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 3e57e17..3577add 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -69,20 +69,20 @@ def test_generate_date_series():
     start = datetime(2020, 1, 1, 0, 0)
     end = datetime(2021, 1, 1, 0, 0)
 
-    date_range_year = generate_date_series('y', start, end)
+    date_range_year = generate_date_series(period='y', start=start, end=end)
     assert len(date_range_year) == 1 + 1
 
-    date_range_month = generate_date_series('m', start, end)
+    date_range_month = generate_date_series(period='m', start=start, end=end)
     assert len(date_range_month) == 12 + 1
 
-    date_range_day = generate_date_series('d', start, end)
+    date_range_day = generate_date_series(period='d', start=start, end=end)
     assert len(date_range_day) == 366 + 1
 
-    date_range_hour = generate_date_series('h', start, end)
+    date_range_hour = generate_date_series(period='h', start=start, end=end)
     assert len(date_range_hour) == (366 * 24) + 1
 
     for day in date_range_day:
         assert isinstance(day, datetime)
 
     with pytest.raises(ValueError):
-        faulty_date_range = generate_date_series(start, end, )
+        faulty_date_range = generate_date_series(start=start, end=end, )