diff --git a/.gitignore b/.gitignore
index 5b8858a..4755795 100644
--- a/.gitignore
+++ b/.gitignore
@@ -228,24 +228,19 @@ dmypy.json
 
 # End of https://www.toptal.com/developers/gitignore/api/python,jupyternotebook,pycharm,visualstudiocode
 
+# ignoring data folder
+data
 
-# ignoring data
-data 
+# ignoring playground script
+tests/playground.py
 
-# ignoring todo 
+# ignoring various files created during development
+plots
+*.png
 todo.md
-
-
-# ignoring trash file
 trash.py
-
-
-# ignoring jupyter notebook
 explore.ipynb
-
-
-# ignoring jupyter notebook
-tests/playground.py
+tests/test_data/messages/inbox/private_messages.json
 
 tests/.pytest_cache
 .pytest_cache
\ No newline at end of file
diff --git a/ConversationAnalyzer.py b/ConversationAnalyzer.py
deleted file mode 100644
index cfe1a95..0000000
--- a/ConversationAnalyzer.py
+++ /dev/null
@@ -1,152 +0,0 @@
-import pandas as pd
-from utils import date_checker, period_checker, subject_checker, generate_time_series, get_stats_for_intervals
-
-
-class ConversationAnalyzer:
-    def __new__(cls, name, messages, *args, **kwargs):
-        if messages is None:  # This deals with the case if no messages
-            return None
-        return super(ConversationAnalyzer, cls).__new__(cls, *args, **kwargs)
-
-    def __init__(self, name, messages):
-        self.name = name
-        self.df = messages
-
-    def __str__(self):
-        return f'{self.name}: {list(self.df.index)}'
-
-    @property
-    def stats(self):
-        return self.get_stats(self.df)
-
-    def get_stats(self, df, subject='all', start=None, end=None, period=None):
-        df = self.filter_by_input(df, subject=subject, start=start, end=end, period=period)
-        stats = ConversationStats(df)
-        return stats
-
-    def get_time_series_data(self, subject='all', **kwargs):
-        time_series = generate_time_series(**kwargs)
-        return get_stats_for_intervals(self.get_stats, self.df, time_series, subject=subject)
-
-    @staticmethod
-    def get_plottable_time_series_data(interval_stats, statistic):
-        for k, v in interval_stats.items():
-            if isinstance(v, ConversationStats):
-                interval_stats[k] = getattr(v, statistic)
-        return interval_stats
-
-    @staticmethod
-    @subject_checker
-    @date_checker
-    @period_checker
-    def filter_by_input(df, subject='all', start=None, end=None, period=None):
-        if subject == 'me':
-            df = df[df.sender_name == 'Levente Csőke']
-        elif subject == 'partner':
-            df = df[df.sender_name != 'Levente Csőke']
-        if start and end:
-            df = df.loc[start:end]
-        elif start and not end:
-            df = df.loc[start:start + period]
-        elif not start and end:
-            df = df.loc[end - period:end]
-        return df
-
-
-class ConversationStats:
-    """
-    Statistics of conversation with one person.
-    """
-
-    # TODO do we need this or not?!?! smh
-    # def __new__(cls, df, *args, **kwargs):
-    #     if not len(df.index):  # This deals with the case if input df is empty
-    #         return None
-    #     return super(ConversationStats, cls).__new__(cls, *args, **kwargs)
-
-    def __init__(self, df):
-        self.df = df
-
-    def __repr__(self):
-        return f'{self.msg_count}'
-
-    @property
-    def messages(self):
-        return self.df.content.dropna()
-
-    @property
-    def words(self):
-        return self.get_words()
-
-    # 1.
-    @property
-    def msg_count(self):
-        return len(self.df)
-
-    # 2.
-    @property
-    def unique_msg_count(self):
-        return len(self.messages.unique())
-
-    # 3.
-    @property
-    def most_used_msgs(self):
-        # TODO LATER first few (1-10) messages
-        return self.messages.value_counts()
-
-    # 4.
-    @property
-    def msg_frequency(self):
-        # NOTE this has been most likely depracated OR?
-        pass
-
-    # 5.
-    @property
-    def word_count(self):
-        return len(self.words)
-
-    # 6.
-    @property
-    def unique_word_count(self):
-        return len(set(self.words))
-
-    # 7.
-    @property
-    def most_used_words(self):
-        s = pd.Series(self.words)
-        return s.value_counts()
-
-    # 8.
-    @property
-    def word_frequency(self):
-        pass
-
-    # 9.
-    @property
-    def char_count(self):
-        char_count = 0
-        for word in self.words:
-            char_count += len(word)
-        return char_count
-
-    # 10.
-    @property
-    def most_used_chars(self):
-        return None  # TODO LATER or not  https://stackoverflow.com/questions/4131123/finding-the-most-frequent-character-in-a-string
-
-    # 11.
-    @property
-    def rate_of_media_messages(self):
-        pass  # NOTE what?
-
-    def get_words(self):
-        token_list = self.messages.str.lower().str.split()
-        words = []
-        for tokens in token_list:
-            # print(tokens)
-            if not isinstance(tokens, list):
-                print('WARNING! Not a list!')
-                continue  # TODO ??? check this
-            for token in tokens:
-                words.append(token)
-        return words
diff --git a/Conversations.py b/Conversations.py
deleted file mode 100644
index 3fb1fbd..0000000
--- a/Conversations.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import os
-from FacebookData import FacebookData
-import pandas as pd
-
-from datetime import datetime
-
-MESSAGE_SUBPATH = 'messages/inbox'
-
-
-class Conversations:
-    def __init__(self, data_path):
-        self.data_path = f'{data_path}/{MESSAGE_SUBPATH}'
-
-    def get_people(self):
-        json_paths = self.walk_directory_and_search('.json')
-        return self.extract_names_from_convos(json_paths)
-
-    def walk_directory_and_search(self, extension):
-        paths = []
-        for root, dirs, files in os.walk(self.data_path):
-            for name in files:
-                if name.endswith(extension):
-                    paths.append(os.path.join(root, name))
-        return paths
-
-    # TODO simplify this function!! also this takes very long
-    @staticmethod
-    def extract_names_from_convos(jsons):
-        name_data_map = {}
-        count = 0
-        for file in jsons:
-            msg = Messages(file)
-            for participant in msg.participants:
-                key = participant if msg.ttype == 'Regular' else f'group_{count}'
-                if key == 'Facebook User':  # TODO ?? what to do with this??
-                    continue
-                if name_data_map.get(key) and key.startswith(
-                        'group'):  # making sure run only once even if it is a group
-                    continue
-                if name_data_map.get(key):
-                    dfs = [name_data_map[key]['messages'], msg.df]
-                    name_data_map[key]['messages'] = pd.concat(dfs, ignore_index=False).sort_index()
-                else:
-                    name_data_map[key] = {
-                        'title': msg.title,
-                        'compact_name': msg.compact_names,
-                        # 'participants': msg.participants + ['Levente Csőke'],
-                        'participants': msg.participants,
-                        'messages': msg.df,
-                        'friend': None,
-                        'messages_dir': msg.messages_dir,
-                        'media_dir': msg.media_dir
-                    }
-            if msg.ttype == 'RegularGroup':
-                count += 1
-
-        return name_data_map
-
-
-class Messages(FacebookData):
-    def __init__(self, json_path):
-        super().__init__(json_path)
-        self.to_df()
-        self.set_date_as_index()
-
-    def to_df(self):
-        self._df = pd.DataFrame(self.decoded.get('messages'))
-
-    def set_date_as_index(self):
-        # NOTE maybe not needed; could calculate real time
-        date_series = self._df.timestamp_ms.apply(self.ts_to_date)
-        self._df = self._df.set_index(date_series).iloc[::-1]
-
-    @property
-    def names(self):
-        return pd.DataFrame(self.participants)[0]
-
-    @property
-    def participants(self):
-        participants = self.decoded.get('participants')
-        # TODO I should be IN
-        # but this breaks stuff at TestMessagingAnalyzer
-        return [p.get('name') for p in participants if p.get('name') != 'Levente Csőke']
-        # return [p.get('name') for p in participants if p.get('name')]
-
-    @property
-    def title(self):
-        return self.decoded.get('title')
-
-    @property
-    def ttype(self):
-        return self.decoded.get('thread_type')
-
-    @property
-    def messages_dir(self):
-        thread_path = self.decoded.get('thread_path')
-        if not thread_path.startswith('inbox/'):
-            raise ValueError('Something is not okay.')
-        # TODO here or in the upper function where we extract names
-        return thread_path.split('/')[1].lower()
-
-    @property
-    def media_dir(self):
-        # todo what should the path contain
-        for media in ['photos', 'gifs', 'files', 'videos', 'audio']:
-            if media in self._df.columns:
-                media_in_msg = list(self._df[media][self._df[media].notnull()])
-                # if len(media_in_msg) > 1:  # TODO is this ok. i think it is. think multiple photos sent once
-                #    print('Media in msg is bigger than 1')
-                uri = media_in_msg[0][0].get('uri')
-                return os.path.dirname(os.path.dirname(uri))
-        return None
-
-    @staticmethod
-    def ts_to_date(date):
-        return datetime.fromtimestamp(date / 1000)  # .strftime('%Y-%m-%d')
diff --git a/FacebookData.py b/FacebookData.py
deleted file mode 100644
index a82c896..0000000
--- a/FacebookData.py
+++ /dev/null
@@ -1,40 +0,0 @@
-from utils import read_json, decode_text, accents_map
-
-
-class FacebookData:
-    def __init__(self, json_path):
-        self.json_path = json_path
-        self._df = None
-
-    @property
-    def df(self):
-        return self._df
-
-    @property
-    def decoded(self):
-        return decode_text(self.json)
-
-    @property
-    def json(self):
-        return read_json(self.json_path)
-
-    @property
-    def compact_names(self):
-        # NOTE this is the place where we change pd/np to builtin
-        # do we have to do this?
-        name_list = list(self.without_accent_and_whitespace(self.lower_names(self.names)))
-        return name_list[0] if len(name_list) == 1 else name_list
-
-    @staticmethod
-    def lower_names(col):
-        return col.str.lower()
-
-    @staticmethod
-    def without_accent_and_whitespace(col):
-        def replace_accents(text):
-            for char in accents_map.keys():
-                if char in text:
-                    text = text.replace(char, accents_map[char])
-            return text.replace(' ', '')
-
-        return col.apply(replace_accents)
diff --git a/Friends.py b/Friends.py
deleted file mode 100644
index 6e0e991..0000000
--- a/Friends.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import pandas as pd
-import os
-from FacebookData import FacebookData
-from utils import accents_map
-
-
-class Friends(FacebookData):
-
-    def __init__(self, *args):
-        super().__init__(*args)
-
-        # self.path = 'data/friends'
-        # self.json_path = f'{self.path}/friends.json'
-
-        self.to_df()
-
-    def get_people(self):
-        names = {}
-        for name, compact in zip(self.names, self.compact_names):
-            names[name] = {
-                'title': name,
-                'compact_name': compact,
-                'messages': None,
-                'friend': True,
-                'participants': None,
-                'messages_dir': None,
-                'media_dir': None
-            }
-        return names
-
-    def to_df(self):
-        self._df = pd.DataFrame(self.decoded.get('friends'))
-
-    @property
-    def names(self):
-        return self.df.name
diff --git a/Individual.py b/Individual.py
deleted file mode 100644
index a9f8d03..0000000
--- a/Individual.py
+++ /dev/null
@@ -1,47 +0,0 @@
-class Individual:
-    def __init__(self, name=None, title=None,compact=None, messages=None, friend=None, messages_dir=None, media_dir=None,
-                 member_of=None):
-        self._name = name
-        self._title = title
-        self._compact_name = compact
-        self._messages = messages
-        self._friend = friend
-        self._messages_dir = messages_dir
-        self._media_dir = media_dir
-        self._member_of = member_of
-
-
-    def __repr__(self):
-        return self.name
-
-    @property
-    def name(self):
-        return self._name
-
-    @property
-    def title(self):
-        return self._title
-
-    @property
-    def messages(self):
-        return self._messages
-
-    @property
-    def friend(self):
-        return self._friend
-
-    @property
-    def media_dir(self):
-        return self._media_dir
-
-    @property
-    def messages_dir(self):
-        return self._messages_dir
-
-    @property
-    def compact_name(self):
-        return self._compact_name
-
-    @property
-    def member_of(self):
-        return self._member_of
diff --git a/Me.py b/Me.py
deleted file mode 100644
index 3293bf7..0000000
--- a/Me.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from FacebookData import FacebookData
-
-
-class Me(FacebookData):
-    def __init__(self, *args):
-        super().__init__(*args)
-
-    @property
-    def name(self):
-        return 'Levente Csőke'
diff --git a/MessagingAnalyzer.py b/MessagingAnalyzer.py
deleted file mode 100644
index 0619505..0000000
--- a/MessagingAnalyzer.py
+++ /dev/null
@@ -1,130 +0,0 @@
-from utils import year_converter, month_converter, generate_date_series, get_stats_for_intervals
-from datetime import datetime, date, timedelta
-import pandas as pd
-from ConversationAnalyzer import ConversationAnalyzer
-
-
-class MessagingAnalyzer:
-    def __init__(self, names, people):
-        # TODO input people only. class will know what to do
-        self.names = names
-        self.people = people
-
-    def time_series_analysis_for_all(self, subject=None, **kwargs):
-        time_series = generate_date_series(**kwargs)
-        stacked_df = self.stack_dfs(self.people)
-        interval_stats = get_stats_for_intervals(self.get_stats, stacked_df, time_series, subject=subject)
-
-    def get_stats(self, df, subject='all', start=None, end=None, period=None):
-        # TODO LATER
-        # here you have to do something with it
-        pass
-
-    def get_count(self, attribute, subject='all', start=None, end=None, period=None):
-        count = 0
-        # we have a list of names we want to iterate over
-        for name in self.names:
-            stats = self.get_conversation_stats(name=name, subject=subject, start=start, end=end, period=period)
-            if stats is not None:
-                count += getattr(stats, attribute)
-        return count
-
-    def get_conversation_stats(self, name, subject='all', start=None, end=None, period=None):
-        messages = self.people.get(name).messages
-        analyzer = ConversationAnalyzer(name, messages)
-        if analyzer is None:
-            return None
-        return analyzer.get_stats(messages, subject=subject, start=start, end=end, period=period)
-
-    def total_number_of_(self, attribute, subject='all', **kwargs):
-        return self.get_count(attribute=attribute, subject=subject, **kwargs)
-
-    # 1. Ranking of friends by total count of messages/words/characters (also by year/month/day/hour)
-    def total_number_of_messages(self, **kwargs):
-        return self.total_number_of_(attribute='msg_count', **kwargs)
-
-    def total_number_of_words(self, **kwargs):
-        return self.total_number_of_(attribute='word_count', **kwargs)
-
-    def total_number_of_characters(self, **kwargs):
-        return self.total_number_of_(attribute='char_count', **kwargs)
-
-    # 2. Ranking of friends who I sent the most messages/words/characters (also by year/month/day/hour)
-    def total_number_of_messages_sent(self, **kwargs):
-        return self.total_number_of_(attribute='msg_count', subject='me', **kwargs)
-
-    def total_number_of_words_sent(self, **kwargs):
-        return self.total_number_of_(attribute='word_count', subject='me', **kwargs)
-
-    def total_number_of_characters_sent(self, **kwargs):
-        return self.total_number_of_(attribute='char_count', subject='me', **kwargs)
-
-    # 3. Ranking of friends who sent the most messages/words/characters (also by year/month)
-    def total_number_of_messages_received(self, **kwargs):
-        return self.total_number_of_(attribute='msg_count', subject='partner', **kwargs)
-
-    def total_number_of_words_received(self, **kwargs):
-        return self.total_number_of_(attribute='word_count', subject='partner', **kwargs)
-
-    def total_number_of_characters_received(self, **kwargs):
-        return self.total_number_of_(attribute='char_count', subject='partner', **kwargs)
-
-    # 4. Most used messages/words in convos by me/partner (also by year/month/day/hour)
-    def most_used_messages_by_me(self, **kwargs):
-        """
-        >>> s1 = pd.Series([3, 1, 2, 3, 4, 1, 1])
-        >>> s2 = pd.Series([3, 2, 1, 1])
-        >>> s1_vc = s1.value_counts()
-        >>> s2_vc = s2.value_counts()
-        TODO LATER most used is already a problem:
-          - because its a series of all the unique messages/words ever used in a convo
-          - it contains strings like ':d', ':p' and 'xd'
-          - from all the convos the result of value_counts has to be cleared
-          and has to be truncated (that is not use the 200th most used word, only top10 let's say)
-          - then these series has to be merged in a way that the same string's counts are added up
-          - what about typos????!
-        """
-        pass
-
-    def most_used_messages_by_partners(self, **kwargs):
-        pass
-
-    def most_used_words_by_me(self, **kwargs):
-        pass
-
-    def most_used_words_by_partners(self, **kwargs):
-        pass
-
-    # 5. Number of messages sent/got on busiest period (by year/month/day/hour)
-    def days_when_most_messages_sent(self):
-        # TODO LATER hard algorithmic problem
-        pass
-
-    def days_when_most_messages_received(self):
-        pass
-
-    def hours_when_most_messages_sent(self):
-        # TODO LATER
-        # is this referring to the absolute hour most messages sent??
-        # like: 2014.07.25. 15h-16h
-        # OR
-        # the pattern of most messages sent between this and this hours
-        # like: 20h-21h
-        # ACTUALLY BOTH
-        # for years/months/days/hours
-        # BUT this comes from the time series analysis
-        pass
-
-    def hours_when_most_messages_received(self):
-        pass
-
-    # 6. Time series: dict of 'year/month/day/hour : number of messages/words/characters (also sent/got) for user/all convos'
-    # TODO
-
-    @staticmethod
-    def stack_dfs(people):
-        dfs = []
-        for data in people.values():
-            if data.messages is not None:
-                dfs.append(data.messages)
-        return pd.concat(dfs).sort_index()
diff --git a/Miner.py b/Miner.py
deleted file mode 100644
index 3b32806..0000000
--- a/Miner.py
+++ /dev/null
@@ -1,45 +0,0 @@
-DATA_PATH = '/home/levente/projects/facebook-data-miner/data'
-
-from People import People
-from ConversationAnalyzer import ConversationAnalyzer
-from MessagingAnalyzer import MessagingAnalyzer
-
-
-class Miner:
-    def __init__(self):
-        pass
-
-    @staticmethod
-    def analyze_messages():
-        p = People(path=DATA_PATH)
-
-        stats = {}
-
-        for name, person in p.individuals.items():
-            #assert name == person.name, 'ERRRRRRROR!!!'
-            if person.messages is None:
-                stats[person.name] = None
-                continue
-            analyzer = ConversationAnalyzer(person.name, person.messages)
-            stats[person.name] = analyzer.stats
-            # if stats[person.name].get('message_count').get('me') > 5000:
-            #    top[person.name] = stats[person.name]
-        example = stats['Dániel Nagy']
-        print()
-
-        # print('LEN: ', len(top.keys()))
-        # top_all = {name: data.get('message_count').get('all') for name, data in top.items()}
-        # analyzer.visualize_stats(top)
-
-    @staticmethod
-    def analyze_messaging():
-        p = People(path=DATA_PATH)
-
-        msg_analyzer = MessagingAnalyzer(p.names, p.individuals)
-
-        msgs = msg_analyzer.total_number_of_messages()
-
-
-if __name__ == '__main__':
-    m = Miner()
-    m.analyze_messages()
diff --git a/People.py b/People.py
deleted file mode 100644
index 11d1887..0000000
--- a/People.py
+++ /dev/null
@@ -1,79 +0,0 @@
-from Individual import Individual
-from Conversations import Conversations
-from Friends import Friends
-
-# from Me import Me
-DATA_PATH = '/home/levente/projects/facebook-data-miner/data'
-import time
-from Group import Group
-
-
-# TODO we dont need both data and individuals... or??
-
-class People:
-    def __init__(self, path=None):
-        self.data_path = path if path else DATA_PATH
-        self._names = []
-        self._individuals = {}
-        self._groups = []
-        self._data = self.get_people()  # TODO is this supposed to be here or elsewhere
-        self.to_individuals()  # TODO is this supposed to be here or elsewhere
-
-    @property
-    def data(self):
-        return self._data
-
-    @property
-    def names(self):
-        return self._names
-
-    @property
-    def individuals(self):
-        return self._individuals
-
-    @property
-    def groups(self):
-        return self._groups
-
-    def get_people(self):
-        start = time.time()
-        friends = Friends(self.data_path + '/friends/friends.json')
-        people1 = friends.get_people()
-        print('friends: ', time.time() - start)
-
-        # TODO LATER too slow
-        # takes about 30 secs both
-        # read it once, store it in DB OR?
-        start = time.time()
-        conversations = Conversations(self.data_path)
-        people2 = conversations.get_people()
-        print('convos: ', time.time() - start)
-
-        return self.unify_people(people1, people2)
-
-    def to_individuals(self):  # TODO maybe rather split_convos or differentiate_convos
-        start = time.time()
-        for person, data in self._data.items():
-            if person.startswith('group'):
-                g = Group(name=data.get('name'), title=data.get('title'), messages=data.get('messages'),
-                          compact=data.get('compact_name'), messages_dir=data.get('messages_dir'),
-                          media_dir=data.get('media_dir'), members=None)
-                self._groups.append(g)
-            else:
-                indie = Individual(name=person, title=data.get('title'), messages=data.get('messages'),
-                                   compact=data.get('compact_name'), messages_dir=data.get('messages_dir'),
-                                   media_dir=data.get('media_dir'), member_of=None)
-                self._names.append(person)
-                self._individuals[person] = indie
-        print('indies: ', time.time() - start)
-
-    @staticmethod
-    def unify_people(friends, convos):
-        for person, data in friends.items():
-            if not convos.get(person):
-                convos[person] = data
-            convos[person]['friend'] = True
-        return convos
-
-# if __name__ == '__main__':
-#     p = People()
diff --git a/README.md b/README.md
index 943a123..c4b3b6a 100644
--- a/README.md
+++ b/README.md
@@ -11,4 +11,10 @@ More info soon...
 ## Contribution
 Help is more than welcome. If somebody feel the urge to contribute, I would share my plans with them.
 
-Ideas are welcome too. Feel free to open a new issue.
\ No newline at end of file
+Ideas are welcome too. Feel free to open a new issue.
+
+
+To run the Visualizer CLI, first add the current directory to PYTHONPATH:
+```shell script
+export PYTHONPATH="$PWD"
+```
diff --git a/__main__.py b/__main__.py
new file mode 100644
index 0000000..33f7113
--- /dev/null
+++ b/__main__.py
@@ -0,0 +1,5 @@
+from miner.App import App
+
+if __name__ == '__main__':
+    app = App()
+    app.analyze_messages()
\ No newline at end of file
diff --git a/miner/Analyzer.py b/miner/Analyzer.py
new file mode 100644
index 0000000..17d9f68
--- /dev/null
+++ b/miner/Analyzer.py
@@ -0,0 +1,127 @@
+import pandas as pd
+
+from miner.ConversationStats import ConversationStats
+from miner import utils
+
+
+class Analyzer:
+    """
+    Analyzer for analyzing specific and/or all conversations
+
+    """
+
+    def __init__(self, people):
+        self.people = people
+        self.people_data = people.data
+        self.names = list(people.names)
+        self.multi = len(self.people_data) > 1
+
+        if self.multi:
+            self.df = self.stack_dfs(self.people_data)
+        else:
+            self.df = self.people_data.get(self.names[0]).messages
+
+    def __str__(self):
+        # __str__ must return a str; returning the raw list of names raises TypeError
+        if self.multi:
+            return str(self.names)
+        return f'{self.names[0]}: {list(self.df.index)}'
+
+    @property
+    def stats(self):
+        return self.get_stats()
+
+    def get_stats_for_intervals(self, time_series, period, subject='all'):
+        data = {}
+        for i in range(len(time_series)):
+            start = time_series[i]
+            try:  # with this solution we will have data for the very last moments until datetime.now()
+                end = time_series[i + 1]
+            except IndexError:
+                end = None
+            data[start] = self.get_stats(df=self.df, subject=subject, start=start, end=end, period=period)
+        return data
+
+    def get_stats(self, df=None, subject='all', start=None, end=None, period=None):
+        df = self.df if df is None else df
+        df = self.filter_by_input(df, subject=subject, start=start, end=end, period=period)
+        stats = ConversationStats(df)
+        return stats
+
+    # 1. Total count of messages/words/characters (also by year/month/day/hour)
+    # 2. Total count of messages/words/characters sent (also by year/month/day/hour)
+    # 3. Total count of messages/words/characters received (also by year/month)
+    def get_count(self, attribute, subject='all', start=None, end=None, period=None):
+        stats = self.get_stats(subject=subject, start=start, end=end, period=period)
+        return getattr(stats, attribute)
+
+    # 4. Most used messages/words in convos by me/partner (also by year/month/day/hour)
+    def most_used_messages_(self, **kwargs):
+        """
+        >>> s1 = pd.Series([3, 1, 2, 3, 4, 1, 1])
+        >>> s2 = pd.Series([3, 2, 1, 1])
+        >>> s1_vc = s1.value_counts()
+        >>> s2_vc = s2.value_counts()
+        TODO LATER most used is already a problem:
+          - because its a series of all the unique messages/words ever used in a convo
+          - it contains strings like ':d', ':p' and 'xd'
+          - from all the convos the result of value_counts has to be cleared
+          and has to be truncated (that is not use the 200th most used word, only top10 let's say)
+          - then these series has to be merged in a way that the same string's counts are added up
+          - what about typos????!
+        """
+        pass
+
+    # 5. Number of messages sent/got on busiest period (by year/month/day/hour)
+    def stat_per_period(self, period, statistic, **kwargs):
+        interval_stats = self.get_time_series_data(period, **kwargs)
+        time_series_data = self.get_stat_count(interval_stats, statistic=statistic)
+        return utils.count_stat_for_period(time_series_data, period)
+
+    # 6. Time series: dict of 'y/m/d/h : number of messages/words/characters (also sent/got) for user/all convos'
+    def get_time_series_data(self, period, subject='all', **kwargs):
+        time_series = utils.generate_date_series(period=period, **kwargs)
+        return self.get_stats_for_intervals(time_series, period, subject=subject)
+
+    # # 7. Ranking of partners by messages by y/m/d/h, by different stats, by sent/got
+    def get_ranking_of_partners_by_messages(self, statistic='msg_count', **kwargs):
+        count_dict = {}
+        for name in self.names:
+            df = self.df[self.df.partner == name]
+            stats = self.get_stats(df=df, **kwargs)
+            if stats is not None:
+                count_dict = utils.fill_dict(count_dict, name, getattr(stats, statistic))
+        return count_dict
+
+    @staticmethod
+    def stack_dfs(people_data):
+        dfs = []
+        for data in people_data.values():
+            if data.messages is not None:
+                dfs.append(data.messages)
+        return pd.concat(dfs).sort_index()
+
+    @staticmethod
+    @utils.attribute_checker
+    def get_stat_count(interval_stats, statistic='msg_count'):
+        for k, v in interval_stats.items():
+            interval_stats[k] = getattr(v, statistic)
+        return interval_stats
+
+    @staticmethod
+    @utils.subject_checker
+    @utils.date_checker
+    @utils.start_end_period_checker
+    def filter_by_input(df, subject='all', start=None, end=None, period=None):
+
+        if subject == 'me':
+            df = df[df.sender_name == 'Levente Csőke']
+        elif subject == 'partner':
+            df = df[df.sender_name != 'Levente Csőke']
+        if start and end:
+            df = df.loc[start:end]
+        elif start and not end:
+            df = df.loc[start:start + period]
+        elif not start and end:
+            df = df.loc[end - period:end]
+        return df
diff --git a/miner/App.py b/miner/App.py
new file mode 100644
index 0000000..7813f9e
--- /dev/null
+++ b/miner/App.py
@@ -0,0 +1,27 @@
+import os
+
+from miner.Analyzer import Analyzer
+from miner.People import People
+
+DATA_PATH = f'{os.getcwd()}/data'
+
+
+class App:
+    """
+    Entrypoint. Not yet used extensively.
+    # TODO LATER turn it into a cli
+    """
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def analyze_messages():
+        """Rank conversation partners by character count (the result is not used yet)."""
+        p = People(path=DATA_PATH)
+        analyzer = Analyzer(p)
+        rank = analyzer.get_ranking_of_partners_by_messages(statistic='char_count')
+
+
+if __name__ == '__main__':
+    app = App()
+    app.analyze_messages()
diff --git a/miner/ConversationStats.py b/miner/ConversationStats.py
new file mode 100644
index 0000000..040bd17
--- /dev/null
+++ b/miner/ConversationStats.py
@@ -0,0 +1,89 @@
+
+class ConversationStats:
+    """
+    Statistics of conversation with one person.
+    """
+
+    def __init__(self, df):
+        self.df = df
+
+    def __repr__(self):
+        return f'{self.msg_count}'
+
+    @property
+    def messages(self):
+        return self.df.content.dropna()
+
+    @property
+    def words(self):
+        return self.get_words()
+
+    # 1.
+    @property
+    def msg_count(self):
+        return len(self.df)
+
+    # 2.
+    @property
+    def unique_msg_count(self):
+        return len(self.messages.unique())
+
+    # 3.
+    @property
+    def most_used_msgs(self):
+        return self.messages.value_counts()
+
+    # 4.
+    @property
+    def msg_frequency(self):
+        # NOTE: most likely deprecated; currently a placeholder that returns None
+        pass
+
+    # 5.
+    @property
+    def word_count(self):
+        return len(self.words)
+
+    # 6.
+    @property
+    def unique_word_count(self):
+        return len(set(self.words))
+
+    @property  # 7.
+    def most_used_words(self):
+        import pandas as pd  # local import: this module never imports pandas at top level
+        return pd.Series(self.words).value_counts()  # counts per word, most frequent first
+
+    # 8.
+    @property
+    def word_frequency(self):
+        pass
+
+    # 9.
+    @property
+    def char_count(self):
+        char_count = 0
+        for word in self.words:
+            char_count += len(word)
+        return char_count
+
+    # 10.
+    @property
+    def rate_of_media_messages(self):
+        """
+        TODO LATER
+        search for media messages all 5 of them
+        rate is only the second or third abstraction
+        """
+        pass
+
+    def get_words(self):
+        token_list = self.messages.str.lower().str.split()
+        words = []
+        for tokens in token_list:
+            if not isinstance(tokens, list):
+                print('WARNING! Not a list!')
+                continue
+            for token in tokens:
+                words.append(token)
+        return words
diff --git a/miner/Conversations.py b/miner/Conversations.py
new file mode 100644
index 0000000..b4a4381
--- /dev/null
+++ b/miner/Conversations.py
@@ -0,0 +1,108 @@
+import pandas as pd
+import os
+
+from miner.Messages import Messages
+from miner.Individual import Individual
+
+from miner import utils
+
+
+class Conversations:
+    """
+    Class for managing and parsing conversations
+    """
+
+    def __init__(self, data_path):
+        self.private_convo_paths = {}
+        self.group_convo_paths = {}  # TODO LATER fill this as well
+        self.deleted_user_convo_paths = []  # NOTE these are collected but not yet used
+
+        self.data_path = f'{data_path}/{utils.MESSAGE_SUBPATH}'
+        self.order_paths()
+
+    def order_paths(self):
+        paths_map = f'{self.data_path}/private_messages.json'
+        if os.path.isfile(paths_map):
+            self.read_paths(paths_map)
+            return
+        json_paths = utils.walk_directory_and_search(self.data_path, '.json', contains_string='message_')
+        self.differentiate_paths(json_paths)
+        self.register_paths()
+
+    def differentiate_paths(self, jsons):
+        for file in jsons:
+            msg = Messages(file)
+            if msg.title == 'Facebook User':
+                self.deleted_user_convo_paths.append(msg.messages_dir)
+            elif msg.ttype == 'RegularGroup':
+                self.map_group_convo_files(msg, file)
+            elif msg.ttype == 'Regular':
+                # self.private_convo_paths[msg.title] = msg.messages_dir
+                self.map_private_convo_files(msg, file)
+            else:
+                raise ValueError('Should not happen!')
+
+    def register_paths(self):
+        utils.dump_to_json(self.private_convo_paths, f'{self.data_path}/private_messages.json')
+
+    def read_paths(self, file):
+        # load the cached name -> message-file-paths map that register_paths wrote
+        self.private_convo_paths = utils.read_json(file)
+
+    def map_private_convo_files(self, msg, file):
+        name = msg.title
+        if self.private_convo_paths.get(name):
+            self.private_convo_paths[name].append(file)
+        else:
+            self.private_convo_paths[name] = [file]
+
+    def map_group_convo_files(self, msg, file):
+        for participant in msg.participants:
+            if participant == 'Levente Csőke':
+                continue
+            if self.group_convo_paths.get(file):
+                self.group_convo_paths[file].append(participant)
+            else:
+                self.group_convo_paths[file] = [participant]
+
+    def get_people_from_private_messages(self, name=None, membership=True):
+        name_data_map = {}
+        convo_path_map = self.filter_by_name(name) if name is not None else self.private_convo_paths.values()
+        for paths in convo_path_map:
+            for file in paths:
+                messages = Messages(file)
+                name = messages.title
+                if name_data_map.get(name) is not None:
+                    dfs = [name_data_map[name].messages, messages.df]
+                    name_data_map[name].messages = pd.concat(dfs).sort_index()
+                else:
+                    name_data_map[name] = self.create_individual(messages, membership=membership)
+        return name_data_map
+
+    def filter_by_name(self, name):
+        """
+        Return the list of message-file paths of every requested partner.
+
+        `name` is a single name (str) or a list of names; any other type gives an
+        empty result. An unknown name contributes an empty path list instead of
+        `None`, so the caller can iterate over each entry.
+        """
+        names = [name] if isinstance(name, str) else name if isinstance(name, list) else []
+        return [self.private_convo_paths.get(partner, []) for partner in names]
+
+    def create_individual(self, messages, membership=None):
+        return Individual(
+            name=messages.title,
+            compact=messages.compact_names,
+            messages=messages.df,
+            messages_dir=messages.messages_dir,
+            media_dir=messages.media_dir,
+            member_of=self.group_membership(messages.title) if membership else None,
+        )
+
+    @staticmethod
+    def group_membership(name):
+        return None
+
+    def get_people_from_group_messages(self):
+        pass
diff --git a/miner/FacebookData.py b/miner/FacebookData.py
new file mode 100644
index 0000000..ef5ba78
--- /dev/null
+++ b/miner/FacebookData.py
@@ -0,0 +1,32 @@
+from miner import utils
+import pandas as pd
+
+
+class FacebookData:
+    """
+    Base class for reading in tabular data from JSONs.
+    """
+
+    def __init__(self, json_path):
+        self.json_path = json_path
+        self._df = None
+
+    @property
+    def df(self):
+        return self._df
+
+    @property
+    def decoded(self):
+        return utils.decode_text(self.json)
+
+    @property
+    def json(self):
+        return utils.read_json(self.json_path)
+
+    @property
+    def compact_names(self):
+        name_list = list(utils.without_accent_and_whitespace(utils.lower_names(self.names)))
+        return name_list[0] if len(name_list) == 1 else name_list
+
+    def to_df(self, field=None):
+        self._df = pd.DataFrame(self.decoded.get(field))
diff --git a/miner/Friends.py b/miner/Friends.py
new file mode 100644
index 0000000..5acc1be
--- /dev/null
+++ b/miner/Friends.py
@@ -0,0 +1,28 @@
+from miner.FacebookData import FacebookData
+from miner.Individual import Individual
+
+
+class Friends(FacebookData):
+    """
+    Class for storing data in friends.json
+    """
+
+    def __init__(self, *args):
+        super().__init__(*args)
+        self.to_df('friends')
+
+    def get_people(self, name=None):
+        names = {}
+        for full_name, compact in zip(self.names, self.compact_names):
+            if name is not None and name != full_name:  # filtering for name
+                continue
+            names[full_name] = Individual(
+                name=full_name,
+                compact=compact,
+                friend=True,
+            )
+        return names
+
+    @property
+    def names(self):
+        return self.df.name
diff --git a/Group.py b/miner/Group.py
similarity index 85%
rename from Group.py
rename to miner/Group.py
index 2152d77..3a1b131 100644
--- a/Group.py
+++ b/miner/Group.py
@@ -1,9 +1,13 @@
-# TODO LATER groups should be searched by looking into jsons unfortunately :(
+# NOTE groups should be searched by looking into jsons unfortunately :(
 # because of directory says others
 # maybe we dont use groups right away?
 
 
 class Group:
+    """
+    Class for holding a group-message's data
+    """
+
     def __init__(self, name=None, title=None, messages=None, compact=None, messages_dir=None, media_dir=None,
                  members=None):
         self._name = name
@@ -26,10 +30,6 @@ def title(self):
     def messages(self):
         return self._messages
 
-    # @property
-    # def get_message_jsons(self):
-    #     return self._messages
-
     @property
     def media_dir(self):
         return self._media_dir
diff --git a/miner/Individual.py b/miner/Individual.py
new file mode 100644
index 0000000..6f818ab
--- /dev/null
+++ b/miner/Individual.py
@@ -0,0 +1,61 @@
+class Individual:
+    """
+    Class for holding a person's data the user ever interacted with
+    """
+
+    def __init__(self, name=None, compact=None, messages=None, friend=None, messages_dir=None,
+                 media_dir=None,
+                 member_of=None):
+        self._name = name
+        self._compact_name = compact
+        self._messages = messages
+        self._friend = friend
+        self._messages_dir = messages_dir
+        self._media_dir = media_dir
+        self._member_of = member_of
+
+    def __repr__(self):
+        return f'{self.name}, messages: {self.messages}'
+
+    def __add__(self, other):
+        """Merge two records of one person; each field falls back to `other` when it is empty on `self`."""
+        has_messages = self.messages is not None and len(self.messages)  # messages may be None
+        return Individual(
+            name=self.name or other.name, friend=self.friend or other.friend,
+            compact=self.compact_name or other.compact_name,
+            messages=self.messages if has_messages else other.messages,
+            messages_dir=self.messages_dir or other.messages_dir,
+            media_dir=self.media_dir or other.media_dir, member_of=self.member_of or other.member_of,
+        )
+
+    @property
+    def name(self):
+        return self._name
+
+    @property
+    def messages(self):
+        return self._messages
+
+    @messages.setter
+    def messages(self, df):
+        self._messages = df
+
+    @property
+    def friend(self):
+        return self._friend
+
+    @property
+    def media_dir(self):
+        return self._media_dir
+
+    @property
+    def messages_dir(self):
+        return self._messages_dir
+
+    @property
+    def compact_name(self):
+        return self._compact_name
+
+    @property
+    def member_of(self):
+        return self._member_of
diff --git a/miner/Me.py b/miner/Me.py
new file mode 100644
index 0000000..ef2179e
--- /dev/null
+++ b/miner/Me.py
@@ -0,0 +1,14 @@
+from miner.FacebookData import FacebookData
+
+
+class Me(FacebookData):
+    """
+    Class for storing basic data about the user
+    """
+
+    def __init__(self, *args):
+        super().__init__(*args)
+
+    @property
+    def name(self):
+        return ''
diff --git a/miner/Messages.py b/miner/Messages.py
new file mode 100644
index 0000000..ebdaadf
--- /dev/null
+++ b/miner/Messages.py
@@ -0,0 +1,64 @@
+from datetime import datetime
+import pandas as pd
+import os
+
+from miner.FacebookData import FacebookData
+from miner import utils
+
+
+class Messages(FacebookData):
+    """
+    Class for representing data of all the messages with a user or a group
+    """
+
+    def __init__(self, json_path):
+        super().__init__(json_path)
+        self.to_df('messages')
+        self.set_date_as_index()
+        self.add_partner_column()
+
+    @property
+    def names(self):
+        try:
+            return pd.DataFrame(self.participants)[0]
+        except KeyError:
+            return pd.Series({0: 'Facebook User'})
+
+    @property
+    def participants(self):
+        participants = self.decoded.get('participants') or []  # tolerate a missing field
+        return [p.get('name') for p in participants if p.get('name')]
+
+    @property
+    def title(self):
+        return self.decoded.get('title')
+
+    @property
+    def ttype(self):
+        return self.decoded.get('thread_type')
+
+    @property
+    def messages_dir(self):
+        thread_path = self.decoded.get('thread_path')
+        if not thread_path.startswith('inbox/'):
+            raise ValueError('Field `thread_path` should start with `inbox/`.')
+        return thread_path.split('inbox/')[1]
+
+    @property
+    def media_dir(self):
+        for media in utils.MEDIA_DIRS:
+            if media in self._df.columns:
+                media_in_msg = list(self._df[media][self._df[media].notnull()])
+                uri = media_in_msg[0][0].get('uri')
+                return os.path.dirname(os.path.dirname(uri)).split('inbox/')[1]
+
+    def set_date_as_index(self):
+        date_series = self._df.timestamp_ms.apply(self.ts_to_date)
+        self._df = self._df.set_index(date_series).iloc[::-1]
+
+    def add_partner_column(self):
+        self._df['partner'] = self.title
+
+    @staticmethod
+    def ts_to_date(date):
+        return datetime.fromtimestamp(date / 1000)
diff --git a/miner/People.py b/miner/People.py
new file mode 100644
index 0000000..b7852e8
--- /dev/null
+++ b/miner/People.py
@@ -0,0 +1,56 @@
+import time
+import os
+
+from miner.Conversations import Conversations
+from miner.Friends import Friends
+
+DATA_PATH = f'{os.getcwd()}/data'
+
+
+class People:
+    """
+    Class that manages and represents people from different kind of interactions
+    # TODO LATER abstractional flaw?! people? person? indie?
+    """
+
+    def __init__(self, path=None, name=None):
+        self.data_path = path if path else DATA_PATH
+        self._groups = []
+        self._data = self.get_people(name=name)
+        self._names = self.data.keys()
+
+    @property
+    def data(self):
+        return self._data
+
+    @property
+    def names(self):
+        return self._names  # if len(self._names) > 1 else self._names[0]
+
+    @property
+    def groups(self):
+        return self._groups
+
+    def get_people(self, name=None):
+        start = time.time()
+        friend = Friends(self.data_path + '/friends/friends.json')
+        friends = friend.get_people(name=name)
+        print('friends: ', time.time() - start)
+
+        start = time.time()
+        conversations = Conversations(self.data_path)
+        print('convos1: ', time.time() - start)
+        start = time.time()
+        individuals = conversations.get_people_from_private_messages(name=name)
+        print('convos2: ', time.time() - start)
+
+        return self.unify_people(friends, individuals)
+
+    @staticmethod
+    def unify_people(friends, convo_partners):
+        for person, friend in friends.items():
+            if not convo_partners.get(person):
+                convo_partners[person] = friend
+            else:
+                convo_partners[person] = convo_partners.get(person) + friend
+        return convo_partners
diff --git a/Visualizer.py b/miner/Visualizer.py
similarity index 88%
rename from Visualizer.py
rename to miner/Visualizer.py
index 052ecb3..440c3e7 100644
--- a/Visualizer.py
+++ b/miner/Visualizer.py
@@ -1,8 +1,8 @@
 import matplotlib.pyplot as plt
 import seaborn as sns
 import pandas as pd
-from People import People
-from ConversationAnalyzer import ConversationAnalyzer
+from miner.People import People
+from miner.ConversationAnalyzer import ConversationAnalyzer
 
 # plt.rcParams.update({'figure.figsize': (10, 7), 'figure.dpi': 120})
 
@@ -22,7 +22,7 @@ def plot_convos(self, names):
 
     @staticmethod
     def set_up_data(people, name, period='y'):
-        analyzer = ConversationAnalyzer(name, people.individuals.get(name).messages)
+        analyzer = ConversationAnalyzer(name, people.data.get(name).messages)
         interval_stats = analyzer.get_time_series_data(subject='all', start=None, end=None, period=period)
         return analyzer.get_plottable_time_series_data(interval_stats, statistic='msg_count')
 
diff --git a/tests/TestMessages.py b/miner/__init__.py
similarity index 100%
rename from tests/TestMessages.py
rename to miner/__init__.py
diff --git a/miner/requirements.txt b/miner/requirements.txt
new file mode 100644
index 0000000..8ee3351
--- /dev/null
+++ b/miner/requirements.txt
@@ -0,0 +1,5 @@
+numpy==1.18.1
+pandas==1.0.3
+dateparser==0.7.6
+matplotlib==3.2.1
+python_dateutil==2.8.1
diff --git a/utils.py b/miner/utils.py
similarity index 55%
rename from utils.py
rename to miner/utils.py
index 2a48624..6f6f565 100644
--- a/utils.py
+++ b/miner/utils.py
@@ -1,28 +1,28 @@
-import json
-import pandas as pd
-import dateparser
-from datetime import datetime, timedelta
 from dateutil.relativedelta import relativedelta
+from datetime import datetime, timedelta
+import dateparser
+from itertools import islice
+import json
+import os
 
+MESSAGE_SUBPATH = 'messages/inbox'
+MEDIA_DIRS = ['photos', 'gifs', 'files', 'videos', 'audio']
 MONTHS = ['january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'october',
           'november', 'december']
-
-
-def read_json(file):
-    with open(file) as f:
-        return json.load(f)
-
-
-def dump_to_json(data=None, file=None):
-    with open(file, 'w') as f:
-        json.dump(data, f)
-
-
-def order_list_of_dicts(lst, key='timestamp_ms'):
-    return sorted(lst, key=lambda k: k[key])
-
-
-accents_map = {
+WEEKDAYS = ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"]
+PERIOD_MAP = {
+    'y': None,
+    'm': MONTHS,
+    'd': WEEKDAYS,
+    'h': None,
+}
+DELTA_MAP = {
+    'y': relativedelta(years=+1),
+    'm': relativedelta(months=+1),
+    'd': timedelta(days=1),
+    'h': timedelta(hours=1)
+}
+ACCENTS_MAP = {
     "á": "a",
     "é": "e",
     "í": "i",
@@ -32,19 +32,21 @@ def order_list_of_dicts(lst, key='timestamp_ms'):
     "ú": "u",
     "ü": "u",
     "ű": "u",
-    # "Á": "A",
-    # "É": "E",
-    # "Í": "I",
-    # "Ó": "O",
-    # "Ö": "O",
-    # "Ő": "O",
-    # "Ú": "U",
-    # "Ü": "U",
-    # "Ű": "U",
 }
 
 
-#
+def read_json(file):
+    with open(file, encoding='utf8') as f:  # same encoding dump_to_json writes with
+        return json.load(f)
+
+
+def dump_to_json(data=None, file=None):
+    with open(file, 'w', encoding='utf8') as f:
+        json.dump(data, f, ensure_ascii=False)
+
+
+def order_list_of_dicts(lst, key='timestamp_ms'):
+    return sorted(lst, key=lambda k: k[key])
 
 
 def year_converter(func):
@@ -110,19 +112,10 @@ def wrapper(*args, **kwargs):
     return wrapper
 
 
-DELTA_MAP = {
-    'y': relativedelta(years=+1),
-    'm': relativedelta(months=+1),
-    'd': timedelta(days=1),
-    'h': timedelta(hours=1)
-}
-
-
-def period_checker(func):
+def start_end_period_checker(func):
     def wrapper(*args, **kwargs):
         if kwargs.get('start') is not None and kwargs.get('end') is not None:
             return func(*args, **kwargs)
-
         if not kwargs.get('period') or DELTA_MAP[kwargs.get('period')] is None:
             raise ValueError('Parameter `period` should be one of {y, m, d, h}')
         kwargs['period'] = DELTA_MAP[kwargs.get('period')]
@@ -131,33 +124,38 @@ def wrapper(*args, **kwargs):
     return wrapper
 
 
-def generate_date_series(start=None, end=None, period=None):
-    if period is None or DELTA_MAP.get(period) is None:
-        raise ValueError('Parameter `period` should be one of {y, m, d, h}')
-    start = start or datetime(year=2009, month=10, day=2, hour=0) # TODO change this to date when user joined FB
-    end = end or datetime.now()
+def period_checker(func):
+    def wrapper(*args, **kwargs):  # validates the `period` keyword, then delegates to `func`
+        if DELTA_MAP.get(kwargs.get('period')) is None:  # missing or unknown -> ValueError, not KeyError
+            raise ValueError('Parameter `period` should be one of {y, m, d, h}')
+        return func(*args, **kwargs)
+
+    return wrapper
+
+
+def get_start_based_on_period(join_date, period):
+    if period == 'y':
+        return datetime(join_date.year, 1, 1)
+    elif period == 'm':
+        return datetime(join_date.year, join_date.month, 1)
+    return join_date
+
 
+@period_checker
+def generate_date_series(period='y', start=None, end=None):
     dates = []
+
+    join_date = datetime(year=2009, month=10, day=2)  # TODO later get this from somewhere
+    start = start or get_start_based_on_period(join_date, period)
+    end = end or datetime.now()
+
     intermediate = start
-    while intermediate <= end:
+    while intermediate <= end:  # means that we want to have the end in it as well
         dates.append(intermediate)
         intermediate = intermediate + DELTA_MAP.get(period)
     return dates
 
 
-def get_stats_for_intervals(func, df, time_series, subject='all'):
-    data = {}
-    for offset, series in time_series.items():
-        data[offset] = {}
-        for i in range(len(series) - 1):  # only looping len - 1 times
-            start = series[i]
-            # TODO LATER will we miss the last entry? I dont think so (99%), but check and correct hand in hand with the timeseries bug
-            # IT DOES NOT! HOWEVER test it with new data injected/modified at runtime <- this is hard
-            end = series[i + 1]
-            data[offset][start] = func(df, subject=subject, start=start, end=end)
-    return data
-
-
 def dt(year: int = 2004, month: int = 1, day: int = 1, hour: int = 0):
     return datetime(year=year, month=month, day=day, hour=hour)
 
@@ -186,3 +184,82 @@ def decode_text(obj):
         return {key: decode_text(item) for key, item in obj.items()}
 
     return obj
+
+
+def lower_names(col):
+    return col.str.lower()
+
+
+def replace_accents(text):
+    for char in ACCENTS_MAP.keys():
+        if char in text:
+            text = text.replace(char, ACCENTS_MAP[char])
+    return text.replace(' ', '')
+
+
+def without_accent_and_whitespace(col):
+    return col.apply(replace_accents)
+
+
+def walk_directory_and_search(path, extension, contains_string=None):
+    """Collect files under `path` ending with `extension`; `contains_string=None` disables the name filter."""
+    paths = []
+    for root, _dirs, files in os.walk(path):
+        for file_name in files:
+            if file_name.endswith(extension) and (contains_string is None or contains_string in file_name):
+                paths.append(os.path.join(root, file_name))
+    return paths
+
+
+def fill_dict(dictionary, key, value):
+    if dictionary.get(key) is not None:
+        dictionary[key] += value
+    else:
+        dictionary[key] = value
+    return dictionary
+
+
+def month_sorter(x):
+    return MONTHS.index(x[0])
+
+
+def count_stat_for_period(data, period):
+    """
+    Sum the values of `data` (datetime-like keys) into buckets chosen by `period`: year ('y'),
+    month name ('m'), weekday name ('d') or hour ('h'); buckets come back in calendar order.
+    Any other `period` yields an empty dict.
+    """
+    bucket_of = {
+        'y': lambda date: date.year,
+        'm': lambda date: MONTHS[date.month - 1],
+        'd': lambda date: WEEKDAYS[date.weekday()],
+        'h': lambda date: date.hour,
+    }.get(period)
+    periods = {}
+    for key, value in (data.items() if bucket_of else ()):
+        periods = fill_dict(periods, bucket_of(key), value)
+    order = {'m': MONTHS.index, 'd': WEEKDAYS.index}.get(period, lambda bucket: bucket)
+    return dict(sorted(periods.items(), key=lambda item: order(item[0])))  # sort once, not per entry
+
+
+def sort_dict(dictionary, func=lambda x: x, reverse=False):
+    return {key: value for key, value in sorted(dictionary.items(), key=func, reverse=reverse)}
+
+
+def remove_items_where_value_is_falsible(dictionary):
+    return {k: v for k, v in dictionary.items() if v}
+
+
+# keep only the first n entries
+def slice_dict(dictionary, n):
+    return dict(islice(dictionary.items(), n))
+
+
+def attribute_checker(func):
+    def wrapper(*args, **kwargs):
+        statistic = kwargs.get('statistic')
+        if not statistic or statistic not in ('msg_count', 'word_count', 'char_count'):
+            raise ValueError('Parameter `statistic` should be one of {msg_count, word_count, char_count}')
+        return func(*args, **kwargs)
+
+    return wrapper
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..76f8eef
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+seaborn==0.10.1
+dateparser==0.7.6
+pandas==1.0.3
+matplotlib==3.2.1
+python_dateutil==2.8.1
diff --git a/tests/TestConversations.py b/tests/TestConversations.py
deleted file mode 100644
index e198dc9..0000000
--- a/tests/TestConversations.py
+++ /dev/null
@@ -1,71 +0,0 @@
-import pandas as pd
-import pytest
-from Conversations import Conversations
-import os
-TEST_DATA_PATH = '/home/levente/projects/facebook-data-miner/tests/test_data'
-
-
-@pytest.fixture()
-def convos():
-    convo = Conversations(f'{TEST_DATA_PATH}')
-    return convo.get_people()
-
-
-def test_get_all_people_from_convo(convos):
-    people = []
-
-    for convo in convos.keys():
-        if convo.startswith('group'):
-            people += [p for p in convos[convo].get('participants')]
-        else:
-            people.append(convo)
-    people = list(set(people))
-
-    expected = ['Dér Dénes', 'Facebook User', 'Foo Bar', 'John Doe', 'Teflon Musk', 'Benedek Elek', 'Donald Duck',
-                'Tőke Hal']
-    # TODO LATER what to do with Facebook User??????
-    assert sorted(people) == sorted(expected)
-
-
-def test_all_convos_have_dir(convos):
-    assert all([data.get('messages_dir') for data in convos.values()])
-
-
-def test_all_convos_have_messages_df(convos):
-    assert all([isinstance(data.get('messages'), pd.DataFrame) for data in convos.values()])
-
-
-def test_some_convos_as_media_dir(convos):
-    assert convos.get('Teflon Musk').get('media_dir')
-    assert not convos.get('Benedek Elek').get('media_dir')
-
-def test_convo_media_has_one_folder_of_possibles(convos):
-    listed_dir = os.listdir(f"{TEST_DATA_PATH}/{convos.get('Teflon Musk').get('media_dir')}")
-    assert 'files' in listed_dir
-    assert 'photos' in listed_dir
-    assert 'audio' not in listed_dir
-
-def test_groups_have_more_than_two_participates(convos):
-    groups = {convo: data for convo, data in convos.items() if convo.startswith('group')}
-    # TODO participants should contain the user itself as well
-    assert all([len(data.get('participants')) > 2 for data in groups.values()])
-
-
-
-"""
-testcases:
-- individual convos contain all names, compact_names, message folders and media folders
-  - media folders are a big question. how do you get it? actually once you have the thread_path then from that you can guess,
-  OR better off use the uri in the messages... fuck seems complicated
-- friends contain all names and compact names,
-- convos and friends has a common set, and the set is identical
-- people gets assigned with all the unique friends and individual/group convos
-
-gonna test:
-- assigning messages to friends,
-- deal with multiple directories, IF there are multiple directories,
-- 
-concerns:
-- what to do with non-friends,
-- I assume multiple directories are because of files sent,
-"""
diff --git a/tests/TestMessagingAnalyzer.py b/tests/TestMessagingAnalyzer.py
deleted file mode 100644
index b803693..0000000
--- a/tests/TestMessagingAnalyzer.py
+++ /dev/null
@@ -1,243 +0,0 @@
-import pytest
-from MessagingAnalyzer import MessagingAnalyzer
-from utils import dt
-
-@pytest.fixture(scope='session')
-def analyzer(people):
-    return MessagingAnalyzer(people.names, people.individuals)
-
-
-def test_total_number_of_messages(analyzer):
-    assert analyzer.total_number_of_messages() == 29
-
-    assert analyzer.total_number_of_messages(start=dt(year=2000), period='y') == 0
-    assert analyzer.total_number_of_messages(start=dt(year=2014), period='y') == 11
-    assert analyzer.total_number_of_messages(start=dt(year=2018), period='y') == 3
-    assert analyzer.total_number_of_messages(start=dt(year=2020), period='y') == 15
-
-    assert analyzer.total_number_of_messages(start=dt(year=2011, month=11), period='m') == 0
-    assert analyzer.total_number_of_messages(start=dt(year=2014, month=9), period='m') == 1
-    assert analyzer.total_number_of_messages(start=dt(year=2014, month=11), period='m') == 8
-    assert analyzer.total_number_of_messages(start=dt(year=2014, month=12), period='m') == 2
-
-    assert analyzer.total_number_of_messages(start=dt(year=2018, month=1), period='m') == 3
-    assert analyzer.total_number_of_messages(start=dt(year=2018, month=5), period='m') == 0
-
-    assert analyzer.total_number_of_messages(start=dt(year=2020, month=2), period='m') == 10
-    assert analyzer.total_number_of_messages(start=dt(year=2020, month=3), period='m') == 1  # jpg
-    assert analyzer.total_number_of_messages(start=dt(year=2020, month=4), period='m') == 2
-    assert analyzer.total_number_of_messages(start=dt(year=2020, month=5), period='m') == 1
-    assert analyzer.total_number_of_messages(start=dt(year=2020, month=6), period='m') == 0
-    assert analyzer.total_number_of_messages(start=dt(year=2020, month=8), period='m') == 1
-
-    assert analyzer.total_number_of_messages(start=dt(year=2020, month=2, day=13), period='d') == 2
-    assert analyzer.total_number_of_messages(start=dt(year=2020, month=2, day=13, hour=6), period='h') == 2
-
-    assert analyzer.total_number_of_messages(start=dt(year=2020, month=2, day=13, hour=6), period='d') == 4
-
-
-def test_total_number_of_words(analyzer):
-    assert analyzer.total_number_of_words() == 86
-
-    assert analyzer.total_number_of_words(start=dt(year=2000), period='y') == 0
-    assert analyzer.total_number_of_words(start=dt(year=2014), period='y') == 20
-    assert analyzer.total_number_of_words(start=dt(year=2018), period='y') == 32
-    assert analyzer.total_number_of_words(start=dt(year=2020), period='y') == 34
-
-    assert analyzer.total_number_of_words(start=dt(year=2014, month=9), period='m') == 6
-    assert analyzer.total_number_of_words(start=dt(year=2014, month=11), period='m') == 13
-    assert analyzer.total_number_of_words(start=dt(year=2014, month=12), period='m') == 1
-
-    assert analyzer.total_number_of_words(start=dt(year=2018, month=1), period='m') == 32
-    assert analyzer.total_number_of_words(start=dt(year=2018, month=2), period='m') == 0
-
-    assert analyzer.total_number_of_words(start=dt(year=2020, month=2), period='m') == 27
-    assert analyzer.total_number_of_words(start=dt(year=2020, month=3), period='m') == 0
-    assert analyzer.total_number_of_words(start=dt(year=2020, month=4), period='m') == 4
-    assert analyzer.total_number_of_words(start=dt(year=2020, month=5), period='m') == 1
-    assert analyzer.total_number_of_words(start=dt(year=2020, month=6), period='m') == 0
-    assert analyzer.total_number_of_words(start=dt(year=2020, month=8), period='m') == 2
-
-    assert analyzer.total_number_of_words(start=dt(year=2020, month=2, day=13), period='d') == 14
-    assert analyzer.total_number_of_words(start=dt(year=2020, month=2, day=13, hour=5), period='d') == 14
-
-
-def test_total_number_of_characters(analyzer):
-    assert analyzer.total_number_of_characters() == 379
-
-    assert analyzer.total_number_of_characters(start=dt(year=2000), period='y') == 0
-    assert analyzer.total_number_of_characters(start=dt(year=2014), period='y') == 69
-    assert analyzer.total_number_of_characters(start=dt(year=2018), period='y') == 170
-    assert analyzer.total_number_of_characters(start=dt(year=2020), period='y') == 140
-
-    assert analyzer.total_number_of_characters(start=dt(year=2014, month=9), period='m') == 24
-    assert analyzer.total_number_of_characters(start=dt(year=2014, month=11), period='m') == 42
-    assert analyzer.total_number_of_characters(start=dt(year=2014, month=12), period='m') == 3
-
-    assert analyzer.total_number_of_characters(start=dt(year=2018, month=1), period='m') == 170
-    assert analyzer.total_number_of_characters(start=dt(year=2018, month=2), period='m') == 0
-
-    assert analyzer.total_number_of_characters(start=dt(year=2020, month=2), period='m') == 114
-    assert analyzer.total_number_of_characters(start=dt(year=2020, month=3), period='m') == 0
-    assert analyzer.total_number_of_characters(start=dt(year=2020, month=4), period='m') == 17
-    assert analyzer.total_number_of_characters(start=dt(year=2020, month=5), period='m') == 4
-    assert analyzer.total_number_of_characters(start=dt(year=2020, month=6), period='m') == 0
-    assert analyzer.total_number_of_characters(start=dt(year=2020, month=8), period='m') == 5
-
-
-def test_total_number_of_messages_sent(analyzer):
-    assert analyzer.total_number_of_messages_sent() == 17
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2014), period='y') == 6
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2018), period='y') == 2
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2020), period='y') == 9
-
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2014, month=9), period='m') == 1
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2014, month=11), period='m') == 4
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2014, month=12), period='m') == 1
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2018, month=1), period='m') == 2
-
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2000), period='y') == 0
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2011, month=11), period='m') == 0
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2018, month=5), period='m') == 0
-
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2020, month=2), period='m') == 6
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2020, month=3), period='m') == 0
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2020, month=4), period='m') == 2
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2020, month=5), period='m') == 0
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2020, month=6), period='m') == 0
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2020, month=8), period='m') == 1
-
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2020, month=2, day=13), period='d') == 1
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2020, month=2, day=13, hour=6), period='h') == 1
-    assert analyzer.total_number_of_messages_sent(start=dt(year=2020, month=2, day=13, hour=18), period='h') == 0
-
-
-def test_total_number_of_words_sent(analyzer):
-    assert analyzer.total_number_of_words_sent() == 69
-
-    assert analyzer.total_number_of_words_sent(start=dt(year=2000), period='y') == 0
-    assert analyzer.total_number_of_words_sent(start=dt(year=2014), period='y') == 16
-    assert analyzer.total_number_of_words_sent(start=dt(year=2018), period='y') == 31
-    assert analyzer.total_number_of_words_sent(start=dt(year=2020), period='y') == 22
-
-    assert analyzer.total_number_of_words_sent(start=dt(year=2014, month=9), period='m') == 6
-    assert analyzer.total_number_of_words_sent(start=dt(year=2014, month=11), period='m') == 9
-    assert analyzer.total_number_of_words_sent(start=dt(year=2014, month=12), period='m') == 1
-
-    assert analyzer.total_number_of_words_sent(start=dt(year=2018, month=1), period='m') == 31
-    assert analyzer.total_number_of_words_sent(start=dt(year=2018, month=2), period='m') == 0
-
-    assert analyzer.total_number_of_words_sent(start=dt(year=2020, month=2), period='m') == 16
-    assert analyzer.total_number_of_words_sent(start=dt(year=2020, month=3), period='m') == 0
-    assert analyzer.total_number_of_words_sent(start=dt(year=2020, month=4), period='m') == 4
-    assert analyzer.total_number_of_words_sent(start=dt(year=2020, month=5), period='m') == 0
-    assert analyzer.total_number_of_words_sent(start=dt(year=2020, month=6), period='m') == 0
-    assert analyzer.total_number_of_words_sent(start=dt(year=2020, month=8), period='m') == 2
-
-    assert analyzer.total_number_of_words_sent(start=dt(year=2020, month=2, day=13), period='d') == 5
-    assert analyzer.total_number_of_words_sent(start=dt(year=2020, month=2, day=13, hour=6), period='h') == 5
-    assert analyzer.total_number_of_words_sent(start=dt(year=2020, month=2, day=13, hour=7), period='h') == 0
-
-
-def test_total_number_of_characters_sent(analyzer):
-    assert analyzer.total_number_of_characters_sent() == 311
-
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2000), period='y') == 0
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2014), period='y') == 60
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2018), period='y') == 167
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020), period='y') == 84
-
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2014, month=9), period='m') == 24
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2014, month=11), period='m') == 33
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2014, month=12), period='m') == 3
-
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2018, month=1), period='m') == 167
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2018, month=2), period='m') == 0
-
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020, month=2), period='m') == 62
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020, month=3), period='m') == 0
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020, month=4), period='m') == 17
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020, month=5), period='m') == 0
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020, month=6), period='m') == 0
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020, month=8), period='m') == 5
-
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020, month=2, day=13, hour=6), period='d') == 21
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020, month=2, day=13, hour=7), period='d') == 0
-
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020, month=2, day=13, hour=6), period='h') == 21
-    assert analyzer.total_number_of_characters_sent(start=dt(year=2020, month=2, day=13, hour=7), period='h') == 0
-
-
-def test_total_number_of_messages_received(analyzer):
-    assert analyzer.total_number_of_messages_received() == 12
-    assert analyzer.total_number_of_messages_received(start=dt(year=2000), period='y') == 0
-    assert analyzer.total_number_of_messages_received(start=dt(year=2014), period='y') == 5
-    assert analyzer.total_number_of_messages_received(start=dt(year=2018), period='y') == 1
-    assert analyzer.total_number_of_messages_received(start=dt(year=2020), period='y') == 6
-
-    assert analyzer.total_number_of_messages_received(start=dt(year=2011, month=11), period='m') == 0
-
-    assert analyzer.total_number_of_messages_received(start=dt(year=2014, month=9), period='m') == 0
-    assert analyzer.total_number_of_messages_received(start=dt(year=2014, month=11), period='m') == 4
-    assert analyzer.total_number_of_messages_received(start=dt(year=2014, month=12), period='m') == 1
-
-    assert analyzer.total_number_of_messages_received(start=dt(year=2018, month=1), period='m') == 1
-    assert analyzer.total_number_of_messages_received(start=dt(year=2018, month=5), period='m') == 0
-
-    assert analyzer.total_number_of_messages_received(start=dt(year=2020, month=2), period='m') == 4
-    assert analyzer.total_number_of_messages_received(start=dt(year=2020, month=3), period='m') == 1
-    assert analyzer.total_number_of_messages_received(start=dt(year=2020, month=4), period='m') == 0
-    assert analyzer.total_number_of_messages_received(start=dt(year=2020, month=5), period='m') == 1
-    assert analyzer.total_number_of_messages_received(start=dt(year=2020, month=8), period='m') == 0
-
-    assert analyzer.total_number_of_messages_received(start=dt(year=2020, month=2, day=13), period='d') == 1
-    assert analyzer.total_number_of_messages_received(start=dt(year=2020, month=2, day=14), period='d') == 2
-    assert analyzer.total_number_of_messages_received(start=dt(year=2020, month=2, day=18), period='d') == 1
-
-
-def test_total_number_of_words_received(analyzer):
-    assert analyzer.total_number_of_words_received() == 17
-
-    assert analyzer.total_number_of_words_received(start=dt(year=2000), period='y') == 0
-    assert analyzer.total_number_of_words_received(start=dt(year=2014), period='y') == 4
-    assert analyzer.total_number_of_words_received(start=dt(year=2018), period='y') == 1
-    assert analyzer.total_number_of_words_received(start=dt(year=2020), period='y') == 12
-
-    assert analyzer.total_number_of_words_received(start=dt(year=2014, month=9), period='m') == 0
-    assert analyzer.total_number_of_words_received(start=dt(year=2014, month=11), period='m') == 4
-    assert analyzer.total_number_of_words_received(start=dt(year=2014, month=12), period='m') == 0
-
-    assert analyzer.total_number_of_words_received(start=dt(year=2018, month=1), period='m') == 1
-    assert analyzer.total_number_of_words_received(start=dt(year=2018, month=2), period='m') == 0
-
-    assert analyzer.total_number_of_words_received(start=dt(year=2020, month=2), period='m') == 11
-    assert analyzer.total_number_of_words_received(start=dt(year=2020, month=3), period='m') == 0
-    assert analyzer.total_number_of_words_received(start=dt(year=2020, month=5), period='m') == 1
-
-    assert analyzer.total_number_of_words_received(start=dt(year=2020, month=2, day=13), period='d') == 9
-    assert analyzer.total_number_of_words_received(start=dt(year=2020, month=2, day=14), period='d') == 2
-    assert analyzer.total_number_of_words_received(start=dt(year=2020, month=2, day=18), period='d') == 0
-
-
-def test_total_number_of_characters_received(analyzer):
-    assert analyzer.total_number_of_characters_received() == 68
-
-    assert analyzer.total_number_of_characters_received(start=dt(year=2000), period='y') == 0
-    assert analyzer.total_number_of_characters_received(start=dt(year=2014), period='y') == 9
-    assert analyzer.total_number_of_characters_received(start=dt(year=2018), period='y') == 3
-    assert analyzer.total_number_of_characters_received(start=dt(year=2020), period='y') == 56
-
-    assert analyzer.total_number_of_characters_received(start=dt(year=2014, month=9), period='m') == 0
-    assert analyzer.total_number_of_characters_received(start=dt(year=2014, month=11), period='m') == 9
-    assert analyzer.total_number_of_characters_received(start=dt(year=2014, month=12), period='m') == 0
-
-    assert analyzer.total_number_of_characters_received(start=dt(year=2018, month=1), period='m') == 3
-    assert analyzer.total_number_of_characters_received(start=dt(year=2018, month=2), period='m') == 0
-
-    assert analyzer.total_number_of_characters_received(start=dt(year=2020, month=2), period='m') == 52
-    assert analyzer.total_number_of_characters_received(start=dt(year=2020, month=3), period='m') == 0
-    assert analyzer.total_number_of_characters_received(start=dt(year=2020, month=5), period='m') == 4
-
-    assert analyzer.total_number_of_characters_received(start=dt(year=2020, month=2, day=13), period='d') == 30
-    assert analyzer.total_number_of_characters_received(start=dt(year=2020, month=2, day=14), period='d') == 22
-    assert analyzer.total_number_of_characters_received(start=dt(year=2020, month=2, day=18), period='d') == 0
diff --git a/tests/TestPeople.py b/tests/TestPeople.py
deleted file mode 100644
index 61295d8..0000000
--- a/tests/TestPeople.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import pytest
-
-
-
-@pytest.fixture()
-def people_names():
-    return ['John Doe', 'Donald Duck', 'Szett Droxler', 'Foo Bar', 'Tőke Hal', 'Dér Dénes', 'Teflon Musk', 'Daisy Duck',
-            'Guy Fawkes', 'Benedek Elek']
-
-
-def test_specific_people_has_or_has_not_got_messages(people):
-    # TODO LATER parametrize
-    import pandas as pd
-    assert isinstance(people.data.get('Benedek Elek').get('messages'), pd.DataFrame)
-    assert isinstance(people.data.get('Teflon Musk').get('messages'), pd.DataFrame)
-    assert isinstance(people.data.get('Tőke Hal').get('messages'), pd.DataFrame)
-    assert not isinstance(people.data.get('John Doe').get('messages'), pd.DataFrame)
-    assert not isinstance(people.data.get('Szett Droxler').get('messages'), pd.DataFrame)
-    assert not isinstance(people.data.get('Daisy Duck').get('messages'), pd.DataFrame)
-    assert not isinstance(people.data.get('Guy Fawkes').get('messages'), pd.DataFrame)
-
-
-def test_people_name(people, people_names):
-    people_without_groups = [p for p in people.data.keys() if not p.startswith('group')]
-    assert sorted(people_names) == sorted(people_without_groups)
-
-
-def test_some_convos_are_with_friends(people):
-    assert people.data.get('Teflon Musk').get('friend')
-    assert not people.data.get('Benedek Elek').get('friend')
-
-
-def test_specific_people_has_or_has_not_got_media(people):
-    assert people.data.get('Teflon Musk').get('media_dir')
-
-#TODO LATER test individuals too
\ No newline at end of file
diff --git a/tests/conftest.py b/tests/conftest.py
index 430e923..ebdc35e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,10 +1,14 @@
 import pytest
-from People import People
+import os
 
-TEST_DATA_PATH = '/home/levente/projects/facebook-data-miner/tests/test_data'
+from miner.People import People
+
+TEST_DATA_PATH = f'{os.path.dirname(os.path.abspath(__file__))}/test_data'  # anchored to this file, not the cwd
 
 
 @pytest.fixture(scope='session')
-def people():
-    p = People(path=TEST_DATA_PATH)
-    return p
+def get_people():
+    def _get_people(name=None):
+        return People(path=TEST_DATA_PATH, name=name)
+    return _get_people
+
diff --git a/tests/TestConversationAnalyzer.py b/tests/test_ConversationAnalyzer.py
similarity index 87%
rename from tests/TestConversationAnalyzer.py
rename to tests/test_ConversationAnalyzer.py
index 015ac19..61ab388 100644
--- a/tests/TestConversationAnalyzer.py
+++ b/tests/test_ConversationAnalyzer.py
@@ -1,42 +1,33 @@
 import pytest
-from ConversationAnalyzer import ConversationAnalyzer
-from People import People
-from utils import dt
 
-TEST_DATA_PATH = '/home/levente/projects/facebook-data-miner/tests/test_data'
+from miner.Analyzer import Analyzer
+from miner.utils import dt
 
 
-# @pytest.mark.parametrize("test_input,expected", [("3+5", 8), ("2+4", 6), ("6*9", 42)])
-# def test_eval(test_input, expected):
-#     assert eval(test_input) == expected
-
-# get\(\'.*\'\)\.
-
-
-@pytest.fixture(scope='session')
-def person(people):
-    def _person(name):
-        return people.individuals[name]
-
-    return _person
+# @pytest.fixture(scope='session')
+# def person(get_people):
+#     def _person(name):
+#         people = get_people(name)
+#         return people.data[name]
+#
+#     return _person
 
 
 @pytest.fixture(scope='session')
-def analyze(person):
+def analyze(get_people):
     def _analyze(name):
-        individual = person(name)
-        return ConversationAnalyzer(name, individual.messages)
+        people = get_people(name)
+        return Analyzer(people)
 
     return _analyze
 
 
 @pytest.fixture(scope='session')
-def statistics(person, analyze):
+def statistics(analyze):
     def _stats(name, **kwargs):
-        individual = person(name)
         analyzer = analyze(name)
         if 'subject' in kwargs or 'start' in kwargs or 'end' in kwargs:  # and others
-            return analyzer.get_stats(individual.messages, **kwargs)
+            return analyzer.get_stats(**kwargs)
         else:
             return analyzer.stats
 
@@ -224,11 +215,5 @@ def test_stats_teflon_musk_all_2014_12(statistics):
     assert stats.char_count == 0
     # assert stats.most_used_chars == 0
 
-class TestConversationAnalyzer: # Foo Bar
-    pass
 
 
-def test_time_series_analysis_for_user(analyze):
-    analyzer = analyze('Teflon Musk')
-    analyzer.get_time_series_data(subject='all')
-    assert 1
diff --git a/tests/test_Conversations.py b/tests/test_Conversations.py
new file mode 100644
index 0000000..5a7cb58
--- /dev/null
+++ b/tests/test_Conversations.py
@@ -0,0 +1,75 @@
+import pandas as pd
+import pytest
+from miner.Conversations import Conversations
+from miner.Individual import Individual
+from miner import utils
+import os
+
+TEST_DATA_PATH = f'{os.path.dirname(os.path.abspath(__file__))}/test_data'  # anchored to this file, not the cwd
+
+
+@pytest.fixture()
+def conversations():  # builds a Conversations object over the bundled test data directory
+    return Conversations(TEST_DATA_PATH)
+
+
+@pytest.fixture
+def people_from_private_convos(conversations):
+    return conversations.get_people_from_private_messages()
+
+
+def test_if_paths_are_registered(conversations):
+    assert len(conversations.private_convo_paths) == 4
+    assert len(conversations.group_convo_paths) == 3
+    assert len(conversations.deleted_user_convo_paths) == 0
+
+
+def test_get_all_people_from_private_messages(people_from_private_convos):
+    people = list(people_from_private_convos.keys())
+    expected = ['Foo Bar', 'Teflon Musk', 'Benedek Elek', 'Tőke Hal']
+    assert sorted(people) == sorted(expected)
+
+
+def test_get_all_people_from_convo(conversations):
+    people = []
+    # indie
+    people += list(conversations.private_convo_paths.keys())
+    # group
+    people_from_groups = [p for people in conversations.group_convo_paths.values() for p in people]
+
+    people += people_from_groups
+
+    expected = ['Dér Dénes', 'Facebook User', 'Foo Bar', 'John Doe', 'Teflon Musk', 'Benedek Elek', 'Donald Duck',
+                'Tőke Hal']
+
+    assert sorted(list(set(people))) == sorted(expected)
+
+
+def test_people_are_individual_instances(people_from_private_convos):
+    assert all([isinstance(person, Individual) for person in people_from_private_convos.values()])
+
+
+def test_all_individual_have_messages_df(people_from_private_convos):
+    assert all([isinstance(data.messages, pd.DataFrame) for data in people_from_private_convos.values()])
+
+
+def test_all_individual_have_dir(people_from_private_convos):
+    assert all([data.messages_dir for data in people_from_private_convos.values()])
+
+
+def test_some_individual_as_media_dir(people_from_private_convos):
+    assert people_from_private_convos.get('Teflon Musk').media_dir
+    assert not people_from_private_convos.get('Benedek Elek').media_dir
+
+
+def test_individual_media_has_one_folder_of_possibles(people_from_private_convos):
+    listed_dir = os.listdir(
+        f"{TEST_DATA_PATH}/{utils.MESSAGE_SUBPATH}/{people_from_private_convos.get('Teflon Musk').media_dir}")
+    assert 'files' in listed_dir
+    assert 'photos' in listed_dir
+    assert 'audio' not in listed_dir
+
+
+def test_groups_have_more_than_two_participates(people_from_private_convos):  # NOTE(review): likely vacuous - confirm
+    groups = {convo: data for convo, data in people_from_private_convos.items() if convo.startswith('group')}  # presumably always empty: the private-convo fixture appears to be keyed by person name - verify
+    assert all([len(data.get('participants')) > 2 for data in groups.values()])  # all([]) is True, so this passes trivially whenever `groups` is empty
diff --git a/tests/TestFriends.py b/tests/test_Friends.py
similarity index 85%
rename from tests/TestFriends.py
rename to tests/test_Friends.py
index f336609..c6abfea 100644
--- a/tests/TestFriends.py
+++ b/tests/test_Friends.py
@@ -1,8 +1,9 @@
 import pytest
+import os
 
-from Friends import Friends
+from miner.Friends import Friends
 
-TEST_DATA_PATH = '/home/levente/projects/facebook-data-miner/tests/test_data'
+TEST_DATA_PATH = f'{os.path.dirname(os.path.abspath(__file__))}/test_data'  # anchored to this file, not the cwd
 
 
 @pytest.fixture()
@@ -31,7 +32,7 @@ def test_get_peoples_names_from_friends(friends, expected_friends):
 def test_get_peoples_compact_name_from_friends(friends, expected_friends):
     expected_compact_names = [value.get('compact_name') for value in expected_friends.values()]
 
-    assert all([p.get('compact_name') in expected_compact_names for p in friends.values()])
+    assert all([p.compact_name in expected_compact_names for p in friends.values()])
 
 
 
diff --git a/tests/test_Messages.py b/tests/test_Messages.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_MessagingAnalyzer.py b/tests/test_MessagingAnalyzer.py
new file mode 100644
index 0000000..969dc5d
--- /dev/null
+++ b/tests/test_MessagingAnalyzer.py
@@ -0,0 +1,245 @@
+import pytest
+
+from miner.Analyzer import Analyzer
+from miner.utils import dt
+
+@pytest.fixture(scope='session')
+def analyzer(get_people):
+    """Session-scoped Analyzer built from the object returned by calling the get_people fixture."""
+    return Analyzer(get_people())
+
+
+def test_total_number_of_messages(analyzer):
+    """msg_count with no subject argument: unwindowed total, then 'y', 'm', 'd' and 'h' periods."""
+    def msgs(**window):
+        return analyzer.get_count(attribute='msg_count', **window)
+
+    assert msgs() == 29
+    assert msgs(start=dt(year=2000), period='y') == 0
+    assert msgs(start=dt(year=2014), period='y') == 11
+    assert msgs(start=dt(year=2018), period='y') == 3
+    assert msgs(start=dt(year=2020), period='y') == 15
+
+    assert msgs(start=dt(year=2011, month=11), period='m') == 0
+    assert msgs(start=dt(year=2014, month=9), period='m') == 1
+    assert msgs(start=dt(year=2014, month=11), period='m') == 8
+    assert msgs(start=dt(year=2014, month=12), period='m') == 2
+    assert msgs(start=dt(year=2018, month=1), period='m') == 3
+    assert msgs(start=dt(year=2018, month=5), period='m') == 0
+    assert msgs(start=dt(year=2020, month=2), period='m') == 10
+    assert msgs(start=dt(year=2020, month=3), period='m') == 1  # jpg
+    assert msgs(start=dt(year=2020, month=4), period='m') == 2
+    assert msgs(start=dt(year=2020, month=5), period='m') == 1
+    assert msgs(start=dt(year=2020, month=6), period='m') == 0
+    assert msgs(start=dt(year=2020, month=8), period='m') == 1
+
+    assert msgs(start=dt(year=2020, month=2, day=13), period='d') == 2
+    assert msgs(start=dt(year=2020, month=2, day=13, hour=6), period='h') == 2
+    assert msgs(start=dt(year=2020, month=2, day=13, hour=6), period='d') == 4
+
+
+def test_total_number_of_words(analyzer):
+    """word_count with no subject argument: unwindowed total, then 'y', 'm' and 'd' periods."""
+    def words(**window):
+        return analyzer.get_count(attribute='word_count', **window)
+
+    assert words() == 86
+    assert words(start=dt(year=2000), period='y') == 0
+    assert words(start=dt(year=2014), period='y') == 20
+    assert words(start=dt(year=2018), period='y') == 32
+    assert words(start=dt(year=2020), period='y') == 34
+    assert words(start=dt(year=2014, month=9), period='m') == 6
+    assert words(start=dt(year=2014, month=11), period='m') == 13
+    assert words(start=dt(year=2014, month=12), period='m') == 1
+    assert words(start=dt(year=2018, month=1), period='m') == 32
+    assert words(start=dt(year=2018, month=2), period='m') == 0
+    assert words(start=dt(year=2020, month=2), period='m') == 27
+    assert words(start=dt(year=2020, month=3), period='m') == 0
+    assert words(start=dt(year=2020, month=4), period='m') == 4
+    assert words(start=dt(year=2020, month=5), period='m') == 1
+    assert words(start=dt(year=2020, month=6), period='m') == 0
+    assert words(start=dt(year=2020, month=8), period='m') == 2
+
+    assert words(start=dt(year=2020, month=2, day=13), period='d') == 14
+    assert words(start=dt(year=2020, month=2, day=13, hour=5), period='d') == 14
+
+
+def test_total_number_of_characters(analyzer):
+    """char_count with no subject argument: unwindowed total, then 'y' and 'm' periods."""
+    def chars(**window):
+        return analyzer.get_count(attribute='char_count', **window)
+
+    assert chars() == 379
+    assert chars(start=dt(year=2000), period='y') == 0
+    assert chars(start=dt(year=2014), period='y') == 69
+    assert chars(start=dt(year=2018), period='y') == 170
+    assert chars(start=dt(year=2020), period='y') == 140
+    assert chars(start=dt(year=2014, month=9), period='m') == 24
+    assert chars(start=dt(year=2014, month=11), period='m') == 42
+    assert chars(start=dt(year=2014, month=12), period='m') == 3
+    assert chars(start=dt(year=2018, month=1), period='m') == 170
+    assert chars(start=dt(year=2018, month=2), period='m') == 0
+    assert chars(start=dt(year=2020, month=2), period='m') == 114
+    assert chars(start=dt(year=2020, month=3), period='m') == 0
+    assert chars(start=dt(year=2020, month=4), period='m') == 17
+    assert chars(start=dt(year=2020, month=5), period='m') == 4
+    assert chars(start=dt(year=2020, month=6), period='m') == 0
+    assert chars(start=dt(year=2020, month=8), period='m') == 5
+
+
+def test_total_number_of_messages_sent(analyzer):
+    """msg_count for subject='me': unwindowed total, then 'y', 'm', 'd' and 'h' periods."""
+    def sent(**window):
+        return analyzer.get_count(attribute='msg_count', subject='me', **window)
+
+    assert sent() == 17
+    assert sent(start=dt(year=2014), period='y') == 6
+    assert sent(start=dt(year=2018), period='y') == 2
+    assert sent(start=dt(year=2020), period='y') == 9
+    assert sent(start=dt(year=2014, month=9), period='m') == 1
+    assert sent(start=dt(year=2014, month=11), period='m') == 4
+    assert sent(start=dt(year=2014, month=12), period='m') == 1
+    assert sent(start=dt(year=2018, month=1), period='m') == 2
+    assert sent(start=dt(year=2000), period='y') == 0
+    assert sent(start=dt(year=2011, month=11), period='m') == 0
+    assert sent(start=dt(year=2018, month=5), period='m') == 0
+    assert sent(start=dt(year=2020, month=2), period='m') == 6
+    assert sent(start=dt(year=2020, month=3), period='m') == 0
+    assert sent(start=dt(year=2020, month=4), period='m') == 2
+    assert sent(start=dt(year=2020, month=5), period='m') == 0
+    assert sent(start=dt(year=2020, month=6), period='m') == 0
+    assert sent(start=dt(year=2020, month=8), period='m') == 1
+    assert sent(start=dt(year=2020, month=2, day=13), period='d') == 1
+    assert sent(start=dt(year=2020, month=2, day=13, hour=6), period='h') == 1
+    assert sent(start=dt(year=2020, month=2, day=13, hour=18), period='h') == 0
+
+
+def test_total_number_of_words_sent(analyzer):
+    """word_count for subject='me': unwindowed total, then 'y', 'm', 'd' and 'h' periods."""
+    def sent_words(**window):
+        return analyzer.get_count(attribute='word_count', subject='me', **window)
+
+    assert sent_words() == 69
+    assert sent_words(start=dt(year=2000), period='y') == 0
+    assert sent_words(start=dt(year=2014), period='y') == 16
+    assert sent_words(start=dt(year=2018), period='y') == 31
+    assert sent_words(start=dt(year=2020), period='y') == 22
+    assert sent_words(start=dt(year=2014, month=9), period='m') == 6
+    assert sent_words(start=dt(year=2014, month=11), period='m') == 9
+    assert sent_words(start=dt(year=2014, month=12), period='m') == 1
+    assert sent_words(start=dt(year=2018, month=1), period='m') == 31
+    assert sent_words(start=dt(year=2018, month=2), period='m') == 0
+    assert sent_words(start=dt(year=2020, month=2), period='m') == 16
+    assert sent_words(start=dt(year=2020, month=3), period='m') == 0
+    assert sent_words(start=dt(year=2020, month=4), period='m') == 4
+    assert sent_words(start=dt(year=2020, month=5), period='m') == 0
+    assert sent_words(start=dt(year=2020, month=6), period='m') == 0
+    assert sent_words(start=dt(year=2020, month=8), period='m') == 2
+
+    assert sent_words(start=dt(year=2020, month=2, day=13), period='d') == 5
+    assert sent_words(start=dt(year=2020, month=2, day=13, hour=6), period='h') == 5
+    assert sent_words(start=dt(year=2020, month=2, day=13, hour=7), period='h') == 0
+
+
+def test_total_number_of_characters_sent(analyzer):
+    """char_count for subject='me': unwindowed total, then 'y', 'm', 'd' and 'h' periods."""
+    def sent_chars(**window):
+        return analyzer.get_count(attribute='char_count', subject='me', **window)
+
+    assert sent_chars() == 311
+    assert sent_chars(start=dt(year=2000), period='y') == 0
+    assert sent_chars(start=dt(year=2014), period='y') == 60
+    assert sent_chars(start=dt(year=2018), period='y') == 167
+    assert sent_chars(start=dt(year=2020), period='y') == 84
+
+    assert sent_chars(start=dt(year=2014, month=9), period='m') == 24
+    assert sent_chars(start=dt(year=2014, month=11), period='m') == 33
+    assert sent_chars(start=dt(year=2014, month=12), period='m') == 3
+    assert sent_chars(start=dt(year=2018, month=1), period='m') == 167
+    assert sent_chars(start=dt(year=2018, month=2), period='m') == 0
+    assert sent_chars(start=dt(year=2020, month=2), period='m') == 62
+    assert sent_chars(start=dt(year=2020, month=3), period='m') == 0
+    assert sent_chars(start=dt(year=2020, month=4), period='m') == 17
+    assert sent_chars(start=dt(year=2020, month=5), period='m') == 0
+    assert sent_chars(start=dt(year=2020, month=6), period='m') == 0
+    assert sent_chars(start=dt(year=2020, month=8), period='m') == 5
+
+    assert sent_chars(start=dt(year=2020, month=2, day=13, hour=6), period='d') == 21
+    assert sent_chars(start=dt(year=2020, month=2, day=13, hour=7), period='d') == 0
+    assert sent_chars(start=dt(year=2020, month=2, day=13, hour=6), period='h') == 21
+    assert sent_chars(start=dt(year=2020, month=2, day=13, hour=7), period='h') == 0
+
+
+def test_total_number_of_messages_received(analyzer):
+    """msg_count for subject='partner': unwindowed total, then 'y', 'm' and 'd' periods."""
+    def received(**window):
+        return analyzer.get_count(attribute='msg_count', subject='partner', **window)
+
+    assert received() == 12
+    assert received(start=dt(year=2000), period='y') == 0
+    assert received(start=dt(year=2014), period='y') == 5
+    assert received(start=dt(year=2018), period='y') == 1
+    assert received(start=dt(year=2020), period='y') == 6
+    assert received(start=dt(year=2011, month=11), period='m') == 0
+    assert received(start=dt(year=2014, month=9), period='m') == 0
+    assert received(start=dt(year=2014, month=11), period='m') == 4
+    assert received(start=dt(year=2014, month=12), period='m') == 1
+    assert received(start=dt(year=2018, month=1), period='m') == 1
+    assert received(start=dt(year=2018, month=5), period='m') == 0
+    assert received(start=dt(year=2020, month=2), period='m') == 4
+    assert received(start=dt(year=2020, month=3), period='m') == 1
+    assert received(start=dt(year=2020, month=4), period='m') == 0
+    assert received(start=dt(year=2020, month=5), period='m') == 1
+    assert received(start=dt(year=2020, month=8), period='m') == 0
+
+    assert received(start=dt(year=2020, month=2, day=13), period='d') == 1
+    assert received(start=dt(year=2020, month=2, day=14), period='d') == 2
+    assert received(start=dt(year=2020, month=2, day=18), period='d') == 1
+
+
+def test_total_number_of_words_received(analyzer):
+    """word_count for subject='partner': unwindowed total, then 'y', 'm' and 'd' periods."""
+    def received_words(**window):
+        return analyzer.get_count(attribute='word_count', subject='partner', **window)
+
+    assert received_words() == 17
+    assert received_words(start=dt(year=2000), period='y') == 0
+    assert received_words(start=dt(year=2014), period='y') == 4
+    assert received_words(start=dt(year=2018), period='y') == 1
+    assert received_words(start=dt(year=2020), period='y') == 12
+    assert received_words(start=dt(year=2014, month=9), period='m') == 0
+    assert received_words(start=dt(year=2014, month=11), period='m') == 4
+    assert received_words(start=dt(year=2014, month=12), period='m') == 0
+    assert received_words(start=dt(year=2018, month=1), period='m') == 1
+    assert received_words(start=dt(year=2018, month=2), period='m') == 0
+    assert received_words(start=dt(year=2020, month=2), period='m') == 11
+    assert received_words(start=dt(year=2020, month=3), period='m') == 0
+    assert received_words(start=dt(year=2020, month=5), period='m') == 1
+
+    assert received_words(start=dt(year=2020, month=2, day=13), period='d') == 9
+    assert received_words(start=dt(year=2020, month=2, day=14), period='d') == 2
+    assert received_words(start=dt(year=2020, month=2, day=18), period='d') == 0
+
+
+def test_total_number_of_characters_received(analyzer):
+    """char_count for subject='partner': unwindowed total, then 'y', 'm' and 'd' periods."""
+    def received_chars(**window):
+        return analyzer.get_count(attribute='char_count', subject='partner', **window)
+
+    assert received_chars() == 68
+    assert received_chars(start=dt(year=2000), period='y') == 0
+    assert received_chars(start=dt(year=2014), period='y') == 9
+    assert received_chars(start=dt(year=2018), period='y') == 3
+    assert received_chars(start=dt(year=2020), period='y') == 56
+    assert received_chars(start=dt(year=2014, month=9), period='m') == 0
+    assert received_chars(start=dt(year=2014, month=11), period='m') == 9
+    assert received_chars(start=dt(year=2014, month=12), period='m') == 0
+    assert received_chars(start=dt(year=2018, month=1), period='m') == 3
+    assert received_chars(start=dt(year=2018, month=2), period='m') == 0
+    assert received_chars(start=dt(year=2020, month=2), period='m') == 52
+    assert received_chars(start=dt(year=2020, month=3), period='m') == 0
+    assert received_chars(start=dt(year=2020, month=5), period='m') == 4
+
+    assert received_chars(start=dt(year=2020, month=2, day=13), period='d') == 30
+    assert received_chars(start=dt(year=2020, month=2, day=14), period='d') == 22
+    assert received_chars(start=dt(year=2020, month=2, day=18), period='d') == 0
diff --git a/tests/test_People.py b/tests/test_People.py
new file mode 100644
index 0000000..e9f270e
--- /dev/null
+++ b/tests/test_People.py
@@ -0,0 +1,39 @@
+import pytest
+
+
+
+@pytest.fixture()
+def people_names():  # names that test_people_name expects among the non-'group' keys of people.data
+    return ['John Doe', 'Donald Duck', 'Szett Droxler', 'Foo Bar', 'Tőke Hal', 'Dér Dénes', 'Teflon Musk', 'Daisy Duck',
+            'Guy Fawkes', 'Benedek Elek']
+
+@pytest.fixture
+def people(get_people):  # get_people is a fixture defined outside this module (presumably conftest.py)
+    return get_people()
+
+def test_specific_people_has_or_has_not_got_messages(people):
+    """Three named people hold a pandas DataFrame in `.messages`; four others must not."""
+    # TODO LATER parametrize
+    import pandas as pd
+
+    for name in ('Benedek Elek', 'Teflon Musk', 'Tőke Hal'):
+        assert isinstance(people.data.get(name).messages, pd.DataFrame)
+
+    for name in ('John Doe', 'Szett Droxler', 'Daisy Duck', 'Guy Fawkes'):
+        assert not isinstance(people.data.get(name).messages, pd.DataFrame)
+
+
+def test_people_name(people, people_names):
+    individuals = sorted(name for name in people.data.keys() if not name.startswith('group'))
+    assert individuals == sorted(people_names)
+
+
+def test_some_convos_are_with_friends(people):
+    assert people.data.get('Teflon Musk').friend  # `friend` must be truthy for this entry
+    assert not people.data.get('Benedek Elek').friend  # and falsy for this one
+
+
+def test_specific_people_has_or_has_not_got_media(people):
+    assert people.data.get('Teflon Musk').media_dir  # only the "has media" case is asserted so far
+
+# TODO LATER: test individuals too
\ No newline at end of file
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 579569c..3577add 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,5 +1,5 @@
 import unittest
-from utils import *
+from miner.utils import *
 from pathlib import Path
 import reusables
 from reusables.cli import *
@@ -65,23 +65,24 @@
 
 
 def test_generate_date_series():
+    # TODO resolve
     start = datetime(2020, 1, 1, 0, 0)
     end = datetime(2021, 1, 1, 0, 0)
 
-    date_range_year = generate_date_series(start, end, 'y')
+    date_range_year = generate_date_series(period='y', start=start, end=end)
     assert len(date_range_year) == 1 + 1
 
-    date_range_month = generate_date_series(start, end, 'm')
+    date_range_month = generate_date_series(period='m', start=start, end=end)
     assert len(date_range_month) == 12 + 1
 
-    date_range_day = generate_date_series(start, end, 'd')
+    date_range_day = generate_date_series(period='d', start=start, end=end)
     assert len(date_range_day) == 366 + 1
 
-    date_range_hour = generate_date_series(start, end, 'h')
+    date_range_hour = generate_date_series(period='h', start=start, end=end)
     assert len(date_range_hour) == (366 * 24) + 1
 
     for day in date_range_day:
         assert isinstance(day, datetime)
 
     with pytest.raises(ValueError):
-        faulty_date_range = generate_date_series(start, end, )
+        faulty_date_range = generate_date_series(start=start, end=end, )