diff --git a/data_extractor/tests/data/_chat.txt b/data_extractor/tests/data/_chat.txt
index c43d07a..845305d 100644
--- a/data_extractor/tests/data/_chat.txt
+++ b/data_extractor/tests/data/_chat.txt
@@ -1,3 +1,4 @@
+[16/03/2022, 15:10:17] Messages and calls are end-to-end encrypted. No one outside of this chat, not even WhatsApp, can read or listen to them. Tap to learn more.
 [16/03/2022, 15:20:25] person1: Hi shiva!
 [16/03/2022, 15:25:38] person2: Hi 👋
 [16/03/2022, 15:26:48] person3: Hoi!
diff --git a/data_extractor/tests/data/whatsapp_chat.zip b/data_extractor/tests/data/whatsapp_chat.zip
index b3f6edf..c40699d 100644
Binary files a/data_extractor/tests/data/whatsapp_chat.zip and b/data_extractor/tests/data/whatsapp_chat.zip differ
diff --git a/data_extractor/tests/test_whatsapp_chat.py b/data_extractor/tests/test_whatsapp_chat.py
index b10b0b6..87f3768 100644
--- a/data_extractor/tests/test_whatsapp_chat.py
+++ b/data_extractor/tests/test_whatsapp_chat.py
@@ -1,59 +1,75 @@
-from data_extractor.whatsapp_chat import process
-from data_extractor.whatsapp_chat import anonymize_participants
-from data_extractor.whatsapp_chat import get_df_per_participant
 from pathlib import Path
 import pandas as pd
-from pandas.testing import assert_frame_equal
 
+from whatsapp_chat import process
+from whatsapp_chat import anonymize_participants
 
+from pandas.testing import assert_frame_equal
 
 
 DATA_PATH = Path(__file__).parent / "data"
 
 EXPECTED = [
-    {'username': 'person1', 'Total number of words': 20, 'Number of URLs': 1, 'Number of shared locations': 1, 'file_no': 0, 'Number of messages': 3,
-     'Date first message': pd.to_datetime('2022-03-16 15:20:25'), 'Date last message': pd.to_datetime('2022-03-24 20:19:38')},
-
-    {'username': 'person2', 'Total number of words': 7, 'Number of URLs': 1, 'Number of shared locations': 0, 'file_no': 0, 'Number of messages': 3,
-     'Date first message': pd.to_datetime('2022-03-16 15:25:38'), 'Date last message': pd.to_datetime('2022-03-26 18:52:15')},
-
-    {'username': 'person3', 'Total number of words': 1, 'Number of URLs': 0, 'Number of shared locations': 0, 'file_no': 0, 'Number of messages': 1,
-     'Date first message': pd.to_datetime('2022-03-16 15:26:48'), 'Date last message': pd.to_datetime('2022-03-16 15:26:48')},
-
-    {'username': 'person4', 'Total number of words': 21, 'Number of URLs': 0, 'Number of shared locations': 0, 'file_no': 0, 'Number of messages': 2,
-     'Date first message': pd.to_datetime('2020-07-14 22:05:54'), 'Date last message': pd.to_datetime('2022-03-20 20:08:51')}
+    {'username': 'person1', 'Total number of words': 20, 'Number of URLs': 1,
+     'Number of shared locations': 1, 'file_no': 0, 'Number of messages': 3,
+     'Date first message': pd.to_datetime('2022-03-16 15:20:25'),
+     'Date last message': pd.to_datetime('2022-03-24 20:19:38'),
+     'user_reply2': 'person2', 'reply_2_user': 'person2'},
+
+    {'username': 'person2', 'Total number of words': 7, 'Number of URLs': 1,
+     'Number of shared locations': 0, 'file_no': 0, 'Number of messages': 3,
+     'Date first message': pd.to_datetime('2022-03-16 15:25:38'),
+     'Date last message': pd.to_datetime('2022-03-26 18:52:15'),
+     'user_reply2': 'person1', 'reply_2_user': 'person1'},
+
+    {'username': 'person3', 'Total number of words': 1, 'Number of URLs': 0,
+     'Number of shared locations': 0, 'file_no': 0, 'Number of messages': 1,
+     'Date first message': pd.to_datetime('2022-03-16 15:26:48'),
+     'Date last message': pd.to_datetime('2022-03-16 15:26:48'),
+     'user_reply2': 'person2', 'reply_2_user': 'person2'},
+
+    {'username': 'person4', 'Total number of words': 21, 'Number of URLs': 0,
+     'Number of shared locations': 0, 'file_no': 0, 'Number of messages': 2,
+     'Date first message': pd.to_datetime('2020-07-14 22:05:54'),
+     'Date last message': pd.to_datetime('2022-03-20 20:08:51'),
+     'user_reply2': 'person1', 'reply_2_user': 'person1'}
 ]
 
-# EXPECTED_1 = {'Description': ['Total number of words', 'Number of messages', 'Date first message', 'Date last message',
-#                               'Number of URLs', 'Number of shared locations'],
-#               'Value': [20, 3, pd.to_datetime('2022-03-16 15:20:25'), pd.to_datetime('2022-03-24 20:19:38'), 1, 1]}
-#
-# df_expected_1 = pd.DataFrame(data=EXPECTED_1)
-
 
 def test_process():
     """ Test process function.
-        compares the expected dataframe with the output of the process function to check if all the columns are match.
+        compares the expected dataframe with the output of the process function
+         to check that all the columns match.
         Raises
         -------
-        AssertionError: When provided expected dataframe could not match the participants dataframe
+        AssertionError: When the expected dataframe does not match the
+         participants dataframe
         """
 
     df_expected = pd.DataFrame(EXPECTED)
     df_expected = anonymize_participants(df_expected)
     df_expected['Number of messages'] = df_expected['Number of messages'].astype('int64')
     df_expected['Number of URLs'] = df_expected['Number of URLs'].astype('int32')
-    df_expected['Number of shared locations'] = df_expected['Number of shared locations'].astype('int32')
+    df_expected['Number of shared locations'] = \
+        df_expected['Number of shared locations'].astype('int32')
     df_expected['file_no'] = df_expected['file_no'].astype('int32')
 
     results = []
     df_melt = pd.melt(df_expected, id_vars=["username"],
-                      value_vars=["Total number of words", "Number of messages", "Date first message", "Date last message",
-                                  "Number of URLs", "file_no", "Number of shared locations"], var_name='Description', value_name='Value')
+                      value_vars=["Total number of words",
+                                  "Number of messages",
+                                  "Date first message",
+                                  "Date last message",
+                                  "Number of URLs",
+                                  "file_no",
+                                  "Number of shared locations",
+                                  "reply_2_user",
+                                  "user_reply2"],
+                      var_name='Description', value_name='Value')
 
     usernames = df_melt["username"].unique()
-    for u in usernames:
-        df_user = df_melt[(df_melt["username"] == u) & df_melt["Value"] != 0]
+    for user in usernames:
+        df_user = df_melt[(df_melt["username"] == user) & df_melt["Value"] != 0]
         results.append(df_user)
 
     expected_results = []
@@ -66,29 +82,10 @@ def test_process():
                 "data_frame": df[["Description", "Value"]].reset_index(drop=True)
             }
         )
-    # print(type(expected_results[0]["data_frame"]))
-    # print(expected_results[1]["data_frame"])
-    # print('******')
 
-    # result = process(DATA_PATH.joinpath("whatsapp_chat.zip"))
     df_result = process(DATA_PATH.joinpath("_chat.txt"))
-    # print(df_result[1]["data_frame"])
+
     assert_frame_equal(df_result[0]["data_frame"], expected_results[0]["data_frame"])
     assert_frame_equal(df_result[1]["data_frame"], expected_results[1]["data_frame"])
     assert_frame_equal(df_result[2]["data_frame"], expected_results[2]["data_frame"])
     assert_frame_equal(df_result[3]["data_frame"], expected_results[3]["data_frame"])
-
-    # print(type(df_result[0]["data_frame"]))
-    # print(df_result[0]["data_frame"])
-    # print(df_expected_1)
-    #
-    # # assert len(result_file) == 1
-    #
-    #
-    # assert_frame_equal(df_result[0]["data_frame"], df_expected_1)
-
-
-if __name__ == "__main__":
-    test_process()
-
-
diff --git a/data_extractor/whatsapp_chat/__init__.py b/data_extractor/whatsapp_chat/__init__.py
index d679838..f6b2045 100644
--- a/data_extractor/whatsapp_chat/__init__.py
+++ b/data_extractor/whatsapp_chat/__init__.py
@@ -7,11 +7,7 @@
 import re
 from datetime import datetime
 import pandas as pd
-import hashlib
 import zipfile
-from pathlib import Path
-#from nltk.corpus import stopwords
-#from sklearn.feature_extraction.text import TfidfVectorizer
 
 
 URL_PATTERN = r'(https?://\S+)'
@@ -20,10 +16,7 @@
 FILE_RE = re.compile(r".*.txt$")
 HIDDEN_FILE_RE = re.compile(r".*__MACOSX*")
 
-SYSTEM_MESSAGES=[
-    'Messages and calls are end-to-end encrypted. No one outside of this chat, not even WhatsApp, can read or listen to them.',
-    'Berichten en gesprekken worden end-to-end versleuteld. Niemand buiten deze chat kan ze lezen of beluisteren, zelfs WhatsApp niet.'
-]
+SYSTEM_MESSAGES = ['end-to-end','WhatsApp']
 hformats = ['%m/%d/%y, %H:%M - %name:', '[%d/%m/%y, %H:%M:%S] %name:', '%d-%m-%y %H:%M - %name:', '[%d-%m-%y %H:%M:%S] %name:']
 
 
@@ -42,42 +35,33 @@ class ColnamesDf:
     MESSAGE_LENGTH = 'message_length'
     """Message length column"""
 
-    FirstMessage = 'Date first message' #'first_message_date'
+    FirstMessage = 'Date first message'
     """Date of first message column"""
 
-    LastMessage = 'Date last message' #'last_message_date'
+    LastMessage = 'Date last message'
     """Date of last message column"""
 
-    MESSAGE_NO = 'Number of messages' #'message_no'
+    MESSAGE_NO = 'Number of messages'
     """Number of Message  column"""
 
-    WORDS_NO = 'Total number of words' #'total_words_no'
+    WORDS_NO = 'Total number of words'
     """Total number of words  column"""
 
-    REPLY_2USER = 'reply_2_user'
+    REPLY_2USER = 'Who replies to you the most often?'
     """Who replies to the user the most column"""
 
-    MAX_REPLY_2 = 'max_reply_2'
+    USER_REPLY2 = 'Who do you most often reply to?'
     """User replies to who the most column"""
 
-    USER_REPLY2 = 'user_reply2'
-    """User replies to who the most column"""
-
-    URL_NO = 'Number of URLs'#,'url_no'
+    URL_NO = 'Number of URLs'
     """Number of URLs column"""
 
-    LOCATION_NO = 'Number of shared locations'#'location_no'
+    LOCATION_NO = 'Number of shared locations'
     """Number of locations column"""
 
-    FILE_NO = 'Number of shared files'#'file_no'
+    FILE_NO = 'Number of shared files'
     """Number of files column"""
 
-    OUT_DEGREE = 'out_degree'
-    """Total number of sent message column"""
-
-    IN_DEGREE = 'in_degree'
-    """Total number of received message column"""
-
     EMOJI_NO = 'emoji_no'
     """Total number of emojies column"""
 
@@ -215,9 +199,11 @@ def remove_alerts_from_df(r_x, df):
     pandas.DataFrame
         Fixed version of input DataFrame
     """
+
+    alerts_no = count_alerts(r_x, df)
     df_new = df.copy()
     df_new.loc[:, COLNAMES_DF.MESSAGE] = df_new[COLNAMES_DF.MESSAGE].apply(lambda x: remove_alerts_from_line(r_x, x))
-    return df_new
+    return df_new,alerts_no
 
 
 def remove_alerts_from_line(r_x, line_df):
@@ -234,11 +220,32 @@ def remove_alerts_from_line(r_x, line_df):
         Cleaned message string
     """
     if re.search(r_x, line_df):
+        print(line_df[:re.search(r_x, line_df).start()])
         return line_df[:re.search(r_x, line_df).start()]
     else:
         return line_df
 
 
+def count_alerts(r_x, df):
+    """Count WhatsApp alert occurrences in the message column of a chat.
+
+    Parameters
+    ----------
+    r_x : str
+        Regular expression to detect WhatsApp warnings
+    df : pandas.DataFrame
+        pandas.DataFrame with all interventions
+
+    Returns
+    -------
+    int
+        Total number of matches of ``r_x`` over all messages
+    """
+    # Every match is counted, so one message may contribute several alerts.
+    alerts = df[COLNAMES_DF.MESSAGE].apply(lambda x: re.findall(r_x, x))
+    return int(alerts.str.len().sum())
+
+
 def get_message(text, headers, i):
     """Get i:th message from text.
     Parameters
@@ -320,9 +327,12 @@ def make_chat_df(log_error, text, hformat):
     # Parse chat to DataFrame
     try:
         df = parse_text(text, r)
-        df = remove_alerts_from_df(r_x, df)
+        df, alerts_no = remove_alerts_from_df(r_x, df)
         df = add_schema(df)
 
+        if alerts_no>0:
+            log_error("Number of unprocessed system messages: "+str(alerts_no))
+
         return df
     except:
         print(f"hformat : {hformat} is not match with the given text")
@@ -481,21 +491,15 @@ def anonymize_participants(df_participants):
     # df_participants[COLNAMES_DF.USER_REPLY2] = df_participants[COLNAMES_DF.USER_REPLY2].apply(lambda u: anonym_txt(u,salt))
     # df_participants[['username', 'user_reply2']] = df_participants[['username', 'user_reply2']].stack().rank(method='dense').unstack()
 
-    # stacked = df_participants[['username', 'user_reply2', 'reply_2_user']].stack()
-    # df_participants[['username', 'user_reply2', 'reply_2_user']] = pd.Series(stacked.factorize()[0],
-    #                                                                          index=stacked.index).unstack()
-    # df_participants[['username', 'user_reply2', 'reply_2_user']] = 'person' + df_participants[['username', 'user_reply2',
-    #                                                                                     'reply_2_user']].astype(str)
-    #
-
-    df_participants['username'] = pd.factorize(df_participants.username)[0] + 1
-    df_participants['username'] = 'person' + df_participants['username'].astype(str)
-
-
+    stacked = df_participants[[COLNAMES_DF.USERNAME,COLNAMES_DF.USER_REPLY2, COLNAMES_DF.REPLY_2USER]].stack()
+    df_participants[[COLNAMES_DF.USERNAME,COLNAMES_DF.USER_REPLY2, COLNAMES_DF.REPLY_2USER]] = \
+        pd.Series(stacked.factorize()[0], index=stacked.index).unstack()
+    df_participants[[COLNAMES_DF.USERNAME,COLNAMES_DF.USER_REPLY2, COLNAMES_DF.REPLY_2USER]] = \
+        'person' + df_participants[[COLNAMES_DF.USERNAME,COLNAMES_DF.USER_REPLY2, COLNAMES_DF.REPLY_2USER]].astype(str)
     return df_participants
 
 
-def get_df_per_participant(df, anonymize):
+def get_wide_to_long_participant(df):
     """Generate one dataframe for each participant .
         Parameter
         ----------
@@ -514,16 +518,16 @@ def get_df_per_participant(df, anonymize):
     df_melt = pd.melt(df, id_vars=[COLNAMES_DF.USERNAME], value_vars=[COLNAMES_DF.WORDS_NO, COLNAMES_DF.MESSAGE_NO,
                                                                       COLNAMES_DF.FirstMessage, COLNAMES_DF.LastMessage,
                                                                       COLNAMES_DF.URL_NO, COLNAMES_DF.FILE_NO,
-                                                                      COLNAMES_DF.LOCATION_NO],
+                                                                      COLNAMES_DF.LOCATION_NO,
+                                                                      COLNAMES_DF.REPLY_2USER,
+                                                                      COLNAMES_DF.USER_REPLY2],
                       var_name=COLNAMES_DF.DESCRIPTION, value_name=COLNAMES_DF.VALUE)
 
-    # usernames = set(df_melt[COLNAMES_DF.USERNAME])
-    usernames = df_melt[COLNAMES_DF.USERNAME].unique()
+    usernames = sorted(set(df_melt[COLNAMES_DF.USERNAME]))
     for u in usernames:
         df_user = df_melt[(df_melt[COLNAMES_DF.USERNAME] == u) &
                           df_melt[COLNAMES_DF.VALUE] != 0]
-        # if anonymize:
-        #     df_user = anonymize_participants(df_user)
+
         results.append(df_user)
 
     return results
@@ -568,25 +572,20 @@ def get_participants_features(df_chat):
     }).reset_index()
 
     response_matrix = get_response_matrix(df_chat)
-    out_degree = response_matrix.sum(axis=1)
-    in_degree = response_matrix.T.sum(axis=1)
     user_reply2 = response_matrix.idxmax(axis=1)
     reply2_user = response_matrix.T.idxmax(axis=1)
 
-    response_matrix[COLNAMES_DF.OUT_DEGREE] = out_degree
-    response_matrix[COLNAMES_DF.IN_DEGREE] = in_degree
     response_matrix[COLNAMES_DF.USER_REPLY2] = user_reply2
     response_matrix[COLNAMES_DF.REPLY_2USER] = reply2_user
     response_matrix.index.name = COLNAMES_DF.USERNAME
-    response_matrix = response_matrix.loc[:,
-                      [COLNAMES_DF.OUT_DEGREE, COLNAMES_DF.IN_DEGREE, COLNAMES_DF.USER_REPLY2, COLNAMES_DF.REPLY_2USER]]
+    response_matrix = response_matrix.loc[:,[COLNAMES_DF.USER_REPLY2, COLNAMES_DF.REPLY_2USER]]
     response_matrix = response_matrix.reset_index()
 
     df_participants = pd.merge(df_participants, response_matrix, how="left", on=COLNAMES_DF.USERNAME, validate="1:1")
 
     return df_participants
 
-def remove_system_messages(chat):
+def remove_system_messages(log_error, chat):
     """Removes system messages from chat
     Parameters
     ----------
@@ -597,11 +596,14 @@ def remove_system_messages(chat):
     pandas.DataFrame
         A filtered dataframe
     """
-    # print(chat.loc[0,COLNAMES_DF.MESSAGE])
-    # print(SYSTEM_MESSAGES[1])
-    for m in SYSTEM_MESSAGES:
-        group_name = chat.loc[chat[COLNAMES_DF.MESSAGE]==m,COLNAMES_DF.USERNAME]
-        # print(group_name)
+    # An empty chat has no first row to inspect.
+    if chat.empty:
+        return chat
+    message0 = chat.loc[0, COLNAMES_DF.MESSAGE]
+    if all(s in message0 for s in SYSTEM_MESSAGES):
+        group_name = chat.loc[0, COLNAMES_DF.USERNAME]
+        log_error("Identified group name:"+group_name)
+        chat = chat.loc[chat[COLNAMES_DF.USERNAME] != group_name,]
     return chat
 
 def extract_participants_features(chat, anonymize=True):
@@ -619,13 +621,16 @@ def extract_participants_features(chat, anonymize=True):
     """
 
     df = get_participants_features(chat)
-    results = get_df_per_participant(df, anonymize)
+    if anonymize:
+        df= anonymize_participants(df)
+
+    results = get_wide_to_long_participant(df)
     return results
 
 # ***** end of analysis functions *****
 
 
-def format_results(df_list):
+def format_results(df_list, error):
     """Format results to the standard format.
     Parameters
     ----------
@@ -639,11 +644,13 @@ def format_results(df_list):
         user_name = pd.unique(df[COLNAMES_DF.USERNAME])[0]
         results.append(
             {
-                "id": user_name,#"overview",
-                "title": user_name,#"The following data is extracted from the file:",
+                "id": user_name,
+                "title": user_name,
                 "data_frame": df[[COLNAMES_DF.DESCRIPTION,COLNAMES_DF.VALUE]].reset_index(drop=True)
             }
         )
+    if len(error)>0:
+        results = results+error
     return results
 
 
@@ -656,46 +663,49 @@ def format_errors(errors):
     -------
     pandas.dataframe
     """
+    if len(errors) == 0:
+        return []
     data_frame = pd.DataFrame()
     data_frame["Messages"] = pd.Series(errors, name="Messages")
-    return {"id": "extraction_log", "title": "Extraction log", "data_frame": data_frame}
+    return [{"id": "extraction_log", "title": "Extraction log", "data_frame": data_frame}]
 
 
 def process(file_data):
-    """Convert whatsapp chat_file.zip to participants dataframe.
+    """Convert whatsapp chat file to participant dataframes.
     This is the main function which extracts the participants
-    information from the row chat_file.zip provided by data-donators.
+    information from the raw chat file provided by data-donators.
     Parameters
     ----------
-    file_data : str
-        The path of the chat_file.zip
+    file_data : pathlib.Path
+        The path of the chat file. It can be in zip or txt format.
     Returns
     -------
-    pandas.dataframe
-        Extracted data from the chat_file
+    list of dict
+        Per-participant results followed by the extraction log, if any
     """
     errors = []
     log_error = errors.append
-    zfile = None
-    #chats = []
+
     try:
         zfile = zipfile.ZipFile(file_data)
-    except:
+    except Exception:
         if FILE_RE.match(file_data.name):
-            tfile = open(file_data, encoding="utf8")
-            chat = parse_chat(log_error, tfile.read())
-            #chats.append(chat)
+            # Close the file handle as soon as the text has been read.
+            with open(file_data, encoding="utf8") as tfile:
+                chat = parse_chat(log_error, tfile.read())
         else:
             log_error("There is not a valid file format.")
-            return [format_errors(errors)]
+            return format_errors(errors)
     else:
         chat = parse_zipfile(log_error, zfile)
-    if errors:
-        return [format_errors(errors)]
 
-    # print(chat)
-    chat = remove_system_messages(chat)
-    participants = extract_participants_features(chat)
-    formatted_results = format_results(participants)
+    if chat is None:
+        # No chat dataframe was produced; return only the collected log.
+        return format_errors(errors)
+
+    chat = remove_system_messages(log_error, chat)
+    participants = extract_participants_features(chat)
+    formatted_results = format_results(participants,
+                                       format_errors(errors))
 
     return formatted_results