diff --git a/src/global/IndexTypes.h b/src/global/IndexTypes.h index 08ee960d00..4868e59694 100644 --- a/src/global/IndexTypes.h +++ b/src/global/IndexTypes.h @@ -16,3 +16,4 @@ using LocalVocabIndex = const LocalVocabEntry*; using TextRecordIndex = ad_utility::TypedIndex; using WordVocabIndex = ad_utility::TypedIndex; using BlankNodeIndex = ad_utility::TypedIndex; +using DocumentIndex = ad_utility::TypedIndex; diff --git a/src/index/IndexImpl.Text.cpp b/src/index/IndexImpl.Text.cpp index 76c0015974..d1e6a70777 100644 --- a/src/index/IndexImpl.Text.cpp +++ b/src/index/IndexImpl.Text.cpp @@ -17,39 +17,22 @@ #include "backports/algorithm.h" #include "engine/CallFixedSize.h" #include "index/FTSAlgorithms.h" -#include "parser/ContextFileParser.h" +#include "parser/WordsAndDocsFileParser.h" #include "util/Conversions.h" #include "util/Simple8bCode.h" -namespace { - -// Custom delimiter class for tokenization of literals using `absl::StrSplit`. -// The `Find` function returns the next delimiter in `text` after the given -// `pos` or an empty substring if there is no next delimiter. -struct LiteralsTokenizationDelimiter { - absl::string_view Find(absl::string_view text, size_t pos) { - auto isWordChar = [](char c) -> bool { return std::isalnum(c); }; - auto found = std::find_if_not(text.begin() + pos, text.end(), isWordChar); - if (found == text.end()) return text.substr(text.size()); - return {found, found + 1}; - } -}; - -} // namespace - // _____________________________________________________________________________ -cppcoro::generator IndexImpl::wordsInTextRecords( - const std::string& contextFile, bool addWordsFromLiterals) { +cppcoro::generator IndexImpl::wordsInTextRecords( + std::string contextFile, bool addWordsFromLiterals) const { auto localeManager = textVocab_.getLocaleManager(); // ROUND 1: If context file aka wordsfile is not empty, read words from there. // Remember the last context id for the (optional) second round. 
TextRecordIndex contextId = TextRecordIndex::make(0); if (!contextFile.empty()) { - ContextFileParser::Line line; - ContextFileParser p(contextFile, localeManager); + WordsFileParser p(contextFile, localeManager); ad_utility::HashSet items; - while (p.getLine(line)) { - contextId = line._contextId; + for (auto& line : p) { + contextId = line.contextId_; co_yield line; } if (contextId > TextRecordIndex::make(0)) { @@ -65,15 +48,13 @@ cppcoro::generator IndexImpl::wordsInTextRecords( if (!isLiteral(text)) { continue; } - ContextFileParser::Line entityLine{text, true, contextId, 1, true}; + WordsFileLine entityLine{text, true, contextId, 1, true}; co_yield entityLine; std::string_view textView = text; textView = textView.substr(0, textView.rfind('"')); textView.remove_prefix(1); - for (auto word : absl::StrSplit(textView, LiteralsTokenizationDelimiter{}, - absl::SkipEmpty{})) { - auto wordNormalized = localeManager.getLowercaseUtf8(word); - ContextFileParser::Line wordLine{wordNormalized, false, contextId, 1}; + for (auto word : tokenizeAndNormalizeText(textView, localeManager)) { + WordsFileLine wordLine{std::move(word), false, contextId, 1}; co_yield wordLine; } contextId = contextId.incremented(); @@ -81,6 +62,56 @@ cppcoro::generator IndexImpl::wordsInTextRecords( } } +// _____________________________________________________________________________ +void IndexImpl::processEntityCaseDuringInvertedListProcessing( + const WordsFileLine& line, + ad_utility::HashMap& entitiesInContext, size_t& nofLiterals, + size_t& entityNotFoundErrorMsgCount) const { + VocabIndex eid; + // TODO Currently only IRIs and strings from the vocabulary can + // be tagged entities in the text index (no doubles, ints, etc). + if (getVocab().getId(line.word_, &eid)) { + // Note that `entitiesInContext` is a HashMap, so the `Id`s don't have + // to be contiguous. 
+ entitiesInContext[Id::makeFromVocabIndex(eid)] += line.score_; + if (line.isLiteralEntity_) { + ++nofLiterals; + } + } else { + logEntityNotFound(line.word_, entityNotFoundErrorMsgCount); + } +} + +// _____________________________________________________________________________ +void IndexImpl::processWordCaseDuringInvertedListProcessing( + const WordsFileLine& line, + ad_utility::HashMap& wordsInContext) const { + // TODO Let the `textVocab_` return a `WordIndex` directly. + WordVocabIndex vid; + bool ret = textVocab_.getId(line.word_, &vid); + WordIndex wid = vid.get(); + if (!ret) { + LOG(ERROR) << "ERROR: word \"" << line.word_ << "\" " + << "not found in textVocab. Terminating\n"; + AD_FAIL(); + } + wordsInContext[wid] += line.score_; +} + +// _____________________________________________________________________________ +void IndexImpl::logEntityNotFound(const string& word, + size_t& entityNotFoundErrorMsgCount) const { + if (entityNotFoundErrorMsgCount < 20) { + LOG(WARN) << "Entity from text not in KB: " << word << '\n'; + if (++entityNotFoundErrorMsgCount == 20) { + LOG(WARN) << "There are more entities not in the KB..." 
+ << " suppressing further warnings...\n"; + } + } else { + entityNotFoundErrorMsgCount++; + } +} + // _____________________________________________________________________________ void IndexImpl::addTextFromContextFile(const string& contextFile, bool addWordsFromLiterals) { @@ -214,12 +245,12 @@ size_t IndexImpl::processWordsForVocabulary(string const& contextFile, for (auto line : wordsInTextRecords(contextFile, addWordsFromLiterals)) { ++numLines; // LOG(INFO) << "LINE: " - // << std::setw(50) << line._word << " " - // << line._isEntity << "\t" - // << line._contextId.get() << "\t" - // << line._score << std::endl; - if (!line._isEntity) { - distinctWords.insert(line._word); + // << std::setw(50) << line.word_ << " " + // << line.isEntity_ << "\t" + // << line.contextId_.get() << "\t" + // << line.score_ << std::endl; + if (!line.isEntity_) { + distinctWords.insert(line.word_); } } textVocab_.createFromSet(distinctWords, onDiskBase_ + ".text.vocabulary"); @@ -243,49 +274,21 @@ void IndexImpl::processWordsForInvertedLists(const string& contextFile, size_t nofLiterals = 0; for (auto line : wordsInTextRecords(contextFile, addWordsFromLiterals)) { - if (line._contextId != currentContext) { + if (line.contextId_ != currentContext) { ++nofContexts; addContextToVector(writer, currentContext, wordsInContext, entitiesInContext); - currentContext = line._contextId; + currentContext = line.contextId_; wordsInContext.clear(); entitiesInContext.clear(); } - if (line._isEntity) { + if (line.isEntity_) { ++nofEntityPostings; - // TODO Currently only IRIs and strings from the vocabulary can - // be tagged entities in the text index (no doubles, ints, etc). - VocabIndex eid; - if (getVocab().getId(line._word, &eid)) { - // Note that `entitiesInContext` is a HashMap, so the `Id`s don't have - // to be contiguous. 
- entitiesInContext[Id::makeFromVocabIndex(eid)] += line._score; - if (line._isLiteralEntity) { - ++nofLiterals; - } - } else { - if (entityNotFoundErrorMsgCount < 20) { - LOG(WARN) << "Entity from text not in KB: " << line._word << '\n'; - if (++entityNotFoundErrorMsgCount == 20) { - LOG(WARN) << "There are more entities not in the KB..." - << " suppressing further warnings...\n"; - } - } else { - entityNotFoundErrorMsgCount++; - } - } + processEntityCaseDuringInvertedListProcessing( + line, entitiesInContext, nofLiterals, entityNotFoundErrorMsgCount); } else { ++nofWordPostings; - // TODO Let the `textVocab_` return a `WordIndex` directly. - WordVocabIndex vid; - bool ret = textVocab_.getId(line._word, &vid); - WordIndex wid = vid.get(); - if (!ret) { - LOG(ERROR) << "ERROR: word \"" << line._word << "\" " - << "not found in textVocab. Terminating\n"; - AD_FAIL(); - } - wordsInContext[wid] += line._score; + processWordCaseDuringInvertedListProcessing(line, wordsInContext); } } if (entityNotFoundErrorMsgCount > 0) { diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index d9ec19eb14..ac0003db87 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -29,9 +29,9 @@ #include "index/TextMetaData.h" #include "index/Vocabulary.h" #include "index/VocabularyMerger.h" -#include "parser/ContextFileParser.h" #include "parser/RdfParser.h" #include "parser/TripleComponent.h" +#include "parser/WordsAndDocsFileParser.h" #include "util/BufferedVector.h" #include "util/CancellationHandle.h" #include "util/File.h" @@ -521,8 +521,20 @@ class IndexImpl { // TODO: So far, this is limited to the internal vocabulary (still in the // testing phase, once it works, it should be easy to include the IRIs and // literals from the external vocabulary as well). 
- cppcoro::generator wordsInTextRecords( - const std::string& contextFile, bool addWordsFromLiterals); + cppcoro::generator wordsInTextRecords( + std::string contextFile, bool addWordsFromLiterals) const; + + void processEntityCaseDuringInvertedListProcessing( + const WordsFileLine& line, + ad_utility::HashMap& entitiesInContext, size_t& nofLiterals, + size_t& entityNotFoundErrorMsgCount) const; + + void processWordCaseDuringInvertedListProcessing( + const WordsFileLine& line, + ad_utility::HashMap& wordsInContext) const; + + void logEntityNotFound(const string& word, + size_t& entityNotFoundErrorMsgCount) const; size_t processWordsForVocabulary(const string& contextFile, bool addWordsFromLiterals); diff --git a/src/parser/CMakeLists.txt b/src/parser/CMakeLists.txt index be4b3db44c..6fa123a793 100644 --- a/src/parser/CMakeLists.txt +++ b/src/parser/CMakeLists.txt @@ -10,7 +10,7 @@ add_library(parser ParsedQuery.cpp RdfParser.cpp Tokenizer.cpp - ContextFileParser.cpp + WordsAndDocsFileParser.cpp TurtleTokenId.h ParallelBuffer.cpp SparqlParserHelpers.cpp diff --git a/src/parser/ContextFileParser.cpp b/src/parser/ContextFileParser.cpp deleted file mode 100644 index 523bde486b..0000000000 --- a/src/parser/ContextFileParser.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2015, University of Freiburg, -// Chair of Algorithms and Data Structures. 
-// Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de) - -#include "./ContextFileParser.h" - -#include - -#include "../util/Exception.h" -#include "../util/StringUtils.h" - -// _____________________________________________________________________________ -ContextFileParser::ContextFileParser(const string& contextFile, - LocaleManager localeManager) - : _in(contextFile), _localeManager(std::move(localeManager)) {} - -// _____________________________________________________________________________ -ContextFileParser::~ContextFileParser() { _in.close(); } - -// _____________________________________________________________________________ -bool ContextFileParser::getLine(ContextFileParser::Line& line) { - string l; - if (std::getline(_in, l)) { - size_t i = l.find('\t'); - assert(i != string::npos); - size_t j = i + 2; - assert(j + 3 < l.size()); - size_t k = l.find('\t', j + 2); - assert(k != string::npos); - line._isEntity = (l[i + 1] == '1'); - line._word = - (line._isEntity ? l.substr(0, i) - : _localeManager.getLowercaseUtf8(l.substr(0, i))); - line._contextId = - TextRecordIndex::make(atol(l.substr(j + 1, k - j - 1).c_str())); - line._score = static_cast(atol(l.substr(k + 1).c_str())); -#ifndef NDEBUG - if (_lastCId > line._contextId) { - AD_THROW("ContextFile has to be sorted by context Id."); - } - _lastCId = line._contextId; -#endif - return true; - } - return false; -} diff --git a/src/parser/ContextFileParser.h b/src/parser/ContextFileParser.h deleted file mode 100644 index ba8d7bac9c..0000000000 --- a/src/parser/ContextFileParser.h +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2015, University of Freiburg, -// Chair of Algorithms and Data Structures. 
-// Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de) - -#pragma once - -#include - -#include -#include - -#include "../global/Id.h" -#include "../index/StringSortComparator.h" - -using std::string; - -class ContextFileParser { - public: - struct Line { - string _word; - bool _isEntity; - TextRecordIndex _contextId; - Score _score; - bool _isLiteralEntity = false; - }; - - explicit ContextFileParser(const string& contextFile, - LocaleManager localeManager); - ~ContextFileParser(); - // Don't allow copy & assignment - explicit ContextFileParser(const ContextFileParser& other) = delete; - ContextFileParser& operator=(const ContextFileParser& other) = delete; - - // Get the next line from the file. - // Returns true if something was stored. - bool getLine(Line&); - - private: - std::ifstream _in; -#ifndef NDEBUG - // Only used for sanity checks in debug builds - TextRecordIndex _lastCId = TextRecordIndex::make(0); -#endif - LocaleManager _localeManager; -}; diff --git a/src/parser/WordsAndDocsFileParser.cpp b/src/parser/WordsAndDocsFileParser.cpp new file mode 100644 index 0000000000..e7d36974c6 --- /dev/null +++ b/src/parser/WordsAndDocsFileParser.cpp @@ -0,0 +1,61 @@ +// Copyright 2015, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de) +// Felix Meisen (fesemeisen@outlook.de) + +#include "parser/WordsAndDocsFileParser.h" + +#include + +#include "util/Exception.h" +#include "util/StringUtils.h" + +// _____________________________________________________________________________ +WordsAndDocsFileParser::WordsAndDocsFileParser( + const string& wordsOrDocsFile, const LocaleManager& localeManager) + : in_(wordsOrDocsFile), localeManager_(localeManager) {} + +// _____________________________________________________________________________ +ad_utility::InputRangeFromGet::Storage WordsFileParser::get() { + WordsFileLine line; + string l; + if (!std::getline(getInputStream(), l)) { + return std::nullopt; + } + std::string_view lineView(l); + size_t i = lineView.find('\t'); + assert(i != string::npos); + size_t j = i + 2; + assert(j + 3 < lineView.size()); + size_t k = lineView.find('\t', j + 2); + assert(k != string::npos); + line.isEntity_ = (lineView[i + 1] == '1'); + line.word_ = + (line.isEntity_ + ? 
lineView.substr(0, i) + : getLocaleManager().getLowercaseUtf8(lineView.substr(0, i))); + line.contextId_ = + TextRecordIndex::make(atol(lineView.substr(j + 1, k - j - 1).data())); + line.score_ = static_cast(atol(lineView.substr(k + 1).data())); +#ifndef NDEBUG + if (lastCId_ > line.contextId_) { + AD_THROW("ContextFile has to be sorted by context Id."); + } + lastCId_ = line.contextId_; +#endif + return line; +} + +// _____________________________________________________________________________ +ad_utility::InputRangeFromGet::Storage DocsFileParser::get() { + string l; + if (!std::getline(getInputStream(), l)) { + return std::nullopt; + } + DocsFileLine line; + size_t i = l.find('\t'); + assert(i != string::npos); + line.docId_ = DocumentIndex::make(atol(l.substr(0, i).c_str())); + line.docContent_ = l.substr(i + 1); + return line; +} diff --git a/src/parser/WordsAndDocsFileParser.h b/src/parser/WordsAndDocsFileParser.h new file mode 100644 index 0000000000..1fc80523ff --- /dev/null +++ b/src/parser/WordsAndDocsFileParser.h @@ -0,0 +1,192 @@ +// Copyright 2015, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de) +// Felix Meisen (fesemeisen@outlook.de) + +#pragma once + +#include +#include + +#include +#include + +#include "global/Id.h" +#include "index/StringSortComparator.h" +#include "util/Iterators.h" +#include "util/Views.h" + +using std::string; + +/** + * @brief Represents a line in the words file. + * + * This struct holds information about a word or entity as it appears in the + * words file. + * + * The Fields are ordered in the same way the values follow in a line. + * Short field overview: string word_, bool isEntity, TextRecordIndex contextId, + * Score score_, bool isLiteralEntity (not found in + * wordsfile) + * + * @details + * + * Fields: + * - string word_: The string of the word, if it is an entity it will be + * . 
+ * - bool isEntity_: True if the given word is an entity, false if it's a word. + * - TextRecordIndex contextId_: When creating the wordsfile docs from the + * docsfile get split into so called contexts. + * Those contexts overlap, meaning words and + * entities are covered multiple times. Each + * contextId corresponds to the next bigger or + * equal docId. + * - Score score_: Either 1 or 0 if isEntity is false. 0, 1, 100, 150 if + * isEntity is true. (this info is only constructed on the + * scientists.wordsfile.tsv) The score in the wordsfile is only + * relevant for the counting scoring metric. Because of the + * overlap of contexts the score is 1 if the word really has + * been seen for the first time and 0 if not. If a doc contains + * multiple mentions of a word there should be exactly as many + * wordsfile lines of that word with score 1 as there are + * mentions. The score for entities seems rather random and + * since no clear explanation of the creation of wordsfiles + * has been found yet they will stay rather random. + * - bool isLiteralEntity_: This does not directly stem from the wordsfile. + * When building the text index with literals, for + * every literal there will be WordsFileLines for all + * words in that literal. Additionally the whole + * literal itself will be added as word with isEntity + * being true. The need to count this comes only from + * a trick used in testing right now. To be specific + * the method getTextRecordFromResultTable + */ +struct WordsFileLine { + string word_; + bool isEntity_; + TextRecordIndex contextId_; + Score score_; + bool isLiteralEntity_ = false; +}; + +/** + * @brief Represents a line from the docsfile.tsv. + * + * This struct stores everything given in a line of the docsfile.tsv. + * + * The Fields are ordered in the same way the values follow in a line. 
+ * Short field overview: DocumentIndex docId_, string docContent_ + * + * @details + * + * Fields: + * - DocumentIndex docId_: The docId is needed to build inverted indices for + * scoring and building of the docsDB. It is also used + * to return actual texts when searching for a word. + * The word (and entity) search returns a table with + * TextRecordIndex as type of one column. Those get + * mapped to the next bigger or equal docId which is + * then used to extract the text from the docsDB. + * - string docContent_: The whole text given after the first tab of a line of + * docsfile. + */ +struct DocsFileLine { + DocumentIndex docId_; + string docContent_; +}; + +// Custom delimiter class for tokenization of literals using `absl::StrSplit`. +// The `Find` function returns the next delimiter in `text` after the given +// `pos` or an empty substring if there is no next delimiter. +struct LiteralsTokenizationDelimiter { + absl::string_view Find(absl::string_view text, size_t pos) const { + auto isWordChar = [](char c) -> bool { return std::isalnum(c); }; + auto found = std::find_if_not(text.begin() + pos, text.end(), isWordChar); + if (found == text.end()) return text.substr(text.size()); + return {found, found + 1}; + } +}; + +/** + * @brief A function that can be used to tokenize and normalize a given text. + * @warning Both params are const refs where the original objects have to be + * kept alive during the usage of the returned object. + * @param text The text to be tokenized and normalized. + * @param localeManager The localeManager to be used for normalization. 
+ * @details This function can be used in the following way: + * for (auto normalizedWord : tokenizeAndNormalizeText(text, localeManager)) { + * code; + * } + */ +inline auto tokenizeAndNormalizeText(std::string_view text, + const LocaleManager& localeManager) { + std::vector split{ + absl::StrSplit(text, LiteralsTokenizationDelimiter{}, absl::SkipEmpty{})}; + return ql::views::transform(ad_utility::OwningView{std::move(split)}, + [&localeManager](const auto& str) { + return localeManager.getLowercaseUtf8(str); + }); +} +/** + * @brief This class is the parent class of WordsFileParser and DocsFileParser + * + * @details It exists to reduce code duplication since the only difference + * between the child classes is the line type returned. + */ +class WordsAndDocsFileParser { + public: + explicit WordsAndDocsFileParser(const string& wordsOrDocsFile, + const LocaleManager& localeManager); + explicit WordsAndDocsFileParser(const WordsAndDocsFileParser& other) = delete; + WordsAndDocsFileParser& operator=(const WordsAndDocsFileParser& other) = + delete; + + protected: + std::ifstream& getInputStream() { return in_; } + const LocaleManager& getLocaleManager() const { return localeManager_; } + + private: + std::ifstream in_; + LocaleManager localeManager_; +}; + +/** + * @brief This class takes in the a pathToWordsFile and a localeManager. It then + * can be used to iterate the wordsFile while already normalizing the words + * using the localeManager. 
(If words are entities it doesn't normalize them) + * + * @details An object of this class can be iterated as follows: + * for (auto wordsFileLine : WordsFileParser{wordsFile, localeManager}) { + * code; + * } + * The type of the value returned when iterating is WordsFileLine + */ +class WordsFileParser : public WordsAndDocsFileParser, + public ad_utility::InputRangeFromGet { + public: + using WordsAndDocsFileParser::WordsAndDocsFileParser; + Storage get() override; + +#ifndef NDEBUG + private: + // Only used for sanity checks in debug builds + TextRecordIndex lastCId_ = TextRecordIndex::make(0); +#endif +}; + +/** + * @brief This class takes in the a pathToDocsFile and a localeManager. It then + * can be used to iterate over the docsFile to get the lines. + * + * @details An object of this class can be iterated as follows: + * for (auto docsFileLine : DocsFileParser{docsFile, localeManager}) { + * code; + * } + * The type of the value returned when iterating is DocsFileLine + */ +class DocsFileParser : public WordsAndDocsFileParser, + public ad_utility::InputRangeFromGet { + public: + using WordsAndDocsFileParser::WordsAndDocsFileParser; + Storage get() override; +}; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bd375f4826..b9581312e8 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -137,7 +137,7 @@ addLinkAndDiscoverTestSerial(FileTest) addLinkAndDiscoverTest(Simple8bTest) -addLinkAndDiscoverTest(ContextFileParserTest parser) +addLinkAndDiscoverTest(WordsAndDocsFileParserTest parser) addLinkAndDiscoverTest(IndexMetaDataTest index) diff --git a/test/ContextFileParserTest.cpp b/test/ContextFileParserTest.cpp deleted file mode 100644 index 2b27c0f34d..0000000000 --- a/test/ContextFileParserTest.cpp +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2015, University of Freiburg, -// Chair of Algorithms and Data Structures. 
-// Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de) - -#include - -#include -#include - -#include "../src/parser/ContextFileParser.h" - -TEST(ContextFileParserTest, getLineTest) { - char* locale = setlocale(LC_CTYPE, ""); - std::cout << "Set locale LC_CTYPE to: " << locale << std::endl; - - std::fstream f("_testtmp.contexts.tsv", std::ios_base::out); - f << "Foo\t0\t0\t2\n" - "foo\t0\t0\t2\n" - "Bär\t1\t0\t1\n" - "Äü\t0\t0\t1\n" - "X\t0\t1\t1\n"; - - f.close(); - ContextFileParser p("_testtmp.contexts.tsv", - LocaleManager("en", "US", false)); - ContextFileParser::Line a; - ASSERT_TRUE(p.getLine(a)); - ASSERT_EQ("foo", a._word); - ASSERT_FALSE(a._isEntity); - ASSERT_EQ(0u, a._contextId.get()); - ASSERT_EQ(2u, a._score); - - ASSERT_TRUE(p.getLine(a)); - ASSERT_EQ("foo", a._word); - ASSERT_FALSE(a._isEntity); - ASSERT_EQ(0u, a._contextId.get()); - ASSERT_EQ(2u, a._score); - - ASSERT_TRUE(p.getLine(a)); - ASSERT_EQ("Bär", a._word); - ASSERT_TRUE(a._isEntity); - ASSERT_EQ(0u, a._contextId.get()); - ASSERT_EQ(1u, a._score); - - ASSERT_TRUE(p.getLine(a)); - ASSERT_EQ("äü", a._word); - ASSERT_FALSE(a._isEntity); - ASSERT_EQ(0u, a._contextId.get()); - ASSERT_EQ(1u, a._score); - - ASSERT_TRUE(p.getLine(a)); - ASSERT_EQ("x", a._word); - ASSERT_FALSE(a._isEntity); - ASSERT_EQ(1u, a._contextId.get()); - ASSERT_EQ(1u, a._score); - - ASSERT_FALSE(p.getLine(a)); - remove("_testtmp.contexts.tsv"); -}; diff --git a/test/WordsAndDocsFileLineCreator.h b/test/WordsAndDocsFileLineCreator.h new file mode 100644 index 0000000000..cb151216fd --- /dev/null +++ b/test/WordsAndDocsFileLineCreator.h @@ -0,0 +1,22 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Felix Meisen (fesemeisen@outlook.de) + +#pragma once + +#include + +constexpr std::string_view inlineSeparator = "\t"; +constexpr std::string_view lineSeparator = "\n"; + +inline std::string createWordsFileLineAsString(std::string_view word, + bool isEntity, size_t contextId, + size_t score) { + return absl::StrCat(word, inlineSeparator, isEntity, inlineSeparator, + contextId, inlineSeparator, score, lineSeparator); +}; + +inline std::string createDocsFileLineAsString(size_t docId, + std::string_view docContent) { + return absl::StrCat(docId, inlineSeparator, docContent, lineSeparator); +}; diff --git a/test/WordsAndDocsFileParserTest.cpp b/test/WordsAndDocsFileParserTest.cpp new file mode 100644 index 0000000000..de7216ada7 --- /dev/null +++ b/test/WordsAndDocsFileParserTest.cpp @@ -0,0 +1,165 @@ +// Copyright 2015, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de) + +#include + +#include +#include + +#include "./WordsAndDocsFileLineCreator.h" +#include "parser/WordsAndDocsFileParser.h" + +// All lambdas and type aliases used in this file contained here +namespace { + +/// Type aliases + +// Word, isEntity, contextId, score +using WordLine = std::tuple; +using WordLineVec = std::vector; + +// docId, docContent +using DocLine = std::tuple; +using DocLineVec = std::vector; + +using StringVec = std::vector; + +/// Lambdas + +auto getLocaleManager = []() -> LocaleManager { + return LocaleManager("en", "US", false); +}; + +auto wordsFileLineToWordLine = + [](const WordsFileLine& wordsFileLine) -> WordLine { + return std::make_tuple(wordsFileLine.word_, wordsFileLine.isEntity_, + static_cast(wordsFileLine.contextId_.get()), + static_cast(wordsFileLine.score_)); +}; + +// Lambda that takes in a path to wordsFile to initialize the Parser and an +// expectedResult that is compared against the parsers outputs. 
+auto testWordsFileParser = [](const std::string& wordsFilePath, + const WordLineVec& expectedResult) { + size_t i = 0; + LocaleManager localeManager = getLocaleManager(); + for (auto wordsFileLine : WordsFileParser{wordsFilePath, localeManager}) { + ASSERT_TRUE(i < expectedResult.size()); + WordLine testLine = wordsFileLineToWordLine(wordsFileLine); + + // Not testing the whole tuples against each other to have a cleaner + // indication what exactly caused the assertion to fail + ASSERT_EQ(std::get<0>(testLine), std::get<0>(expectedResult.at(i))); + ASSERT_EQ(std::get<1>(testLine), std::get<1>(expectedResult.at(i))); + ASSERT_EQ(std::get<2>(testLine), std::get<2>(expectedResult.at(i))); + ASSERT_EQ(std::get<3>(testLine), std::get<3>(expectedResult.at(i))); + + ++i; + } + ASSERT_EQ(i, expectedResult.size()); +}; + +auto docsFileLineToDocLine = [](const DocsFileLine& docsFileLine) -> DocLine { + return std::make_tuple(static_cast(docsFileLine.docId_.get()), + docsFileLine.docContent_); +}; + +// Same as testWordsFileParser but for docsFile +auto testDocsFileParser = [](const std::string& docsFilePath, + const DocLineVec& expectedResult) { + size_t i = 0; + LocaleManager localeManager = getLocaleManager(); + for (auto docsFileLine : DocsFileParser{docsFilePath, localeManager}) { + ASSERT_TRUE(i < expectedResult.size()); + DocLine testLine = docsFileLineToDocLine(docsFileLine); + + // Not testing the whole tuples against each other to have a cleaner + // indication what exactly caused the assertion to fail + ASSERT_EQ(std::get<0>(testLine), std::get<0>(expectedResult.at(i))); + ASSERT_EQ(std::get<1>(testLine), std::get<1>(expectedResult.at(i))); + + ++i; + } +}; + +// Passing the testText as copy to make sure it stays alive during the usage of +// tokenizer +auto testTokenizeAndNormalizeText = [](std::string testText, + const StringVec& normalizedTextAsVec) { + size_t i = 0; + LocaleManager localeManager = getLocaleManager(); + for (auto normalizedWord : + 
tokenizeAndNormalizeText(testText, localeManager)) { + ASSERT_TRUE(i < normalizedTextAsVec.size()); + ASSERT_EQ(normalizedWord, normalizedTextAsVec.at(i)); + + ++i; + } + ASSERT_EQ(i, normalizedTextAsVec.size()); +}; + +} // namespace + +TEST(WordsAndDocsFileParserTest, wordsFileParserTest) { + char* locale = setlocale(LC_CTYPE, ""); + std::cout << "Set locale LC_CTYPE to: " << locale << std::endl; + + std::fstream f("_testtmp.contexts.tsv", std::ios_base::out); + f << createWordsFileLineAsString("Foo", false, 0, 2) + << createWordsFileLineAsString("foo", false, 0, 2) + << createWordsFileLineAsString("Bär", true, 0, 1) + << createWordsFileLineAsString("Äü", false, 0, 1) + << createWordsFileLineAsString("X", false, 1, 1); + f.close(); + + WordLineVec expected = {{"foo", false, 0, 2}, + {"foo", false, 0, 2}, + {"Bär", true, 0, 1}, + {"äü", false, 0, 1}, + {"x", false, 1, 1}}; + + testWordsFileParser("_testtmp.contexts.tsv", expected); + remove("_testtmp.contexts.tsv"); +}; + +TEST(WordsAndDocsFileParser, docsFileParserTest) { + char* locale = setlocale(LC_CTYPE, ""); + std::cout << "Set locale LC_CTYPE to: " << locale << std::endl; + + std::fstream f("_testtmp.documents.tsv", std::ios_base::out); + f << createDocsFileLineAsString(4, "This TeSt is OnlyCharcters") + << createDocsFileLineAsString(7, "Wh4t h4pp3ns t0 num83rs") + << createDocsFileLineAsString(8, "An( sp@ci*l ch.ar,:act=_er+s") + << createDocsFileLineAsString(190293, "Large docId"); + f.close(); + + DocLineVec expected = {{4, "This TeSt is OnlyCharcters"}, + {7, "Wh4t h4pp3ns t0 num83rs"}, + {8, "An( sp@ci*l ch.ar,:act=_er+s"}, + {190293, "Large docId"}}; + + testDocsFileParser("_testtmp.documents.tsv", expected); + remove("_testtmp.documents.tsv"); +} + +TEST(TokenizeAndNormalizeText, tokenizeAndNormalizeTextTest) { + char* locale = setlocale(LC_CTYPE, ""); + std::cout << "Set locale LC_CTYPE to: " << locale << std::endl; + + // Test 1 + testTokenizeAndNormalizeText("already normalized text", + 
{"already", "normalized", "text"}); + + // Test 2 + testTokenizeAndNormalizeText("TeXt WITH UpperCASe", + {"text", "with", "uppercase"}); + + // Test 3 + testTokenizeAndNormalizeText("41ph4num3r1c t3xt", {"41ph4num3r1c", "t3xt"}); + + // Test 4 + testTokenizeAndNormalizeText( + "test\twith\ndifferent,separators.here ,.\t", + {"test", "with", "different", "separators", "here"}); +} diff --git a/test/engine/TextIndexScanForWordTest.cpp b/test/engine/TextIndexScanForWordTest.cpp index eac3cb0d2f..cc9b685ec8 100644 --- a/test/engine/TextIndexScanForWordTest.cpp +++ b/test/engine/TextIndexScanForWordTest.cpp @@ -5,6 +5,7 @@ #include #include +#include "../WordsAndDocsFileLineCreator.h" #include "../printers/VariablePrinters.h" #include "../util/GTestHelpers.h" #include "../util/IdTableHelpers.h" @@ -26,45 +27,45 @@ std::string kg = ". . . ."; std::string wordsFileContent = - h::createWordsFileLine("astronomer", false, 1, 1) + - h::createWordsFileLine("", true, 1, 0) + - h::createWordsFileLine("scientist", false, 1, 1) + - h::createWordsFileLine("field", false, 1, 1) + - h::createWordsFileLine("astronomy", false, 1, 1) + - h::createWordsFileLine("astronomer", false, 2, 0) + - h::createWordsFileLine("", true, 2, 0) + - h::createWordsFileLine(":s:firstsentence", false, 2, 0) + - h::createWordsFileLine("scientist", false, 2, 0) + - h::createWordsFileLine("field", false, 2, 0) + - h::createWordsFileLine("astronomy", false, 2, 0) + - h::createWordsFileLine("astronomy", false, 3, 1) + - h::createWordsFileLine("concentrates", false, 3, 1) + - h::createWordsFileLine("studies", false, 3, 1) + - h::createWordsFileLine("specific", false, 3, 1) + - h::createWordsFileLine("question", false, 3, 1) + - h::createWordsFileLine("outside", false, 3, 1) + - h::createWordsFileLine("scope", false, 3, 1) + - h::createWordsFileLine("earth", false, 3, 1) + - h::createWordsFileLine("astronomy", false, 4, 1) + - h::createWordsFileLine("concentrates", false, 4, 1) + - 
h::createWordsFileLine("studies", false, 4, 1) + - h::createWordsFileLine("field", false, 4, 1) + - h::createWordsFileLine("outside", false, 4, 1) + - h::createWordsFileLine("scope", false, 4, 1) + - h::createWordsFileLine("earth", false, 4, 1) + - h::createWordsFileLine("tester", false, 5, 1) + - h::createWordsFileLine("rockets", false, 5, 1) + - h::createWordsFileLine("astronomer", false, 5, 1) + - h::createWordsFileLine("", true, 5, 0) + - h::createWordsFileLine("although", false, 5, 1) + - h::createWordsFileLine("astronomer", false, 6, 0) + - h::createWordsFileLine("", true, 6, 0) + - h::createWordsFileLine("although", false, 6, 0) + - h::createWordsFileLine("", true, 6, 0) + - h::createWordsFileLine("space", false, 6, 1) + - h::createWordsFileLine("", true, 7, 0) + - h::createWordsFileLine("space", false, 7, 0) + - h::createWordsFileLine("earth", false, 7, 1); + createWordsFileLineAsString("astronomer", false, 1, 1) + + createWordsFileLineAsString("", true, 1, 0) + + createWordsFileLineAsString("scientist", false, 1, 1) + + createWordsFileLineAsString("field", false, 1, 1) + + createWordsFileLineAsString("astronomy", false, 1, 1) + + createWordsFileLineAsString("astronomer", false, 2, 0) + + createWordsFileLineAsString("", true, 2, 0) + + createWordsFileLineAsString(":s:firstsentence", false, 2, 0) + + createWordsFileLineAsString("scientist", false, 2, 0) + + createWordsFileLineAsString("field", false, 2, 0) + + createWordsFileLineAsString("astronomy", false, 2, 0) + + createWordsFileLineAsString("astronomy", false, 3, 1) + + createWordsFileLineAsString("concentrates", false, 3, 1) + + createWordsFileLineAsString("studies", false, 3, 1) + + createWordsFileLineAsString("specific", false, 3, 1) + + createWordsFileLineAsString("question", false, 3, 1) + + createWordsFileLineAsString("outside", false, 3, 1) + + createWordsFileLineAsString("scope", false, 3, 1) + + createWordsFileLineAsString("earth", false, 3, 1) + + createWordsFileLineAsString("astronomy", false, 
4, 1) + + createWordsFileLineAsString("concentrates", false, 4, 1) + + createWordsFileLineAsString("studies", false, 4, 1) + + createWordsFileLineAsString("field", false, 4, 1) + + createWordsFileLineAsString("outside", false, 4, 1) + + createWordsFileLineAsString("scope", false, 4, 1) + + createWordsFileLineAsString("earth", false, 4, 1) + + createWordsFileLineAsString("tester", false, 5, 1) + + createWordsFileLineAsString("rockets", false, 5, 1) + + createWordsFileLineAsString("astronomer", false, 5, 1) + + createWordsFileLineAsString("", true, 5, 0) + + createWordsFileLineAsString("although", false, 5, 1) + + createWordsFileLineAsString("astronomer", false, 6, 0) + + createWordsFileLineAsString("", true, 6, 0) + + createWordsFileLineAsString("although", false, 6, 0) + + createWordsFileLineAsString("", true, 6, 0) + + createWordsFileLineAsString("space", false, 6, 1) + + createWordsFileLineAsString("", true, 7, 0) + + createWordsFileLineAsString("space", false, 7, 0) + + createWordsFileLineAsString("earth", false, 7, 1); std::string firstDocText = "An astronomer is a scientist in the field of " @@ -77,8 +78,8 @@ std::string secondDocText = "too although they might not be in space but on " "earth."; -std::string docsFileContent = h::createDocsFileLine(4, firstDocText) + - h::createDocsFileLine(7, secondDocText); +std::string docsFileContent = createDocsFileLineAsString(4, firstDocText) + + createDocsFileLineAsString(7, secondDocText); std::pair contentsOfWordsFileAndDocsFile = { wordsFileContent, docsFileContent}; diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h index 83a72ddea4..6ba1b8c6de 100644 --- a/test/engine/TextIndexScanTestHelpers.h +++ b/test/engine/TextIndexScanTestHelpers.h @@ -66,18 +66,4 @@ inline string combineToString(const string& text, const string& word) { ss << "Text: " << text << ", Word: " << word << std::endl; return ss.str(); } - -inline std::string inlineSeparator = "\t"; -inline std::string 
lineSeparator = "\n"; - -inline string createWordsFileLine(std::string word, bool isEntity, - size_t contextId, size_t score) { - return word + inlineSeparator + (isEntity ? "1" : "0") + inlineSeparator + - std::to_string(contextId) + inlineSeparator + std::to_string(score) + - lineSeparator; -}; - -inline string createDocsFileLine(size_t docId, std::string docContent) { - return std::to_string(docId) + inlineSeparator + docContent + lineSeparator; -}; } // namespace textIndexScanTestHelpers