Skip to content

Commit

Permalink
Verify whether the number of documents in the track is correct
Browse files Browse the repository at this point in the history
Closes #296
  • Loading branch information
danielmitterdorfer committed Aug 8, 2017
1 parent 4cd1cd7 commit 9d6c83a
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 1 deletion.
5 changes: 4 additions & 1 deletion esrally/track/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,10 @@ def decompress(data_set_path, expected_size_in_bytes):
raise exceptions.DataError("Track data file [%s] is missing." % type.document_archive)
decompressed_file_path, was_decompressed = decompress(type.document_archive, type.uncompressed_size_in_bytes)
# just rebuild the file every time for the time being. Later on, we might check the data file fingerprint to avoid it
io.prepare_file_offset_table(decompressed_file_path)
lines_read = io.prepare_file_offset_table(decompressed_file_path)
if lines_read and lines_read != type.number_of_lines:
raise exceptions.DataError("Data in [%s] for track [%s] are invalid. Expected [%d] lines but got [%d]."
% (decompressed_file_path, track, type.number_of_lines, lines_read))
else:
logger.info("Type [%s] in index [%s] does not define a document archive. No data are indexed from a file for this type." %
(type.name, index.name))
Expand Down
7 changes: 7 additions & 0 deletions esrally/track/track.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,13 @@ def has_valid_document_data(self):
return self.document_file is not None and \
self.number_of_documents > 0

@property
def number_of_lines(self):
    """
    The number of lines that correspond to ``number_of_documents``.

    :return: ``number_of_documents`` unchanged when ``includes_action_and_meta_data`` is falsy, otherwise twice
             ``number_of_documents`` (presumably one extra action-and-meta-data line per document - confirm
             against the data file format).
    """
    # Guard clause: without action-and-meta-data lines the count is used as-is.
    if not self.includes_action_and_meta_data:
        return self.number_of_documents
    return self.number_of_documents * 2

def __str__(self, *args, **kwargs):
    """
    Use the ``name`` attribute as the string representation.

    :param args: Accepted but not used.
    :param kwargs: Accepted but not used.
    :return: The value of ``self.name``.
    """
    return self.name

Expand Down
3 changes: 3 additions & 0 deletions esrally/utils/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ def prepare_file_offset_table(data_file_path):
#skip_lines(data_file_path, data_file) to speed up line skipping.
:param data_file_path: The path to a text file that is readable by this process.
:return: The number of lines read or ``None`` if it did not have to build the file offset table.
"""
offset_file_path = "%s.offset" % data_file_path
# recreate only if necessary as this can be time-consuming
Expand All @@ -274,8 +275,10 @@ def prepare_file_offset_table(data_file_path):
if line_number % 50000 == 0:
print("%d;%d" % (line_number, data_file.tell()), file=offset_file)
console.println("[OK]")
return line_number
else:
logger.info("Skipping creation of file offset table at [%s] as it is still valid." % offset_file_path)
return None


def skip_lines(data_file_path, data_file, number_of_lines_to_skip):
Expand Down

0 comments on commit 9d6c83a

Please sign in to comment.