Skip to content

Commit

Permalink
Little optimisations to record parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
jelmervdl committed Nov 3, 2023
1 parent 6cb12b9 commit 1217d71
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions src/record.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,11 @@ namespace warc2text {
if (header.count("warc-target-uri") == 1) {
// respect the original casing
url = header["warc-target-uri"];
- }

- if (!url.empty() && url[0] == '<' && url[url.size()-1] == '>')
- url = url.substr(1, url.size()-2);
+ // Remove any "<" and ">" wrappings from the URL
+ if (!url.empty() && url[0] == '<' && url[url.size()-1] == '>')
+ url = url.substr(1, url.size()-2);
+ }

if (header.count("content-type") == 1) {
WARCcontentType = header["content-type"];
Expand All @@ -80,7 +81,7 @@ namespace warc2text {
}

payload_start = last_pos;
- if (header["warc-type"] == "response") {
+ if (recordType == "response") {
// parse HTTP header
pos = content.find("HTTP/1.", last_pos);
if (pos == last_pos) { // found HTTP header
Expand Down

0 comments on commit 1217d71

Please sign in to comment.