Skip to content

Commit

Permalink
Merge pull request #31 from filiph/fragment-fix
Browse files Browse the repository at this point in the history
Fix checking of fragments with non-ASCII chars
  • Loading branch information
filiph authored Nov 16, 2018
2 parents 52807c4 + c1826bd commit d50922f
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 2 deletions.
4 changes: 3 additions & 1 deletion lib/src/destination.dart
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library linkcheck.destination;

import 'dart:io' show ContentType, HttpClientResponse, RedirectInfo;

import 'package:linkcheck/src/parsers/html.dart';

/// RegExp for detecting a URI scheme, such as `http:`, `mailto:`, etc.
///
/// The pattern is anchored at the start of the input. Group 1 captures the
/// scheme name without the trailing colon. A scheme must be at least two
/// characters long, and must start and end with a word character.
final _scheme = new RegExp(r"^(\w[\w\-]*\w):");

Expand Down Expand Up @@ -222,7 +224,7 @@ class Destination {
/// Whether this destination has an anchor matching [fragment].
///
/// A `null` or empty [fragment] is always satisfied. When [anchors] is
/// `null`, no non-empty fragment can be satisfied.
///
/// The fragment is passed through [normalizeAnchor] before the lookup, so
/// fragments that are not valid percent-encoded strings (for example ones
/// containing raw non-ASCII characters) are compared as-is instead of
/// throwing.
bool satisfiesFragment(String fragment) {
  if (fragment == null || fragment == '') return true;
  if (anchors == null) return false;
  return anchors.contains(normalizeAnchor(fragment));
}

Map<String, Object> toMap() => {
Expand Down
21 changes: 20 additions & 1 deletion lib/src/parsers/html.dart
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,25 @@ Link extractLink(
return new Link(origin, destination, destinationUri.fragment);
}

/// Normalizes an anchor so that element anchors and link fragments can be
/// compared with each other.
///
/// [anchor] is the `id` or `name` attribute of an HTML element, or the
/// fragment of a link.
///
/// Returns the percent-decoded form when [anchor] can be percent-decoded
/// ("Hr%C3%A1%C4%8Dek" becomes "Hráček"). Otherwise returns [anchor]
/// unchanged ("Hráček" stays "Hráček").
String normalizeAnchor(String anchor) {
  try {
    return Uri.decodeComponent(anchor);
  } on ArgumentError {
    // TODO: Report or handle ids and attributes that are not
    // percent-decodable (they were not percent-encoded and they
    // contain an invalid character).
    return anchor;
  } on FormatException {
    // The percent escapes are well-formed, but the decoded bytes are not
    // valid UTF-8 (for example "%E4"). Keep the anchor as written.
    return anchor;
  }
}

FetchResults parseHtml(String content, Uri uri, Destination current,
DestinationResult checked, bool ignoreLinks) {
var doc = parse(content, generateSpans: true, sourceUrl: uri.toString());
Expand All @@ -79,7 +98,7 @@ FetchResults parseHtml(String content, Uri uri, Destination current,
var anchors = doc
.querySelectorAll("body [id], body [name]")
.map((element) => element.attributes["id"] ?? element.attributes["name"])
.map((fragment) => Uri.decodeComponent(fragment))
.map(normalizeAnchor)
.toList();
checked.anchors = anchors;

Expand Down
10 changes: 10 additions & 0 deletions test/case13/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title></title>
</head>
<body>
<!-- Links to the page whose heading id contains a raw non-ASCII character. The link itself carries no fragment. -->
<a href="with-non-percent-encoded-anchor.html">APL</a>
</body>
</html>
10 changes: 10 additions & 0 deletions test/case13/with-non-percent-encoded-anchor.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title></title>
</head>
<body>
<!-- The id below contains a raw (not percent-encoded) non-ASCII character. -->
<h1 id="cite_ref-Berättar_104-0">RMS Berättar</h1>
</body>
</html>
10 changes: 10 additions & 0 deletions test/e2e_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,16 @@ void main() {
await server.destroy();
}
});

test("fragment checking works with non-percent-encoded anchors", () async {
var server = await Dhttpd.start(path: getServingPath(13), port: port);
try {
int result = await run([":$port"], out);
expect(result, 0);
} finally {
await server.destroy();
}
});
}, tags: ["integration"]);
}

Expand Down

0 comments on commit d50922f

Please sign in to comment.