Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

N3 parsing of local names with special characters #523

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 67 additions & 17 deletions src/n3parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ $Id: n3parser.js 14561 2008-02-23 06:37:26Z kennyluck $

HAND EDITED FOR CONVERSION TO JAVASCRIPT

This module implements a Nptation3 parser, and the final
This module implements a Notation3 parser, and the final
part of a notation3 serializer.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
part of a notation3 serializer.
part of a Notation3 serializer.


See also:
Expand Down Expand Up @@ -201,9 +201,12 @@ var DATE_DATATYPE = "http://www.w3.org/2001/XMLSchema#date";
var DATETIME_DATATYPE = "http://www.w3.org/2001/XMLSchema#dateTime";
var BOOLEAN_DATATYPE = "http://www.w3.org/2001/XMLSchema#boolean";
var option_noregen = 0;
var _notQNameChars = "\t\r\n !\"#$%&'()*.,+/;<=>?@[\\]^`{|}~";
var _notNameChars = ( _notQNameChars + ":" ) ;
var _notQNameChars = "\t\r\n !\"#$&'()*,+/;<=>?@[\\]^`{|}~"; // else valid qname :-/
var _notKeywordsChars = ( _notQNameChars + "." ) ;
var _notNameChars = ( _notQNameChars + ":" ) ; // Assume anything else valid name :-/
var _rdfns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
var hexChars = "ABCDEFabcdef0123456789";
var escapeChars = "(_~.-!$&'()*+,;=/?#@%)"; // valid for \ escapes in localnames
var N3CommentCharacter = "#";
var eol = new RegExp("^[ \\t]*(#[^\\n]*)?\\r?\\n", 'g');
var eof = new RegExp("^[ \\t]*(#[^\\n]*)?$", 'g');
Expand Down Expand Up @@ -360,7 +363,7 @@ __SinkParser.prototype.tok = function(tok, str, i) {
}
}
var k = ( i + pyjslib_len(tok) ) ;
if ((str.slice( i, k) == tok) && (_notQNameChars.indexOf(str.charAt(k)) >= 0)) {
if ((str.slice( i, k) == tok) && (_notKeywordsChars.indexOf(str.charAt(k)) >= 0)) {
return k;
}
else {
Expand Down Expand Up @@ -626,7 +629,7 @@ __SinkParser.prototype.path = function(str, i, res) {
var ch = str.slice( j, ( j + 1 ) );
if ((ch == ".")) {
var ahead = str.slice( ( j + 1 ) , ( j + 2 ) );
if (!(ahead) || (_notNameChars.indexOf(ahead) >= 0) && (":?<[{(".indexOf(ahead) < 0)) {
if (!(ahead) || (_notKeywordsChars.indexOf(ahead) >= 0) && (":?<[{(".indexOf(ahead) < 0) || (ahead == "%") ) {
break;
}
}
Expand Down Expand Up @@ -1168,7 +1171,7 @@ __SinkParser.prototype.variable = function(str, i, res) {
throw BadSyntax(this._thisDoc, this.lines, str, j, ( ( "Varible name can't start with '" + str.charAt(j) ) + "s'" ) );
return -1;
}
while ((i < pyjslib_len(str)) && (_notNameChars.indexOf(str.charAt(i)) < 0)) {
while ((i < pyjslib_len(str)) && (_notKeywordsChars.indexOf(str.charAt(i)) < 0)) {
var i = ( i + 1 ) ;
}
if ((this._parentContext == null)) {
Expand All @@ -1190,19 +1193,18 @@ __SinkParser.prototype.bareWord = function(str, i, res) {
if (("0123456789-".indexOf(ch) >= 0)) {
return -1;
}
if ((_notNameChars.indexOf(ch) >= 0)) {
if ((_notKeywordsChars.indexOf(ch) >= 0)) {
return -1;
}
var i = j;
while ((i < pyjslib_len(str)) && (_notNameChars.indexOf(str.charAt(i)) < 0)) {
while ((i < pyjslib_len(str)) && (_notKeywordsChars.indexOf(str.charAt(i)) < 0)) {
var i = ( i + 1 ) ;
}
res.push(str.slice( j, i));
return i;
};
__SinkParser.prototype.qname = function(str, i, res) {
/*

xyz:def -> ('xyz', 'def')
If not in keywords and keywordsSet: def -> ('', 'def')
:def -> ('', 'def')
Expand All @@ -1218,35 +1220,83 @@ __SinkParser.prototype.qname = function(str, i, res) {
}
if ((_notNameChars.indexOf(c) < 0)) {
var ln = c;
var i = ( i + 1 ) ;
while ((i < pyjslib_len(str))) {
var i = ( i + 1 ) ;
while (i < pyjslib_len(str)) {
var c = str.charAt(i);
if ((_notNameChars.indexOf(c) < 0)) {
if ((_notNameChars.indexOf(c) < 0)) {
var ln = ( ln + c ) ;
var i = ( i + 1 ) ;
}
else {
break;
}
}

if(str.charAt(i - 1) == ".") { // qname cannot end with "."
var i = ( i - 1 ) ;
if (ln.length == 0) {
return -1;
}
ln = ln.slice(0, -1);
}
}
else {
var ln = "";
}
if ((i < pyjslib_len(str)) && (str.charAt(i) == ":")) {
var pfx = ln;
var i = ( i + 1 ) ;
// bnodes names have different rules
if (pfx == "_") {
var allowedChars = _notNameChars
}
else {
var allowedChars = _notQNameChars
}
var i = ( i + 1 ) ;
var lastslash = false;
var ln = "";
while ((i < pyjslib_len(str))) {
while (i < pyjslib_len(str)) {
var c = str.charAt(i);
if ((_notNameChars.indexOf(c) < 0)) {
var ln = ( ln + c ) ;
var i = ( i + 1 ) ;
if (c == "\\" && !(lastslash)) {
var lastslash = true;
}
else if ((allowedChars.indexOf(c) < 0) || lastslash) {
if (lastslash) {
if (escapeChars.indexOf(c) < 0) {
throw BadSyntax(this._thisDoc, this.lines, str, i, "illegal escape " + c);
}
}
else if (c == "%") {
if (i == pyjslib_len(str) - 2) {
throw BadSyntax(this._thisDoc, this.lines, str, i, "illegal hex escape % (EOF)");
}
var ec1 = str.charAt(i + 1);
var ec2 = str.charAt(i + 2);
if (hexChars.indexOf(ec1) < 0 || hexChars.indexOf(ec2) < 0 ) {
throw BadSyntax(this._thisDoc, this.lines, str, i, "illegal hex escape %" + ec1 + ec2);
}
}
var lastslash = false;
var ln = ( ln + c ) ;
}
else {
break;
}
var i = ( i + 1 ) ;
}

if (lastslash) {
throw BadSyntax(this._thisDoc, this.lines, str, i, "qname cannot end with \\");
}

if(str.charAt(i - 1) == ".") { // localname cannot end in .
if (ln.length == 0) {
return -1;
}
var i = ( i - 1 ) ;
ln = ln.slice(0, -1);
}

res.push(new pyjslib_Tuple([pfx, ln]));
return i;
}
Expand Down