From d9682cf71623768332d71f4dd42f074e52aec832 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Thu, 13 Jan 2022 14:00:12 -0800 Subject: [PATCH 1/4] Improve string escapes for String#to_sxp. --- .github/workflows/ci.yml | 2 +- lib/sxp/extensions.rb | 20 ++++++++++++++++++-- lib/sxp/reader/basic.rb | 8 ++++---- lib/sxp/reader/common_lisp.rb | 2 +- lib/sxp/reader/sparql.rb | 2 +- spec/extensions_spec.rb | 29 +++++++++++++++++++++++++++++ spec/reader_spec.rb | 29 +++++++++++++++++++++++++++++ 7 files changed, 83 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0aaedb9..9a760e1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,7 @@ jobs: ruby: - 2.6 - 2.7 - - 3.0 + - "3.0" - 3.1 - ruby-head - jruby diff --git a/lib/sxp/extensions.rb b/lib/sxp/extensions.rb index 6258dba..193742a 100644 --- a/lib/sxp/extensions.rb +++ b/lib/sxp/extensions.rb @@ -55,11 +55,27 @@ def to_sxp(**options) # Extensions for Ruby's `String` class. class String ## - # Returns the SXP representation of this object. + # Returns the SXP representation of this object. Uses SPARQL-like escaping. # # @return [String] def to_sxp(**options) - inspect + buffer = "" + each_char do |u| + buffer << case u.ord + when (0x00..0x07) then sprintf("\\u%04X", u.ord) + when (0x08) then '\b' + when (0x09) then '\t' + when (0x0A) then '\n' + when (0x0C) then '\f' + when (0x0D) then '\r' + when (0x0E..0x1F) then sprintf("\\u%04X", u.ord) + when (0x22) then '\"' + when (0x5C) then '\\' + when (0x7F) then sprintf("\\u%04X", u.ord) + else u.chr + end + end + '"' + buffer + '"' end end diff --git a/lib/sxp/reader/basic.rb b/lib/sxp/reader/basic.rb index e34af45..e23baf6 100644 --- a/lib/sxp/reader/basic.rb +++ b/lib/sxp/reader/basic.rb @@ -35,7 +35,7 @@ def read_atom ## # @return [String] def read_string - buffer = String.new + buffer = "" skip_char # '"' until peek_char == ?" #" buffer << @@ -57,8 +57,8 @@ def read_character when ?n then ?\n when ?r then ?\r when ?t then ?\t - when ?u then read_chars(4).to_i(16).chr - when ?U then read_chars(8).to_i(16).chr + when ?u then read_chars(4).to_i(16).chr(Encoding::UTF_8) + when ?U then read_chars(8).to_i(16).chr(Encoding::UTF_8) when ?" then char #" when ?\\ then char else char @@ -69,7 +69,7 @@ def read_character # @return [String] def read_literal grammar = self.class.const_get(:ATOM) - buffer = String.new + buffer = "" buffer << read_char while !eof? && peek_char.chr =~ grammar buffer end diff --git a/lib/sxp/reader/common_lisp.rb b/lib/sxp/reader/common_lisp.rb index 42e3619..44b6203 100644 --- a/lib/sxp/reader/common_lisp.rb +++ b/lib/sxp/reader/common_lisp.rb @@ -72,7 +72,7 @@ def read_sharp ## # @return [Symbol] def read_symbol(delimiter = nil) - buffer = String.new + buffer = "" skip_char # '|' until delimiter === peek_char buffer << diff --git a/lib/sxp/reader/sparql.rb b/lib/sxp/reader/sparql.rb index bf95ae1..b87ed19 100644 --- a/lib/sxp/reader/sparql.rb +++ b/lib/sxp/reader/sparql.rb @@ -171,7 +171,7 @@ def read_rdf_literal # # @return [RDF::URI] def read_rdf_uri - buffer = String.new + buffer = "" skip_char # '<' return :< if (char = peek_char).nil? || char.chr !~ ATOM # FIXME: nasty special case for the '< symbol return :<= if peek_char.chr.eql?(?=.chr) && read_char # FIXME: nasty special case for the '<= symbol diff --git a/spec/extensions_spec.rb b/spec/extensions_spec.rb index 901b1c8..0666e63 100644 --- a/spec/extensions_spec.rb +++ b/spec/extensions_spec.rb @@ -24,6 +24,35 @@ expect(value.to_sxp).to eq result end end + + describe "string escapes" do + { + "\b" => %{"\\b"}, + "\f" => %{"\\f"}, + "\n" => %{"\\n"}, + "\r" => %{"\\r"}, + "\t" => %{"\\t"}, + "\u0080" => %{"\u0080"}, + "\u07FF" => %("\u07FF"), + "\u0800" => %("\u0800"), + "\u0FFF" => %("\u0FFF"), + "\u1000" => %("\u1000"), + "\uD000" => %("\uD000"), + "\uD7FF" => %("\uD7FF"), + "\uE000" => %("\uE000"), + "\uFFFD" => %("\uFFFD"), + "\u{10000}" => %("\u{010000}"), + "\u{3FFFD}" => %("\u{03FFFD}"), + "\u{40000}" => %("\u{040000}"), + "\u{FFFFD}" => %("\u{0FFFFD}"), + "\u{100000}" => %("\u{100000}"), + "\u{10FFFD}" => %("\u{10FFFD}"), + }.each do |value, result| + it "writes #{value} as #{result.inspect}" do + expect(value.to_sxp).to eq result + end + end + end end describe "Vector" do diff --git a/spec/reader_spec.rb b/spec/reader_spec.rb index 61f3bd6..b2212ad 100644 --- a/spec/reader_spec.rb +++ b/spec/reader_spec.rb @@ -34,6 +34,35 @@ end end + context "escapes in strings" do + { + %q{"\b"} => "\b", + %q{"\f"} => "\f", + %q{"\n"} => "\n", + %q{"\r"} => "\r", + %q{"\t"} => "\t", + %q{"\u0080"} => "\u0080", + %q("\u07FF") => "\u07FF", + %q("\u0800") => "\u0800", + %q("\u0FFF") => "\u0FFF", + %q("\u1000") => "\u1000", + %q("\uD000") => "\uD000", + %q("\uD7FF") => "\uD7FF", + %q("\uE000") => "\uE000", + %q("\uFFFD") => "\uFFFD", + %q("\U00010000") => "\u{10000}", + %q("\U0003FFFD") => "\u{3FFFD}", + %q("\U00040000") => "\u{40000}", + %q("\U000FFFFD") => "\u{FFFFD}", + %q("\U00100000") => "\u{100000}", + %q("\U0010FFFD") => "\u{10FFFD}", + }.each do |input, output| + it "reads #{input} as #{output.inspect}" do + expect(read(input)).to eq output + end + end + end + context "problematic examples" do { %q{"\t'[]()-"} => "\t'[]()-", From 2df5a351bcf006d58c737deb484ff62189e20f75 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sun, 16 Jan 2022 15:50:59 -0800 Subject: [PATCH 2/4] Improve string escapes for RDF::Literal#to_sxp. --- lib/sxp/extensions.rb | 4 ++-- spec/extensions_spec.rb | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/lib/sxp/extensions.rb b/lib/sxp/extensions.rb index 193742a..b1e7c3c 100644 --- a/lib/sxp/extensions.rb +++ b/lib/sxp/extensions.rb @@ -245,7 +245,7 @@ def to_sxp(prefixes: nil, base_uri: nil, **options) class RDF::URI ## - # Returns the SXP representation of this a URI. Uses Lexical representation, if set, otherwise, any PName match, otherwise, the relativized version of the URI if a base_uri is given, otherwise just the URI. + # Returns the SXP representation of this URI. Uses Lexical representation, if set, otherwise, any PName match, otherwise, the relativized version of the URI if a base_uri is given, otherwise just the URI. # # @param [Hash{Symbol => RDF::URI}] prefixes(nil) # @param [RDF::URI] base_uri(nil) @@ -284,7 +284,7 @@ def to_sxp(**options) # Retain stated lexical form if possible valid? ? to_s : object.to_sxp(**options) else - text = value.dump + text = value.to_sxp text << "@#{language}" if self.has_language? text << "^^#{datatype.to_sxp(**options)}" if self.has_datatype? text diff --git a/spec/extensions_spec.rb b/spec/extensions_spec.rb index 0666e63..e048e0c 100644 --- a/spec/extensions_spec.rb +++ b/spec/extensions_spec.rb @@ -85,6 +85,35 @@ }.each_pair do |l, sxp| specify {expect(l.to_sxp).to eq sxp} end + + describe "string escapes" do + { + "\b" => %{"\\b"}, + "\f" => %{"\\f"}, + "\n" => %{"\\n"}, + "\r" => %{"\\r"}, + "\t" => %{"\\t"}, + "\u0080" => %{"\u0080"}, + "\u07FF" => %("\u07FF"), + "\u0800" => %("\u0800"), + "\u0FFF" => %("\u0FFF"), + "\u1000" => %("\u1000"), + "\uD000" => %("\uD000"), + "\uD7FF" => %("\uD7FF"), + "\uE000" => %("\uE000"), + "\uFFFD" => %("\uFFFD"), + "\u{10000}" => %("\u{010000}"), + "\u{3FFFD}" => %("\u{03FFFD}"), + "\u{40000}" => %("\u{040000}"), + "\u{FFFFD}" => %("\u{0FFFFD}"), + "\u{100000}" => %("\u{100000}"), + "\u{10FFFD}" => %("\u{10FFFD}"), + }.each do |value, result| + it "writes #{value} as #{result.inspect}" do + expect(RDF::Literal(value).to_sxp).to eq result + end + end + end end describe "RDF::Literal#to_sxp with prefix" do From 6d943e34c3ccf4ee3571edfac57db5fff9666801 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sun, 16 Jan 2022 18:12:22 -0800 Subject: [PATCH 3/4] Escape backslash. --- lib/sxp/extensions.rb | 2 +- spec/extensions_spec.rb | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/sxp/extensions.rb b/lib/sxp/extensions.rb index b1e7c3c..934ebf1 100644 --- a/lib/sxp/extensions.rb +++ b/lib/sxp/extensions.rb @@ -70,7 +70,7 @@ def to_sxp(**options) when (0x0D) then '\r' when (0x0E..0x1F) then sprintf("\\u%04X", u.ord) when (0x22) then '\"' - when (0x5C) then '\\' + when (0x5C) then '\\\\' when (0x7F) then sprintf("\\u%04X", u.ord) else u.chr end diff --git a/spec/extensions_spec.rb b/spec/extensions_spec.rb index e048e0c..03a0d25 100644 --- a/spec/extensions_spec.rb +++ b/spec/extensions_spec.rb @@ -32,6 +32,7 @@ "\n" => %{"\\n"}, "\r" => %{"\\r"}, "\t" => %{"\\t"}, + "\\" => %{"\\\\"}, "\u0080" => %{"\u0080"}, "\u07FF" => %("\u07FF"), "\u0800" => %("\u0800"), @@ -93,6 +94,7 @@ "\n" => %{"\\n"}, "\r" => %{"\\r"}, "\t" => %{"\\t"}, + "\\" => %{"\\\\"}, "\u0080" => %{"\u0080"}, "\u07FF" => %("\u07FF"), "\u0800" => %("\u0800"), From 8f6ab9a661bc86286512adf2aeee40da0c8cad7d Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sun, 16 Jan 2022 18:17:02 -0800 Subject: [PATCH 4/4] Version 1.2.1. --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 26aaba0..6085e94 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.2.0 +1.2.1