From 2fb9908c472566140a2fe0e23d713f7b05e22ff9 Mon Sep 17 00:00:00 2001 From: nick evans Date: Sun, 22 Jan 2023 13:14:49 -0500 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Add=20nameprep=20stringprep=20profi?= =?UTF-8?q?le?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This isn't needed for Net::IMAP, but adding a new profile is trivial with the new generic stringprep code. Nameprep is used by IDNA, and not useful without punycode. But, with the addition of punycode, Net::IMAP could also support IDNA hostnames. On the other hand, maybe punycode belongs more to `uri` than `net-imap`? --- lib/net/imap/stringprep.rb | 1 + lib/net/imap/stringprep/nameprep.rb | 70 ++++++++ test/net/imap/test_stringprep_nameprep.rb | 198 ++++++++++++++++++++++ test/net/imap/test_stringprep_profiles.rb | 1 - 4 files changed, 269 insertions(+), 1 deletion(-) create mode 100644 lib/net/imap/stringprep/nameprep.rb create mode 100644 test/net/imap/test_stringprep_nameprep.rb diff --git a/lib/net/imap/stringprep.rb b/lib/net/imap/stringprep.rb index cc7776e8..c18e1dd7 100644 --- a/lib/net/imap/stringprep.rb +++ b/lib/net/imap/stringprep.rb @@ -9,6 +9,7 @@ class IMAP < Protocol # codepoint table defined in the RFC-3454 appendices is matched by a Regexp # defined in this module. module StringPrep + autoload :NamePrep, File.expand_path("stringprep/nameprep", __dir__) autoload :SASLprep, File.expand_path("stringprep/saslprep", __dir__) autoload :Tables, File.expand_path("stringprep/tables", __dir__) autoload :Trace, File.expand_path("stringprep/trace", __dir__) diff --git a/lib/net/imap/stringprep/nameprep.rb b/lib/net/imap/stringprep/nameprep.rb new file mode 100644 index 00000000..5cfc8ae2 --- /dev/null +++ b/lib/net/imap/stringprep/nameprep.rb @@ -0,0 +1,70 @@ +# frozen_string_literal: true + +module Net + class IMAP + module StringPrep + + # Defined in RFC3491[https://tools.ietf.org/html/rfc3491], the +nameprep+ + # profile of "Stringprep" is: + # >>> + # used by the IDNA protocol for preparing domain names; it is not + # designed for any other purpose. It is explicitly not designed for + # processing arbitrary free text and SHOULD NOT be used for that + # purpose. + # + # ... + # + # This profile specifies prohibiting using the following tables...: + # + # - C.1.2 (Non-ASCII space characters) + # - C.2.2 (Non-ASCII control characters) + # - C.3 (Private use characters) + # - C.4 (Non-character code points) + # - C.5 (Surrogate codes) + # - C.6 (Inappropriate for plain text) + # - C.7 (Inappropriate for canonical representation) + # - C.8 (Change display properties are deprecated) + # - C.9 (Tagging characters) + # + # IMPORTANT NOTE: This profile MUST be used with the IDNA protocol. + # The IDNA protocol has additional prohibitions that are checked + # outside of this profile. + module NamePrep + + # From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §10 + STRINGPREP_PROFILE = "nameprep" + + # From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §2 + UNASSIGNED_TABLE = "A.1" + + # From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §3 + MAPPING_TABLES = %w[B.1 B.2].freeze + + # From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §4 + NORMALIZATION = :nfkc + + # From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §5 + PROHIBITED_TABLES = %w[C.1.2 C.2.2 C.3 C.4 C.5 C.6 C.7 C.8 C.9].freeze + + # From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §6 + CHECK_BIDI = true + + module_function + + def nameprep(string, **opts) + StringPrep.stringprep( + string, + unassigned: UNASSIGNED_TABLE, + maps: MAPPING_TABLES, + prohibited: PROHIBITED_TABLES, + normalization: NORMALIZATION, + bidi: CHECK_BIDI, + profile: STRINGPREP_PROFILE, + **opts, + ) + end + end + + end + end +end diff --git a/test/net/imap/test_stringprep_nameprep.rb b/test/net/imap/test_stringprep_nameprep.rb new file mode 100644 index 00000000..6ed400b1 --- /dev/null +++ b/test/net/imap/test_stringprep_nameprep.rb @@ -0,0 +1,198 @@ +# frozen_string_literal: true + +require "net/imap" +require "test/unit" + +class StringPrepNamePrepTest < Test::Unit::TestCase + include Net::IMAP::StringPrep + include Net::IMAP::StringPrep::NamePrep + + # The following test cases were taken from + # https://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.txt + # ...mostly from Appendix A. + + # Hash[name, [in, out | exception, stored = false] + NAMEPREP_TEST_VECTORS = { + "Map to nothing" => [ + "foo\xC2\xAD\xCD\x8F\xE1\xA0\x86\xE1\xA0\x8B" \ + "bar\xE2\x80\x8B\xE2\x81\xA0" \ + "baz\xEF\xB8\x80\xEF\xB8\x88\xEF\xB8\x8F\xEF\xBB\xBF", + "foobarbaz" + ], + "Case folding ASCII U+0043 U+0041 U+0046 U+0045" => [ + "CAFE", "cafe" + ], + "Case folding 8bit U+00DF (german sharp s)" => [ + "\xC3\x9F", "ss" + ], + "Case folding U+0130 (turkish capital I with dot)" => [ + "\xC4\xB0", "i\xcc\x87" + ], + "Case folding multibyte U+0143 U+037A" => [ + "\xC5\x83\xCD\xBA", "\xC5\x84 \xCE\xB9" + ], + "Case folding U+2121 U+33C6 U+1D7BB" => [ + "\xE2\x84\xA1\xE3\x8F\x86\xF0\x9D\x9E\xBB", + "telc\xE2\x88\x95""kg\xCF\x83" + ], + "Normalization of U+006a U+030c U+00A0 U+00AA" => [ + "\x6A\xCC\x8C\xC2\xA0\xC2\xAA", "\xC7\xB0 a" + ], + "Case folding U+1FB7 and normalization" => [ + "\xE1\xBE\xB7", "\xE1\xBE\xB6\xCE\xB9" + ], + "Incorrect UTF-8 encoding of U+00DF" => [ + # n.b. this example isn't found in Appendix A, but is in §7. + "\xC3\xdf", [ArgumentError, /invalid byte sequence in UTF-8/] + ], + "Incorrect UTF-8 encoding of U+01F0" => [ + # n.b. Appendix A doesn't indicate an error for this, but §7 does. + "\xC7\xF0", [ArgumentError, /invalid byte sequence in UTF-8/] + ], + "Self-reverting case folding U+0390 and normalization" => [ + "\xCE\x90", "\xCE\x90" + ], + "Self-reverting case folding U+03B0 and normalization" => [ + "\xCE\xB0", "\xCE\xB0" + ], + "Self-reverting case folding U+1E96 and normalization" => [ + "\xE1\xBA\x96", "\xE1\xBA\x96" + ], + "Self-reverting case folding U+1F56 and normalization" => [ + "\xE1\xBD\x96", "\xE1\xBD\x96" + ], + "ASCII space character U+0020" => [ + "\x20", "\x20" + ], + "Non-ASCII 8bit space character U+00A0" => [ + "\xC2\xA0", "\x20" + ], + "Non-ASCII multibyte space character U+1680" => [ + "\xE1\x9A\x80", ProhibitedCodepoint + ], + "Non-ASCII multibyte space character U+2000" => [ + "\xE2\x80\x80", "\x20" + ], + "Zero Width Space U+200b" => [ + "\xE2\x80\x8b", "" + ], + "Non-ASCII multibyte space character U+3000" => [ + "\xE3\x80\x80", "\x20" + ], + "ASCII control characters U+0010 U+007F" => [ + "\x10\x7F", "\x10\x7F" + ], + "Non-ASCII 8bit control character U+0085" => [ + "\xC2\x85", ProhibitedCodepoint + ], + "Non-ASCII multibyte control character U+180E" => [ + "\xE1\xA0\x8E", ProhibitedCodepoint + ], + "Zero Width No-Break Space U+FEFF" => [ + "\xEF\xBB\xBF", "" + ], + "Non-ASCII control character U+1D175" => [ + "\xF0\x9D\x85\xB5", ProhibitedCodepoint + ], + "Plane 0 private use character U+F123" => [ + "\xEF\x84\xA3", ProhibitedCodepoint + ], + "Plane 15 private use character U+F1234" => [ + "\xF3\xB1\x88\xB4", ProhibitedCodepoint + ], + "Plane 16 private use character U+10F234" => [ + "\xF4\x8F\x88\xB4", ProhibitedCodepoint + ], + "Non-character code point U+8FFFE" => [ + "\xF2\x8F\xBF\xBE", ProhibitedCodepoint + ], + "Non-character code point U+10FFFF" => [ + "\xF4\x8F\xBF\xBF", ProhibitedCodepoint + ], + "Surrogate code U+DF42" => [ + "\xED\xBD\x82", [ArgumentError, /invalid byte sequence in UTF-8/] + ], + "Non-plain text character U+FFFD" => [ + "\xEF\xBF\xBD", ProhibitedCodepoint + ], + "Ideographic description character U+2FF5" => [ + "\xE2\xBF\xB5", ProhibitedCodepoint + ], + "Display property character U+0341" => [ + "\xCD\x81", "\xCC\x81" + ], + "Left-to-right mark U+200E" => [ + "\xE2\x80\x8E", ProhibitedCodepoint + ], + "Deprecated U+202A" => [ + "\xE2\x80\xAA", ProhibitedCodepoint + ], + "Language tagging character U+E0001" => [ + "\xF3\xA0\x80\x81", ProhibitedCodepoint + ], + "Language tagging character U+E0042" => [ + "\xF3\xA0\x81\x82", ProhibitedCodepoint + ], + "Bidi: RandALCat character U+05BE and LCat characters" => [ + "foo\xD6\xBE""bar", + [BidiStringError, /string with RandALCat.* must not contain LCat/] + ], + "Bidi: RandALCat character U+FD50 and LCat characters" => [ + "foo\xEF\xB5\x90""bar", + [BidiStringError, /string with RandALCat.* must not contain LCat/] + ], + "Bidi: RandALCat character U+FB38 and LCat characters" => [ + "foo\xEF\xB9\xB6""bar", "foo \xd9\x8e""bar" + ], + "Bidi: RandALCat without trailing RandALCat U+0627 U+0031" => [ + "\xD8\xA7\x31", + [BidiStringError, + /string with RandALCat.* must start and end with RandALCat/] + ], + "Bidi: RandALCat character U+0627 U+0031 U+0628" => [ + "\xD8\xA7\x31\xD8\xA8", "\xD8\xA7\x31\xD8\xA8" + ], + "Unassigned code point U+E0002" => [ + "\xF3\xA0\x80\x82", + [ProhibitedCodepoint, /contains.* unassigned code points.*Unicode 3.2/i], + true + ], + "Larger test (shrinking)" => [ + "X\xC2\xAD\xC3\x9F\xC4\xB0\xE2\x84\xA1\x6a\xcc\x8c\xc2\xa0\xc2" \ + "\xaa\xce\xb0\xe2\x80\x80", + "xssi\xcc\x87tel\xc7\xb0 a\xce\xb0 ", + "Nameprep" + ], + "Larger test (expanding)" => [ + "X\xC3\x9F\xe3\x8c\x96\xC4\xB0\xE2\x84\xA1\xE2\x92\x9F\xE3\x8c\x80", + "xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\x88" \ + "\xe3\x83\xabi\xcc\x87tel\x28d\x29\xe3\x82\xa2\xe3\x83\x91" \ + "\xe3\x83\xbc\xe3\x83\x88" + ], + } + + NAMEPREP_TEST_VECTORS.each do |comment, (input, output, stored)| + test_name = "test_nameprep_%s" % comment.downcase.gsub(/\W/, "_") + stored ||= false + ex, message = output + case output + when String + define_method test_name do + assert_equal output, nameprep(input, stored: stored), comment + end + when Class + if message # in Class => ex, (String | Regexp) => message + define_method test_name do + assert_raise_with_message(ex, message, comment) { + nameprep(input, stored: stored) + } + end + else # in Class => ex + define_method test_name do + assert_raise(ex, comment) { nameprep(input, stored: stored) } + end + end + end + end + +end diff --git a/test/net/imap/test_stringprep_profiles.rb b/test/net/imap/test_stringprep_profiles.rb index 37c48fdc..5487ffbb 100644 --- a/test/net/imap/test_stringprep_profiles.rb +++ b/test/net/imap/test_stringprep_profiles.rb @@ -1,7 +1,6 @@ # frozen_string_literal: true require "net/imap" -require "net/imap/sasl/stringprep" require "test/unit" class StringPrepProfilesTest < Test::Unit::TestCase