Skip to content

Commit

Permalink
✨ Add nameprep stringprep profile
Browse files Browse the repository at this point in the history
This isn't needed for Net::IMAP, but adding a new profile is trivial
with the new generic stringprep code.  Nameprep is used by IDNA, and not
useful without punycode.  But, with the addition of punycode, Net::IMAP
could also support IDNA hostnames.

On the other hand, maybe punycode belongs more to `uri` than `net-imap`?
  • Loading branch information
nevans committed Aug 5, 2023
1 parent 92dabbb commit 2fb9908
Show file tree
Hide file tree
Showing 4 changed files with 269 additions and 1 deletion.
1 change: 1 addition & 0 deletions lib/net/imap/stringprep.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class IMAP < Protocol
# codepoint table defined in the RFC-3454 appendices is matched by a Regexp
# defined in this module.
module StringPrep
autoload :NamePrep, File.expand_path("stringprep/nameprep", __dir__)
autoload :SASLprep, File.expand_path("stringprep/saslprep", __dir__)
autoload :Tables, File.expand_path("stringprep/tables", __dir__)
autoload :Trace, File.expand_path("stringprep/trace", __dir__)
Expand Down
70 changes: 70 additions & 0 deletions lib/net/imap/stringprep/nameprep.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# frozen_string_literal: true

module Net
class IMAP
module StringPrep

# Defined in RFC3491[https://tools.ietf.org/html/rfc3491], the +nameprep+
# profile of "Stringprep" is:
# >>>
# used by the IDNA protocol for preparing domain names; it is not
# designed for any other purpose. It is explicitly not designed for
# processing arbitrary free text and SHOULD NOT be used for that
# purpose.
#
# ...
#
# This profile specifies prohibiting using the following tables...:
#
# - C.1.2 (Non-ASCII space characters)
# - C.2.2 (Non-ASCII control characters)
# - C.3 (Private use characters)
# - C.4 (Non-character code points)
# - C.5 (Surrogate codes)
# - C.6 (Inappropriate for plain text)
# - C.7 (Inappropriate for canonical representation)
# - C.8 (Change display properties are deprecated)
# - C.9 (Tagging characters)
#
# IMPORTANT NOTE: This profile MUST be used with the IDNA protocol.
# The IDNA protocol has additional prohibitions that are checked
# outside of this profile.
module NamePrep

# From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §10
STRINGPREP_PROFILE = "nameprep"

# From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §2
UNASSIGNED_TABLE = "A.1"

# From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §3
MAPPING_TABLES = %w[B.1 B.2].freeze

# From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §4
NORMALIZATION = :nfkc

# From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §5
PROHIBITED_TABLES = %w[C.1.2 C.2.2 C.3 C.4 C.5 C.6 C.7 C.8 C.9].freeze

# From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §6
CHECK_BIDI = true

module_function

def nameprep(string, **opts)
StringPrep.stringprep(
string,
unassigned: UNASSIGNED_TABLE,
maps: MAPPING_TABLES,
prohibited: PROHIBITED_TABLES,
normalization: NORMALIZATION,
bidi: CHECK_BIDI,
profile: STRINGPREP_PROFILE,
**opts,
)
end
end

end
end
end
198 changes: 198 additions & 0 deletions test/net/imap/test_stringprep_nameprep.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
# frozen_string_literal: true

require "net/imap"
require "test/unit"

class StringPrepNamePrepTest < Test::Unit::TestCase
include Net::IMAP::StringPrep
include Net::IMAP::StringPrep::NamePrep

# The following test cases were taken from
# https://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.txt
# ...mostly from Appendix A.

# Hash[name, [in, out | exception, stored = false]
NAMEPREP_TEST_VECTORS = {
"Map to nothing" => [
"foo\xC2\xAD\xCD\x8F\xE1\xA0\x86\xE1\xA0\x8B" \
"bar\xE2\x80\x8B\xE2\x81\xA0" \
"baz\xEF\xB8\x80\xEF\xB8\x88\xEF\xB8\x8F\xEF\xBB\xBF",
"foobarbaz"
],
"Case folding ASCII U+0043 U+0041 U+0046 U+0045" => [
"CAFE", "cafe"
],
"Case folding 8bit U+00DF (german sharp s)" => [
"\xC3\x9F", "ss"
],
"Case folding U+0130 (turkish capital I with dot)" => [
"\xC4\xB0", "i\xcc\x87"
],
"Case folding multibyte U+0143 U+037A" => [
"\xC5\x83\xCD\xBA", "\xC5\x84 \xCE\xB9"
],
"Case folding U+2121 U+33C6 U+1D7BB" => [
"\xE2\x84\xA1\xE3\x8F\x86\xF0\x9D\x9E\xBB",
"telc\xE2\x88\x95""kg\xCF\x83"
],
"Normalization of U+006a U+030c U+00A0 U+00AA" => [
"\x6A\xCC\x8C\xC2\xA0\xC2\xAA", "\xC7\xB0 a"
],
"Case folding U+1FB7 and normalization" => [
"\xE1\xBE\xB7", "\xE1\xBE\xB6\xCE\xB9"
],
"Incorrect UTF-8 encoding of U+00DF" => [
# n.b. this example isn't found in Appendix A, but is in §7.
"\xC3\xdf", [ArgumentError, /invalid byte sequence in UTF-8/]
],
"Incorrect UTF-8 encoding of U+01F0" => [
# n.b. Appendix A doesn't indicate an error for this, but §7 does.
"\xC7\xF0", [ArgumentError, /invalid byte sequence in UTF-8/]
],
"Self-reverting case folding U+0390 and normalization" => [
"\xCE\x90", "\xCE\x90"
],
"Self-reverting case folding U+03B0 and normalization" => [
"\xCE\xB0", "\xCE\xB0"
],
"Self-reverting case folding U+1E96 and normalization" => [
"\xE1\xBA\x96", "\xE1\xBA\x96"
],
"Self-reverting case folding U+1F56 and normalization" => [
"\xE1\xBD\x96", "\xE1\xBD\x96"
],
"ASCII space character U+0020" => [
"\x20", "\x20"
],
"Non-ASCII 8bit space character U+00A0" => [
"\xC2\xA0", "\x20"
],
"Non-ASCII multibyte space character U+1680" => [
"\xE1\x9A\x80", ProhibitedCodepoint
],
"Non-ASCII multibyte space character U+2000" => [
"\xE2\x80\x80", "\x20"
],
"Zero Width Space U+200b" => [
"\xE2\x80\x8b", ""
],
"Non-ASCII multibyte space character U+3000" => [
"\xE3\x80\x80", "\x20"
],
"ASCII control characters U+0010 U+007F" => [
"\x10\x7F", "\x10\x7F"
],
"Non-ASCII 8bit control character U+0085" => [
"\xC2\x85", ProhibitedCodepoint
],
"Non-ASCII multibyte control character U+180E" => [
"\xE1\xA0\x8E", ProhibitedCodepoint
],
"Zero Width No-Break Space U+FEFF" => [
"\xEF\xBB\xBF", ""
],
"Non-ASCII control character U+1D175" => [
"\xF0\x9D\x85\xB5", ProhibitedCodepoint
],
"Plane 0 private use character U+F123" => [
"\xEF\x84\xA3", ProhibitedCodepoint
],
"Plane 15 private use character U+F1234" => [
"\xF3\xB1\x88\xB4", ProhibitedCodepoint
],
"Plane 16 private use character U+10F234" => [
"\xF4\x8F\x88\xB4", ProhibitedCodepoint
],
"Non-character code point U+8FFFE" => [
"\xF2\x8F\xBF\xBE", ProhibitedCodepoint
],
"Non-character code point U+10FFFF" => [
"\xF4\x8F\xBF\xBF", ProhibitedCodepoint
],
"Surrogate code U+DF42" => [
"\xED\xBD\x82", [ArgumentError, /invalid byte sequence in UTF-8/]
],
"Non-plain text character U+FFFD" => [
"\xEF\xBF\xBD", ProhibitedCodepoint
],
"Ideographic description character U+2FF5" => [
"\xE2\xBF\xB5", ProhibitedCodepoint
],
"Display property character U+0341" => [
"\xCD\x81", "\xCC\x81"
],
"Left-to-right mark U+200E" => [
"\xE2\x80\x8E", ProhibitedCodepoint
],
"Deprecated U+202A" => [
"\xE2\x80\xAA", ProhibitedCodepoint
],
"Language tagging character U+E0001" => [
"\xF3\xA0\x80\x81", ProhibitedCodepoint
],
"Language tagging character U+E0042" => [
"\xF3\xA0\x81\x82", ProhibitedCodepoint
],
"Bidi: RandALCat character U+05BE and LCat characters" => [
"foo\xD6\xBE""bar",
[BidiStringError, /string with RandALCat.* must not contain LCat/]
],
"Bidi: RandALCat character U+FD50 and LCat characters" => [
"foo\xEF\xB5\x90""bar",
[BidiStringError, /string with RandALCat.* must not contain LCat/]
],
"Bidi: RandALCat character U+FB38 and LCat characters" => [
"foo\xEF\xB9\xB6""bar", "foo \xd9\x8e""bar"
],
"Bidi: RandALCat without trailing RandALCat U+0627 U+0031" => [
"\xD8\xA7\x31",
[BidiStringError,
/string with RandALCat.* must start and end with RandALCat/]
],
"Bidi: RandALCat character U+0627 U+0031 U+0628" => [
"\xD8\xA7\x31\xD8\xA8", "\xD8\xA7\x31\xD8\xA8"
],
"Unassigned code point U+E0002" => [
"\xF3\xA0\x80\x82",
[ProhibitedCodepoint, /contains.* unassigned code points.*Unicode 3.2/i],
true
],
"Larger test (shrinking)" => [
"X\xC2\xAD\xC3\x9F\xC4\xB0\xE2\x84\xA1\x6a\xcc\x8c\xc2\xa0\xc2" \
"\xaa\xce\xb0\xe2\x80\x80",
"xssi\xcc\x87tel\xc7\xb0 a\xce\xb0 ",
"Nameprep"
],
"Larger test (expanding)" => [
"X\xC3\x9F\xe3\x8c\x96\xC4\xB0\xE2\x84\xA1\xE2\x92\x9F\xE3\x8c\x80",
"xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\x88" \
"\xe3\x83\xabi\xcc\x87tel\x28d\x29\xe3\x82\xa2\xe3\x83\x91" \
"\xe3\x83\xbc\xe3\x83\x88"
],
}

NAMEPREP_TEST_VECTORS.each do |comment, (input, output, stored)|
test_name = "test_nameprep_%s" % comment.downcase.gsub(/\W/, "_")
stored ||= false
ex, message = output
case output
when String
define_method test_name do
assert_equal output, nameprep(input, stored: stored), comment
end
when Class
if message # in Class => ex, (String | Regexp) => message
define_method test_name do
assert_raise_with_message(ex, message, comment) {
nameprep(input, stored: stored)
}
end
else # in Class => ex
define_method test_name do
assert_raise(ex, comment) { nameprep(input, stored: stored) }
end
end
end
end

end
1 change: 0 additions & 1 deletion test/net/imap/test_stringprep_profiles.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# frozen_string_literal: true

require "net/imap"
require "net/imap/sasl/stringprep"
require "test/unit"

class StringPrepProfilesTest < Test::Unit::TestCase
Expand Down

0 comments on commit 2fb9908

Please sign in to comment.