Skip to content

Commit

Permalink
Add String#bytesplice with tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
itarato committed May 18, 2023
1 parent 2beb695 commit fe7c4c8
Show file tree
Hide file tree
Showing 6 changed files with 197 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Compatibility:
* Add support for array pattern matching. This is opt-in via `--pattern-matching` since pattern matching is not fully supported yet. (#2683, @razetime).
* Fix `Array#[]` with `ArithmeticSequence` argument when step is negative (@itarato).
* Fix `Range#size` and return `nil` for beginningless Range when end isn't Numeric (@rwstauner).
* Add `String#bytesplice` (@itarato).

Performance:

Expand Down
130 changes: 130 additions & 0 deletions spec/ruby/core/string/bytesplice_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# -*- encoding: utf-8 -*-
require_relative '../../spec_helper'

# Specs for String#bytesplice on strings made of single-byte characters:
# argument validation, integer (index, length) form, range form, and the
# in-place mutation contract.
describe "String#bytesplice" do
  ruby_version_is "3.2" do
    it "raises IndexError when index is less than -size" do
      -> { "hello".bytesplice(-6, 0, "xxx") }.should raise_error(IndexError, "index -6 out of string")
    end

    # An index equal to size is valid (it appends), so only index > size raises.
    it "raises IndexError when index is greater than size" do
      -> { "hello".bytesplice(6, 0, "xxx") }.should raise_error(IndexError, "index 6 out of string")
    end

    it "raises IndexError when length is negative" do
      -> { "hello".bytesplice(0, -1, "xxx") }.should raise_error(IndexError, "negative length -1")
    end

    it "replaces with integer indices" do
      "hello".bytesplice(-5, 0, "xxx").should == "xxxhello"
      "hello".bytesplice(0, 0, "xxx").should == "xxxhello"
      "hello".bytesplice(0, 1, "xxx").should == "xxxello"
      "hello".bytesplice(0, 5, "xxx").should == "xxx"
      # A length running past the end of the string is truncated.
      "hello".bytesplice(0, 6, "xxx").should == "xxx"
      # index == size inserts at the very end.
      "hello".bytesplice(5, 0, "xxx").should == "helloxxx"
    end

    it "raises RangeError when range left boundary is less than -size" do
      -> { "hello".bytesplice(-6...-6, "xxx") }.should raise_error(RangeError, "-6...-6 out of range")
    end

    it "replaces with ranges" do
      "hello".bytesplice(-5...-5, "xxx").should == "xxxhello"
      "hello".bytesplice(0...0, "xxx").should == "xxxhello"
      "hello".bytesplice(0..0, "xxx").should == "xxxello"
      "hello".bytesplice(0...1, "xxx").should == "xxxello"
      "hello".bytesplice(0..1, "xxx").should == "xxxllo"
      "hello".bytesplice(0..-1, "xxx").should == "xxx"
      "hello".bytesplice(0...5, "xxx").should == "xxx"
      # A range end past the end of the string is truncated.
      "hello".bytesplice(0...6, "xxx").should == "xxx"
    end

    it "raises TypeError when integer index is provided without length argument" do
      -> { "hello".bytesplice(0, "xxx") }.should raise_error(TypeError, "wrong argument type Integer (expected Range)")
    end

    it "replaces on an empty string" do
      "".bytesplice(0, 0, "").should == ""
      "".bytesplice(0, 0, "xxx").should == "xxx"
    end

    it "mutates self" do
      s = "hello"
      s.bytesplice(2, 1, "xxx").should == "hexxxlo"
      s.should == "hexxxlo"
    end

    it "raises when string is frozen" do
      s = "hello".freeze
      -> { s.bytesplice(2, 1, "xxx") }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"")
    end
  end
end

# Specs for String#bytesplice on a UTF-8 string whose characters are three
# bytes each ("こんにちは" is 5 characters / 15 bytes), covering character
# boundary enforcement and encoding negotiation with the replacement string.
describe "String#bytesplice with multibyte characters" do
  ruby_version_is "3.2" do
    it "raises IndexError when index is out of byte size boundary" do
      -> { "こんにちは".bytesplice(-16, 0, "xxx") }.should raise_error(IndexError, "index -16 out of string")
      # The upper bound is the byte size (15), not the character count.
      -> { "こんにちは".bytesplice(16, 0, "xxx") }.should raise_error(IndexError, "index 16 out of string")
    end

    it "raises IndexError when index is not on a codepoint boundary" do
      -> { "こんにちは".bytesplice(1, 0, "xxx") }.should raise_error(IndexError, "offset 1 does not land on character boundary")
    end

    it "raises IndexError when length is not matching the codepoint boundary" do
      -> { "こんにちは".bytesplice(0, 1, "xxx") }.should raise_error(IndexError, "offset 1 does not land on character boundary")
      -> { "こんにちは".bytesplice(0, 2, "xxx") }.should raise_error(IndexError, "offset 2 does not land on character boundary")
    end

    it "replaces with integer indices" do
      "こんにちは".bytesplice(-15, 0, "xxx").should == "xxxこんにちは"
      "こんにちは".bytesplice(0, 0, "xxx").should == "xxxこんにちは"
      "こんにちは".bytesplice(0, 3, "xxx").should == "xxxんにちは"
      "こんにちは".bytesplice(3, 3, "はは").should == "こははにちは"
      "こんにちは".bytesplice(15, 0, "xxx").should == "こんにちはxxx"
    end

    it "replaces with range" do
      "こんにちは".bytesplice(-15...-16, "xxx").should == "xxxこんにちは"
      "こんにちは".bytesplice(0...0, "xxx").should == "xxxこんにちは"
      "こんにちは".bytesplice(0..2, "xxx").should == "xxxんにちは"
      "こんにちは".bytesplice(0...3, "xxx").should == "xxxんにちは"
      "こんにちは".bytesplice(0..5, "xxx").should == "xxxにちは"
      "こんにちは".bytesplice(0..-1, "xxx").should == "xxx"
      "こんにちは".bytesplice(0...15, "xxx").should == "xxx"
      "こんにちは".bytesplice(0...18, "xxx").should == "xxx"
    end

    it "treats negative length for range as 0" do
      "こんにちは".bytesplice(0...-100, "xxx").should == "xxxこんにちは"
      "こんにちは".bytesplice(3...-100, "xxx").should == "こxxxんにちは"
      "こんにちは".bytesplice(-15...-100, "xxx").should == "xxxこんにちは"
    end

    it "raises IndexError when ranges do not match codepoint boundaries" do
      -> { "こんにちは".bytesplice(0..0, "x") }.should raise_error(IndexError, "offset 1 does not land on character boundary")
      -> { "こんにちは".bytesplice(0..1, "x") }.should raise_error(IndexError, "offset 2 does not land on character boundary")
      # Range begin falls inside a character
      -> { "こんにちは".bytesplice(-4..-1, "x") }.should raise_error(IndexError, "offset 11 does not land on character boundary")
      -> { "こんにちは".bytesplice(-5..-1, "x") }.should raise_error(IndexError, "offset 10 does not land on character boundary")
      # Range begin is fine, range end falls inside a character
      -> { "こんにちは".bytesplice(-3..-2, "x") }.should raise_error(IndexError, "offset 14 does not land on character boundary")
      -> { "こんにちは".bytesplice(-3..-3, "x") }.should raise_error(IndexError, "offset 13 does not land on character boundary")
    end

    it "deals with a different encoded argument" do
      # UTF-8 receiver, US-ASCII replacement
      s = "こんにちは"
      s.encoding.should == Encoding::UTF_8
      sub = "xxxxxx"
      sub.force_encoding(Encoding::US_ASCII)

      result = s.bytesplice(0, 3, sub)
      result.should == "xxxxxxんにちは"
      result.encoding.should == Encoding::UTF_8

      # US-ASCII receiver, UTF-8 replacement
      s = "xxxxxx"
      s.force_encoding(Encoding::US_ASCII)
      sub = "こんにちは"
      sub.encoding.should == Encoding::UTF_8

      result = s.bytesplice(0, 3, sub)
      result.should == "こんにちはxxx"
      result.encoding.should == Encoding::UTF_8
    end
  end
end
1 change: 1 addition & 0 deletions spec/truffle/methods/String.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ b
bytes
bytesize
byteslice
bytesplice
capitalize
capitalize!
casecmp
Expand Down
48 changes: 48 additions & 0 deletions src/main/ruby/truffleruby/core/string.rb
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,54 @@ def byteslice(index_or_range, length = undefined)
byteslice index, length
end

# Replaces a span of bytes of this string with +str+, modifying the receiver.
#
# Accepted call forms:
#   bytesplice(index, length, str)
#   bytesplice(range, str)
#
# A negative index has bytesize added to it. A length reaching past the end
# of the string is truncated, and a range that normalizes to a negative
# length is treated as an empty span (a pure insertion at its start).
#
# Raises:
#   ArgumentError - unless exactly 2 or 3 arguments are given
#   TypeError     - when 2 arguments are given and the first is not a Range
#   IndexError    - for a negative length, for an integer index outside the
#                   string, or when either edge of the span does not land on
#                   a character boundary
#   RangeError    - when the start of the range is outside the string
#
# The mutability check and the encoding negotiation are delegated to
# Primitive.check_mutable_string and Primitive.encoding_ensure_compatible_str.
# Returns the result of Primitive.string_splice (presumably the receiver -
# TODO confirm against the primitive).
def bytesplice(*args)
  case args.size
  when 3
    # Keep the converted index around: the error message below must show the
    # Integer that was actually used, not the raw argument, which may be any
    # object convertible to an Integer.
    index = Primitive.rb_to_int(args[0])
    start = index < 0 ? index + bytesize : index

    len = Primitive.rb_to_int(args[1])
    str = StringValue(args[2])
  when 2
    range = args[0]
    unless Primitive.is_a?(range, Range)
      raise TypeError, "wrong argument type #{Primitive.class(range)} (expected Range)"
    end

    start, len = Primitive.range_normalized_start_length(range, bytesize)
    len = 0 if len < 0
    str = StringValue(args[1])
  else
    raise ArgumentError, "wrong number of arguments (given #{args.size}, expected 2..3)"
  end

  # Only reachable in the (index, length, str) form; range lengths were clamped above.
  raise IndexError, "negative length #{len}" if len < 0

  if bytesize < start || start < 0
    # +range+ is only set in the two-argument form.
    raise RangeError, "#{range} out of range" if range

    raise IndexError, "index #{index} out of string"
  end

  # Truncate spans that run past the end of the string.
  len = bytesize - start if len > bytesize - start
  finish = start + len

  unless Truffle::StringOperations.on_codepoint_boundary?(self, start)
    raise IndexError, "offset #{start} does not land on character boundary"
  end
  unless Truffle::StringOperations.on_codepoint_boundary?(self, finish)
    raise IndexError, "offset #{finish} does not land on character boundary"
  end

  Primitive.check_mutable_string(self)
  enc = Primitive.encoding_ensure_compatible_str(self, str)
  Primitive.string_splice(self, str, start, len, enc)
end

# Attempts to convert +obj+ to a String by handing it to
# Truffle::Type.try_convert together with the target class (String) and the
# name of the conversion method (:to_str).
def self.try_convert(obj)
  Truffle::Type.try_convert(obj, String, :to_str)
end
Expand Down
4 changes: 4 additions & 0 deletions src/main/ruby/truffleruby/core/truffle/polyglot_methods.rb
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ def byteslice(...)
to_s.byteslice(...)
end

# Forwards every argument to #bytesplice on the result of #to_s.
# NOTE(review): the splice mutates whatever #to_s returns; presumably that is
# a fresh String, in which case the receiver itself is left untouched - confirm.
def bytesplice(...)
  text = to_s
  text.bytesplice(...)
end

# Forwards every argument to #capitalize on the result of #to_s.
def capitalize(...)
  text = to_s
  text.capitalize(...)
end
Expand Down
13 changes: 13 additions & 0 deletions src/main/ruby/truffleruby/core/truffle/string_operations.rb
Original file line number Diff line number Diff line change
Expand Up @@ -411,5 +411,18 @@ def self.assign_regexp(string, index, count, replacement)

Primitive.string_splice(string, replacement, bi, bs, enc)
end

# Reports whether +byte_pos+ lands on a character boundary of +string+.
#
# The byte offset is mapped to a character index and that index is mapped
# back to a byte offset; the position counts as a boundary only when this
# round trip yields the original offset.
def self.on_codepoint_boundary?(string, byte_pos)
  char_index = Primitive.byte_index_to_character_index(string, byte_pos)

  # Handle index overflow cases: a character index at or past the character
  # count is compared against the total byte size instead of being mapped back.
  # @see com.oracle.truffle.api.strings.ByteIndexToCodePointIndexNode#execute for details.
  return byte_pos == string.bytesize if char_index >= string.size

  byte_pos == Primitive.character_index_to_byte_index(string, char_index)
end
end
end

0 comments on commit fe7c4c8

Please sign in to comment.