Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Compatibility] Add String#bytesplice #3044

Merged
merged 1 commit into from
May 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Compatibility:
* Fix `Range#size` and return `nil` for beginningless Range when end isn't Numeric (#3039, @rwstauner).
* Alias `String#-@` to `String#dedup` (#3039, @itarato).
* Fix `Pathname#relative_path_from` to convert string arguments to Pathname objects (@rwstauner).
* Add `String#bytesplice` (#3039, @itarato).

Performance:

Expand Down
133 changes: 133 additions & 0 deletions spec/ruby/core/string/bytesplice_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# -*- encoding: utf-8 -*-
require_relative '../../spec_helper'

# Specs for String#bytesplice on single-byte content ("hello", 5 bytes):
# argument validation, the (index, length, str) and (range, str) call forms,
# and the in-place/frozen behaviour. Every example uses a fresh string literal
# as the receiver because bytesplice modifies its receiver.
describe "String#bytesplice" do
# mspec version guard: the examples below are registered under the "3.2" guard.
ruby_version_is "3.2" do
it "raises IndexError when index is less than -bytesize" do
-> { "hello".bytesplice(-6, 0, "xxx") }.should raise_error(IndexError, "index -6 out of string")
end

it "raises IndexError when index is greater than bytesize" do
-> { "hello".bytesplice(6, 0, "xxx") }.should raise_error(IndexError, "index 6 out of string")
end

andrykonchin marked this conversation as resolved.
Show resolved Hide resolved
it "raises IndexError for negative length" do
-> { "abc".bytesplice(0, -2, "") }.should raise_error(IndexError, "negative length -2")
end

it "replaces with integer indices" do
# -5 is -bytesize here, so it addresses the same position as index 0.
"hello".bytesplice(-5, 0, "xxx").should == "xxxhello"
"hello".bytesplice(0, 0, "xxx").should == "xxxhello"
"hello".bytesplice(0, 1, "xxx").should == "xxxello"
"hello".bytesplice(0, 5, "xxx").should == "xxx"
# A length that runs past the end of the string is accepted; the expected
# result is the same as for length 5.
"hello".bytesplice(0, 6, "xxx").should == "xxx"
end

it "raises RangeError when range left boundary is less than -bytesize" do
# Unlike the integer form (IndexError), the range form reports RangeError.
-> { "hello".bytesplice(-6...-6, "xxx") }.should raise_error(RangeError, "-6...-6 out of range")
end

it "replaces with ranges" do
"hello".bytesplice(-5...-5, "xxx").should == "xxxhello"
"hello".bytesplice(0...0, "xxx").should == "xxxhello"
"hello".bytesplice(0..0, "xxx").should == "xxxello"
"hello".bytesplice(0...1, "xxx").should == "xxxello"
"hello".bytesplice(0..1, "xxx").should == "xxxllo"
"hello".bytesplice(0..-1, "xxx").should == "xxx"
"hello".bytesplice(0...5, "xxx").should == "xxx"
# A range end beyond bytesize is accepted; same expected result as 0...5.
"hello".bytesplice(0...6, "xxx").should == "xxx"
end

it "raises TypeError when integer index is provided without length argument" do
# Two-argument calls are only valid with a Range as the first argument.
-> { "hello".bytesplice(0, "xxx") }.should raise_error(TypeError, "wrong argument type Integer (expected Range)")
end

it "replaces on an empty string" do
"".bytesplice(0, 0, "").should == ""
"".bytesplice(0, 0, "xxx").should == "xxx"
end

it "mutates self" do
s = "hello"
# equal? is an identity check: the return value must be the receiver object
# itself. The new content of s is not asserted here.
s.bytesplice(2, 1, "xxx").should.equal?(s)
end

it "raises when string is frozen" do
s = "hello".freeze
-> { s.bytesplice(2, 1, "xxx") }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"")
end
end
end

# Specs for String#bytesplice on multibyte UTF-8 content. The receiver
# "こんにちは" has five characters; the offsets used below (-15, 3, 15, ...)
# correspond to three bytes per character and a bytesize of 15. Every example
# uses a fresh string literal as the receiver because bytesplice modifies it.
describe "String#bytesplice with multibyte characters" do
# mspec version guard: the examples below are registered under the "3.2" guard.
ruby_version_is "3.2" do
it "raises IndexError when index is out of byte size boundary" do
-> { "こんにちは".bytesplice(-16, 0, "xxx") }.should raise_error(IndexError, "index -16 out of string")
end

it "raises IndexError when index is not on a codepoint boundary" do
-> { "こんにちは".bytesplice(1, 0, "xxx") }.should raise_error(IndexError, "offset 1 does not land on character boundary")
end

it "raises IndexError when length is not matching the codepoint boundary" do
# The start (0) is valid; the reported offset is the end of the span (start + length).
-> { "こんにちは".bytesplice(0, 1, "xxx") }.should raise_error(IndexError, "offset 1 does not land on character boundary")
-> { "こんにちは".bytesplice(0, 2, "xxx") }.should raise_error(IndexError, "offset 2 does not land on character boundary")
end

it "replaces with integer indices" do
"こんにちは".bytesplice(-15, 0, "xxx").should == "xxxこんにちは"
"こんにちは".bytesplice(0, 0, "xxx").should == "xxxこんにちは"
"こんにちは".bytesplice(0, 3, "xxx").should == "xxxんにちは"
"こんにちは".bytesplice(3, 3, "はは").should == "こははにちは"
# Index 15 equals bytesize: the replacement is appended at the end.
"こんにちは".bytesplice(15, 0, "xxx").should == "こんにちはxxx"
end

it "replaces with range" do
# -15...-16 ends before it starts; the expected result is an insertion at offset 0.
"こんにちは".bytesplice(-15...-16, "xxx").should == "xxxこんにちは"
"こんにちは".bytesplice(0...0, "xxx").should == "xxxこんにちは"
"こんにちは".bytesplice(0..2, "xxx").should == "xxxんにちは"
"こんにちは".bytesplice(0...3, "xxx").should == "xxxんにちは"
"こんにちは".bytesplice(0..5, "xxx").should == "xxxにちは"
"こんにちは".bytesplice(0..-1, "xxx").should == "xxx"
"こんにちは".bytesplice(0...15, "xxx").should == "xxx"
# A range end beyond bytesize is accepted; same expected result as 0...15.
"こんにちは".bytesplice(0...18, "xxx").should == "xxx"
end

it "treats negative length for range as 0" do
# With an end far below -bytesize, each call is expected to insert at the
# range start without removing any bytes.
"こんにちは".bytesplice(0...-100, "xxx").should == "xxxこんにちは"
"こんにちは".bytesplice(3...-100, "xxx").should == "こxxxんにちは"
"こんにちは".bytesplice(-15...-100, "xxx").should == "xxxこんにちは"
end

it "raises when ranges not match codepoint boundaries" do
# Start 0 is valid; the inclusive end makes the span stop at offset 1 / 2.
-> { "こんにちは".bytesplice(0..0, "x") }.should raise_error(IndexError, "offset 1 does not land on character boundary")
-> { "こんにちは".bytesplice(0..1, "x") }.should raise_error(IndexError, "offset 2 does not land on character boundary")
# Range start is off a character boundary (-4 => offset 11, -5 => offset 10)
-> { "こんにちは".bytesplice(-4..-1, "x") }.should raise_error(IndexError, "offset 11 does not land on character boundary")
-> { "こんにちは".bytesplice(-5..-1, "x") }.should raise_error(IndexError, "offset 10 does not land on character boundary")
# Range start (-3 => offset 12) is valid, but the end of the span is off a character boundary
-> { "こんにちは".bytesplice(-3..-2, "x") }.should raise_error(IndexError, "offset 14 does not land on character boundary")
-> { "こんにちは".bytesplice(-3..-3, "x") }.should raise_error(IndexError, "offset 13 does not land on character boundary")
end

it "deals with a different encoded argument" do
# Case 1: UTF-8 receiver, US-ASCII replacement; the result is expected to stay UTF-8.
s = "こんにちは"
s.encoding.should == Encoding::UTF_8
sub = "xxxxxx"
sub.force_encoding(Encoding::US_ASCII)

result = s.bytesplice(0, 3, sub)
result.should == "xxxxxxんにちは"
result.encoding.should == Encoding::UTF_8

# Case 2: US-ASCII receiver, UTF-8 replacement; the result is expected to be UTF-8.
s = "xxxxxx"
s.force_encoding(Encoding::US_ASCII)
sub = "こんにちは"
sub.encoding.should == Encoding::UTF_8

result = s.bytesplice(0, 3, sub)
result.should == "こんにちはxxx"
result.encoding.should == Encoding::UTF_8
end
end
end
1 change: 1 addition & 0 deletions spec/tags/truffle/methods_tags.txt
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,4 @@ fails:Public methods on Thread should include native_thread_id
fails:Public methods on UnboundMethod should include private?
fails:Public methods on UnboundMethod should include protected?
fails:Public methods on UnboundMethod should include public?
fails:Public methods on String should not include bytesplice
1 change: 1 addition & 0 deletions spec/truffleruby.next-specs
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ spec/ruby/core/hash/shift_spec.rb
spec/ruby/core/range/size_spec.rb

spec/ruby/core/string/dedup_spec.rb
spec/ruby/core/string/bytesplice_spec.rb
12 changes: 12 additions & 0 deletions src/main/java/org/truffleruby/core/string/StringNodes.java
Original file line number Diff line number Diff line change
Expand Up @@ -4459,4 +4459,16 @@ private String formatTooLongError(int count, RubyString string) {

}

/**
 * Backs the {@code string_is_character_head?} primitive: obtains the TruffleString of
 * {@code string} through {@link RubyStringLibrary} and asks {@link IsCharacterHeadNode}
 * about {@code byteOffset} under encoding {@code enc}, returning that node's boolean answer.
 *
 * NOTE(review): {@code lowerFixnum = 2} presumably lowers the third argument
 * ({@code byteOffset}) to an {@code int} - confirm against the {@code @Primitive} docs.
 */
@Primitive(name = "string_is_character_head?", lowerFixnum = 2)
public abstract static class IsCharacterHeadPrimitiveNode extends PrimitiveArrayArgumentsNode {

    @Specialization
    protected boolean isCharacterHead(RubyEncoding enc, Object string, int byteOffset,
            @Cached RubyStringLibrary strings,
            @Cached IsCharacterHeadNode characterHeadNode) {
        // Delegate the actual check; this node only adapts the primitive's arguments.
        return characterHeadNode.execute(enc, strings.getTString(string), byteOffset);
    }
}

}
43 changes: 43 additions & 0 deletions src/main/ruby/truffleruby/core/string.rb
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,49 @@ def byteslice(index_or_range, length = undefined)
byteslice index, length
end

# Replaces a span of bytes in this string with +str+.
#
# Call forms handled below:
#   bytesplice(index, length, str) - integer byte offset and byte length
#   bytesplice(range, str)         - Range of byte offsets
#
# Raises, in the order the checks run:
#   TypeError  - +length+ omitted and the first argument is not a Range
#   IndexError - negative length
#   RangeError - (range form) start lies outside the string
#   IndexError - (integer form) index lies outside the string
#   IndexError - start or end of the span is inside the string but not on a
#                character boundary
#
# NOTE(review): the return value is whatever Primitive.string_splice returns -
# presumably self; confirm against the primitive.
def bytesplice(index_or_range, length = undefined, str)
is_range = Primitive.is_a?(index_or_range, Range)

if Primitive.undefined?(length)
# Two-argument form: the first argument must be a Range.
raise TypeError, "wrong argument type #{Primitive.class(index_or_range)} (expected Range)" unless is_range

start, len = Primitive.range_normalized_start_length(index_or_range, bytesize)
# A negative length coming out of the range is treated as an empty span.
len = 0 if len < 0
else
start = Primitive.rb_to_int(index_or_range)
# Negative indices count back from the end of the string.
start += bytesize if start < 0
len = Primitive.rb_to_int(length)
end

# The replacement is converted before any of the bounds checks below run.
str = StringValue(str)

# Only reachable with a negative value in the integer form; the range form
# has already replaced a negative length with 0 above.
if len < 0
raise IndexError, "negative length #{len}"
end

# start == bytesize is allowed (splice at the very end); anything beyond it,
# or still negative after normalisation, is rejected. The messages use the
# caller's original argument, not the normalised start.
if bytesize < start || start < 0
if is_range
raise RangeError, "#{index_or_range} out of range"
else
raise IndexError, "index #{index_or_range} out of string"
end
end

# Clamp the length so the span never extends past the end of the string.
len = bytesize - start if len > bytesize - start
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Primitive.min would be nicer here, but let's do that in #3043 since this PR is in the merge queue

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed there.

finish = start + len

# Both ends of the span must sit on a character head. An offset equal to
# bytesize is not tested: only offsets strictly inside the string are checked.
if start < bytesize && !Primitive.string_is_character_head?(encoding, self, start)
raise IndexError, "offset #{start} does not land on character boundary"
end
if finish < bytesize && !Primitive.string_is_character_head?(encoding, self, finish)
raise IndexError, "offset #{finish} does not land on character boundary"
end

# NOTE(review): presumably raises FrozenError for a frozen receiver - confirm.
# It runs only after all argument validation above has passed.
Primitive.check_mutable_string(self)
# The encoding returned by the compatibility check is handed to the splice.
enc = Primitive.encoding_ensure_compatible_str(self, str)
Primitive.string_splice(self, str, start, len, enc)
eregon marked this conversation as resolved.
Show resolved Hide resolved
end

def self.try_convert(obj)
Truffle::Type.try_convert obj, String, :to_str
end
Expand Down
4 changes: 4 additions & 0 deletions src/main/ruby/truffleruby/core/truffle/polyglot_methods.rb
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ def byteslice(...)
to_s.byteslice(...)
end

# Forwards every argument to #bytesplice on this object's to_s result and
# returns what that call returns.
def bytesplice(...)
  text = to_s
  text.bytesplice(...)
end

def capitalize(...)
to_s.capitalize(...)
end
Expand Down