diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index 00836c6..28fb121 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -33,6 +33,11 @@ import org.jcodings.Encoding; import org.jcodings.specific.ASCIIEncoding; +import org.jcodings.specific.UTF16BEEncoding; +import org.jcodings.specific.UTF16LEEncoding; +import org.jcodings.specific.UTF32BEEncoding; +import org.jcodings.specific.UTF32LEEncoding; +import org.jcodings.specific.UTF8Encoding; import org.jruby.*; import org.jruby.anno.FrameField; import org.jruby.anno.JRubyClass; @@ -51,8 +56,10 @@ import org.jruby.util.ByteList; import org.jruby.util.StringSupport; import org.jruby.util.TypeConverter; +import org.jruby.util.func.ObjectObjectIntFunction; import org.jruby.util.io.EncodingUtils; import org.jruby.util.io.Getline; +import org.jruby.util.io.IOEncodable; import org.jruby.util.io.ModeFlags; import org.jruby.util.io.OpenFile; @@ -62,6 +69,7 @@ import java.util.Arrays; import java.util.concurrent.atomic.AtomicReferenceFieldUpdater; +import static java.lang.Byte.toUnsignedInt; import static org.jruby.RubyEnumerator.enumeratorize; import static org.jruby.runtime.Visibility.PRIVATE; import static org.jruby.util.RubyStringBuilder.str; @@ -93,6 +101,10 @@ static class StringIOData { private static final AtomicReferenceFieldUpdater LOCKED_UPDATER = AtomicReferenceFieldUpdater.newUpdater(StringIOData.class, Object.class, "owner"); + private static final ThreadLocal VMODE_VPERM_TL = ThreadLocal.withInitial(() -> EncodingUtils.vmodeVperm(null, null)); + private static final ThreadLocal FMODE_TL = ThreadLocal.withInitial(() -> new int[]{0}); + private static final int[] OFLAGS_UNUSED = new int[]{0}; + public static RubyClass createStringIOClass(final Ruby runtime) { RubyClass stringIOClass = runtime.defineClass( "StringIO", runtime.getObject(), StringIO::new); @@ -298,12 +310,22 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO Encoding encoding = null; IRubyObject options = ArgsUtil.getOptionsArg(runtime, maybeOptions); + IOEncodable.ConvConfig ioEncodable = new IOEncodable.ConvConfig(); if (!options.isNil()) { argc--; - IRubyObject encodingOpt = ArgsUtil.extractKeywordArg(context, "encoding", (RubyHash) options); - if (!encodingOpt.isNil()) { - encoding = EncodingUtils.toEncoding(context, encodingOpt); - } + + int[] fmode = {0}; + Object vmodeAndVpermP = VMODE_VPERM_TL.get(); + + // switch to per-use oflags if it is ever used in the future + EncodingUtils.extractModeEncoding(context, ioEncodable, vmodeAndVpermP, options, OFLAGS_UNUSED, FMODE_TL.get()); + + // clear shared vmodeVperm + EncodingUtils.vmode(vmodeAndVpermP, null); + EncodingUtils.vperm(vmodeAndVpermP, null); + + ptr.flags = fmode[0]; + encoding = ioEncodable.enc; } switch (argc) { @@ -312,11 +334,11 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO final boolean trunc; if (mode instanceof RubyFixnum) { int flags = RubyFixnum.fix2int(mode); - ptr.flags = ModeFlags.getOpenFileFlagsFor(flags); + ptr.flags |= ModeFlags.getOpenFileFlagsFor(flags); trunc = (flags & ModeFlags.TRUNC) != 0; } else { String m = arg1.convertToString().toString(); - ptr.flags = OpenFile.ioModestrFmode(runtime, m); + ptr.flags |= OpenFile.ioModestrFmode(runtime, m); trunc = m.length() > 0 && m.charAt(0) == 'w'; } string = arg0.convertToString(); @@ -329,11 +351,11 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO break; case 1: string = arg0.convertToString(); - ptr.flags = string.isFrozen() ? OpenFile.READABLE : OpenFile.READWRITE; + ptr.flags |= string.isFrozen() ? OpenFile.READABLE : OpenFile.READWRITE; break; case 0: string = RubyString.newEmptyString(runtime, runtime.getDefaultExternalEncoding()); - ptr.flags = OpenFile.READWRITE; + ptr.flags |= OpenFile.READWRITE; break; default: // should not be possible @@ -344,6 +366,7 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO ptr.enc = encoding; ptr.pos = 0; ptr.lineno = 0; + if ((ptr.flags & OpenFile.SETENC_BY_BOM) != 0) setEncodingByBOM(context); // funky way of shifting readwrite flags into object flags flags |= (ptr.flags & OpenFile.READWRITE) * (STRIO_READABLE / OpenFile.READABLE); } finally { @@ -1636,6 +1659,71 @@ public IRubyObject set_encoding(ThreadContext context, IRubyObject enc, IRubyObj return set_encoding(context, enc); } + @JRubyMethod + public IRubyObject set_encoding_by_bom(ThreadContext context) { + if (setEncodingByBOM(context) == null) return context.nil; + + return context.runtime.getEncodingService().convertEncodingToRubyEncoding(ptr.enc); + } + + private Encoding setEncodingByBOM(ThreadContext context) { + Encoding enc = detectBOM(context, ptr.string, (ctx, enc2, bomlen) -> { + ptr.pos = bomlen; + if (writable()) { + ptr.string.setEncoding(enc2); + } + return enc2; + }); + ptr.enc = enc; + return enc; + } + + private static Encoding detectBOM(ThreadContext context, RubyString str, ObjectObjectIntFunction callback) { + int p; + int len; + + ByteList byteList = str.getByteList(); + byte[] bytes = byteList.unsafeBytes(); + p = byteList.begin(); + len = byteList.realSize(); + + if (len < 1) return null; + switch (toUnsignedInt(bytes[p])) { + case 0xEF: + if (len < 3) break; + if (toUnsignedInt(bytes[p + 1]) == 0xBB && toUnsignedInt(bytes[p + 2]) == 0xBF) { + return callback.apply(context, UTF8Encoding.INSTANCE, 3); + } + break; + + case 0xFE: + if (len < 2) break; + if (toUnsignedInt(bytes[p + 1]) == 0xFF) { + return callback.apply(context, UTF16BEEncoding.INSTANCE, 2); + } + break; + + case 0xFF: + if (len < 2) break; + if (toUnsignedInt(bytes[p + 1]) == 0xFE) { + if (len >= 4 && toUnsignedInt(bytes[p + 2]) == 0 && toUnsignedInt(bytes[p + 3]) == 0) { + return callback.apply(context, UTF32LEEncoding.INSTANCE, 4); + } + return callback.apply(context, UTF16LEEncoding.INSTANCE, 2); + } + break; + + case 0: + if (len < 4) break; + if (toUnsignedInt(bytes[p + 1]) == 0 && toUnsignedInt(bytes[p + 2]) == 0xFE && toUnsignedInt(bytes[p + 3]) == 0xFF) { + return callback.apply(context, UTF32BEEncoding.INSTANCE, 4); + } + break; + } + return callback.apply(context, null, 0); + } + + @JRubyMethod public IRubyObject external_encoding(ThreadContext context) { return context.runtime.getEncodingService().convertEncodingToRubyEncoding(getEncoding());