+ * This method first tries to just read the tail section of the file to get the necessary chars.
+ * To handle multi-byte variable length encoding (such as UTF-8), we read a larger than
+ * necessary chunk.
+ *
+ *
+ * Some multi-byte encodings, such as Shift-JIS (http://en.wikipedia.org/wiki/Shift_JIS), don't
+ * allow the first byte and the second byte of a single char to be unambiguously identified,
+ * so it is possible that we end up decoding incorrectly if we start reading in the middle of a multi-byte
+ * character. All the CJK multi-byte encodings that I know of are self-correcting; as they are ASCII-compatible,
+ * any ASCII characters or control characters will bring the decoding back in sync, so in the worst
+ * case we just have some garbage at the beginning that needs to be discarded. To accommodate this,
+ * we read an additional 1024 bytes.
+ *
+ *
+ * Other encodings, such as UTF-8, are better in that the character boundary is unambiguous,
+ * so there can be at most one garbage char. For dealing with UTF-16 and UTF-32, we read at
+ * a 4-byte boundary (all the constants and multipliers are multiples of 4).
+ *
+ *
+ * Note that it is possible to construct a contrived input that fools this algorithm, and in this method
+ * we are willing to live with a small possibility of that to avoid reading the whole text. In practice,
+ * such an input is very unlikely.
+ *
+ *
+ * So all in all, this algorithm should work decently, and it works quite efficiently on a large text.
+ */
+ public @Nonnull String fastTail(int numChars, Charset cs) throws IOException {
+     RandomAccessFile raf = new RandomAccessFile(file,"r");
+     try {
+         long len = raf.length();
+         // err on the safe side and assume each char occupies 4 bytes, plus a 1024 byte margin to bring us back
+         // in sync if we start from a non-char boundary; computed as long so that a large numChars can't overflow
+         long pos = Math.max(0, len - (numChars*4L+1024));
+         raf.seek(pos);
+         byte[] tail = new byte[(int) (len-pos)];
+         raf.readFully(tail);
+         String tails = new String(tail,cs);
+         return new String(tails.substring(Math.max(0,tails.length()-numChars))); // trim the baggage of substring by allocating a new String
+     } finally {
+         raf.close(); // release the file handle even when seeking/reading fails
+     }
+ }
+
+ /** Convenience overload of {@link #fastTail(int, Charset)} that decodes with the platform default encoding. */
+ public @Nonnull String fastTail(int numChars) throws IOException {
+     // the default charset is looked up on every call rather than cached
+     final Charset platformDefault = Charset.defaultCharset();
+     return fastTail(numChars, platformDefault);
+ }
+
+
public String readTrim() throws IOException {
return read().trim();
}
diff --git a/test/src/test/groovy/hudson/util/TextFileTest.groovy b/test/src/test/groovy/hudson/util/TextFileTest.groovy
new file mode 100644
index 000000000000..9347b488a914
--- /dev/null
+++ b/test/src/test/groovy/hudson/util/TextFileTest.groovy
@@ -0,0 +1,102 @@
+package hudson.util
+
+import org.junit.After
+import org.junit.Test
+
+import java.nio.charset.Charset
+
+/**
+ * Exercises {@link TextFile#head} and {@link TextFile#fastTail} against temporary files.
+ *
+ * @author Kohsuke Kawaguchi
+ */
+class TextFileTest {
+    List files = [];
+
+    @After
+    void tearDown() {
+        files*.delete()
+    }
+
+    @Test
+    public void head() {
+        def f = newFile()
+        f.text = getClass().getResource("ascii.txt").text
+
+        def t = new TextFile(f)
+        def first35 = "Lorem ipsum dolor sit amet, consect"
+        assert t.head(35).equals(first35)
+        assert first35.length()==35
+    }
+
+    @Test
+    public void shortHead() {
+        def f = newFile()
+        f.text = "hello"
+
+        def t = new TextFile(f)
+        assert t.head(35).equals("hello")
+    }
+
+    @Test
+    public void tail() {
+        def f = newFile()
+        f.text = getClass().getResource("ascii.txt").text
+
+        def t = new TextFile(f)
+        def tail35 = "la, vitae interdum quam rutrum id.\n"
+        assert t.fastTail(35).equals(tail35)
+        assert tail35.length()==35
+    }
+
+    @Test
+    public void shortTail() {
+        def f = newFile()
+        f.text = "hello"
+
+        def t = new TextFile(f)
+        assert t.fastTail(35).equals("hello")
+    }
+
+    /**
+     * Shift JIS is a multi-byte character encoding.
+     *
+     * In it, 0x83 0x82 is \u30e2, and 0x82 0x83 is \uFF43.
+     * So if we aren't careful, we'll parse the text incorrectly.
+     */
+    @Test
+    public void tailShiftJIS() {
+        def f = newFile()
+
+        def t = new TextFile(f)
+
+        f.withOutputStream { o ->
+            (1..80).each {
+                (1..40).each {
+                    o.write(0x83)
+                    o.write(0x82)
+                }
+                o.write(0x0A);
+            }
+        }
+
+        def tail = t.fastTail(35, Charset.forName("Shift_JIS"))
+        assert tail.equals("\u30e2"*34+"\n")
+        assert tail.length()==35
+
+        // add one more byte to force fastTail to read from one byte ahead
+        // between this and the previous case, it should start parsing text incorrectly, until it hits NL
+        // where it comes back in sync
+        f.append([0x0A] as byte[])
+
+        tail = t.fastTail(35, Charset.forName("Shift_JIS"))
+        assert tail.equals("\u30e2"*33+"\n\n")
+        assert tail.length()==35
+    }
+
+    def newFile() {
+        def f = File.createTempFile("foo", ".txt")
+        files.add(f)
+        return f
+    }
+}
diff --git a/test/src/test/resources/hudson/util/ascii.txt b/test/src/test/resources/hudson/util/ascii.txt
new file mode 100644
index 000000000000..cc0cba127ed0
--- /dev/null
+++ b/test/src/test/resources/hudson/util/ascii.txt
@@ -0,0 +1,54 @@
+Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quisque porta
+consectetur sapien vel sodales. Cras volutpat odio ipsum, ac euismod turpis
+volutpat pulvinar. Mauris at lorem fringilla, aliquet erat vel, tempor odio.
+Sed vel elit eget libero adipiscing pulvinar vitae at lorem. Mauris lacinia dui
+quis fermentum fermentum. Pellentesque dapibus elementum porta. Sed aliquam
+neque non orci aliquet, vitae accumsan tortor pretium. Sed vel nisi ultrices,
+vestibulum tortor non, interdum lectus. Integer euismod quam eros, quis semper
+lacus dignissim at. Etiam blandit volutpat augue quis fermentum. Suspendisse
+vitae massa in odio luctus commodo eu eget ante. Nunc pretium sodales nisl in
+ullamcorper. Nam interdum, leo ac malesuada convallis, ipsum leo tristique
+purus, sit amet volutpat lorem nulla ut dui. Pellentesque eget nunc ut orci
+elementum sagittis. Proin enim metus, consectetur et fringilla sed, scelerisque
+eu risus. Nullam et augue placerat, hendrerit lacus at, bibendum sem.
+
+Suspendisse venenatis nulla vitae arcu placerat lobortis. Vestibulum eleifend
+luctus lacus, sit amet suscipit lorem ultrices vitae. Praesent eu porta elit.
+Phasellus hendrerit mattis libero, in sollicitudin neque tristique a. In hac
+habitasse platea dictumst. Maecenas eget bibendum orci, eu dapibus quam. Donec
+gravida dapibus diam, vel aliquet sem hendrerit facilisis. Maecenas id nisi
+lacus. Ut mollis interdum ligula, sed mattis enim cursus ac. Proin nec arcu a
+neque dignissim iaculis ac non enim. Interdum et malesuada fames ac ante ipsum
+primis in faucibus. Aliquam rutrum hendrerit lacus, ut facilisis nisl
+pellentesque vel. Nunc ut ultricies turpis. Praesent vel orci iaculis, sagittis
+nunc at, bibendum nulla. Interdum et malesuada fames ac ante ipsum primis in
+faucibus. Sed scelerisque lectus vel nisi malesuada, eget pretium velit
+porttitor.
+
+Proin porta nibh ut urna placerat facilisis. Praesent at nisi malesuada,
+lacinia eros a, fermentum orci. Maecenas at semper elit. Morbi sit amet tempus
+tellus. Duis convallis sollicitudin odio vitae volutpat. Curabitur nec arcu
+eget tellus elementum ultrices non at dolor. Vivamus convallis velit a neque
+posuere sollicitudin. Donec quis est adipiscing tortor dignissim pellentesque
+ut ut lacus. Nam sit amet porttitor purus, sit amet pulvinar quam. In luctus
+porttitor scelerisque. Duis dapibus pharetra sem quis auctor. Cras pulvinar
+faucibus volutpat. Donec vitae ligula fringilla, rhoncus leo egestas,
+sollicitudin est. Class aptent taciti sociosqu ad litora torquent per conubia
+nostra, per inceptos himenaeos.
+
+Cras id luctus ipsum. Donec quis congue urna. Praesent dictum mattis sapien,
+eget placerat ipsum rutrum et. Etiam gravida egestas odio, sit amet molestie
+leo tempor vel. Aenean lacus dui, commodo sed velit in, consequat ultricies
+velit. Nulla facilisi. Aenean sed vulputate odio. Interdum et malesuada fames
+ac ante ipsum primis in faucibus. Nulla eleifend, metus nec sodales lobortis,
+purus metus aliquam odio, vel lacinia nisi nulla eu erat. Donec tincidunt, leo
+gravida hendrerit feugiat, nibh velit fermentum urna, non sodales urna velit
+tincidunt orci. In bibendum justo eleifend molestie suscipit. Cras luctus urna
+sit amet consectetur imperdiet. Aliquam pretium faucibus lacus, et pretium
+tellus rhoncus fermentum.
+
+Sed rhoncus varius accumsan. Nulla facilisi. In vehicula nec diam a bibendum.
+Cum sociis natoque penatibus et magnis dis parturient montes, nascetur
+ridiculus mus. Suspendisse ullamcorper faucibus risus in ultrices. Integer
+dignissim ultrices eros, quis blandit purus ullamcorper in. Suspendisse laoreet
+aliquet nulla, vitae interdum quam rutrum id.