From 7aa7a8538aa263ec5f2b44aee5154893fc396c7c Mon Sep 17 00:00:00 2001
From: AlexHaxe <Alexander.Blum@gmail.com>
Date: Tue, 1 May 2018 17:28:02 +0200
Subject: [PATCH] refactored to use Bytes instead of String for file contents,
 see #98 (#402)

* refactored to use Bytes instead of String for file contents, see #98
---
 CHANGES.md                                    |  1 +
 checkstyle.json                               |  2 +-
 src/checkstyle/CheckFile.hx                   |  4 ++-
 src/checkstyle/Checker.hx                     | 30 ++++++++++---------
 .../block/ConditionalCompilationCheck.hx      | 14 ++++-----
 .../checks/block/RightCurlyCheck.hx           |  4 +--
 .../checks/imports/UnusedImportCheck.hx       |  2 +-
 .../checks/literal/StringLiteralCheck.hx      |  2 +-
 .../checks/whitespace/ArrayAccessCheck.hx     |  4 +--
 .../checks/whitespace/WhitespaceAfterCheck.hx |  2 +-
 .../whitespace/WhitespaceAroundCheck.hx       |  7 +++--
 .../checks/whitespace/WhitespaceCheckBase.hx  |  7 +++--
 .../checks/whitespace/WrapCheckBase.hx        |  7 +++--
 src/checkstyle/import.hx                      |  2 ++
 src/checkstyle/utils/StringUtils.hx           |  5 ++--
 test/checks/CheckTestCase.hx                  |  4 ++-
 test/misc/ThreadTest.hx                       |  5 +++-
 17 files changed, 59 insertions(+), 43 deletions(-)
diff --git a/CHANGES.md b/CHANGES.md
index 726be202..b7e605ac 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -7,6 +7,7 @@
 - Fixed BkOpen childs in token tree parser [#398](https://github.com/HaxeCheckstyle/haxe-checkstyle/issues/398)
 - Fixed bad offset crash with C++ build on7 Windows 10 [#398](https://github.com/HaxeCheckstyle/haxe-checkstyle/issues/398)
 - Fixed object declaration handling [#399](https://github.com/HaxeCheckstyle/haxe-checkstyle/issues/399)
+- Refactored content handling to use Bytes instead of String (should fix [#98](https://github.com/HaxeCheckstyle/haxe-checkstyle/issues/98)) [#402](https://github.com/HaxeCheckstyle/haxe-checkstyle/issues/402)
 - Added unittests for ParserQueue and CheckerPool [#393](https://github.com/HaxeCheckstyle/haxe-checkstyle/issues/393)
 - Added unittests for TokenTree structure verification [#400](https://github.com/HaxeCheckstyle/haxe-checkstyle/issues/400)
 - Removed `.` from default settings in SeparatorWrapCheck [#400](https://github.com/HaxeCheckstyle/haxe-checkstyle/issues/400)
diff --git a/checkstyle.json b/checkstyle.json
index eae853e7..b50ac2b1 100644
--- a/checkstyle.json
+++ b/checkstyle.json
@@ -171,7 +171,7 @@
         },
         {
             "props": {
-                "ignoreNumbers": [-1, 0, 1, 2, 3, 4.0, 5, 8, 13, 21, 34, 100]
+                "ignoreNumbers": [-1, 0, 1, 2, 3, 4.0, 5, 8, 10, 13, 21, 34, 100]
             },
             "type": "MagicNumber"
         },
diff --git a/src/checkstyle/CheckFile.hx b/src/checkstyle/CheckFile.hx
index 0f93475d..d9875524 100644
--- a/src/checkstyle/CheckFile.hx
+++ b/src/checkstyle/CheckFile.hx
@@ -1,7 +1,9 @@
 package checkstyle;
 
+import byte.ByteData;
+
 typedef CheckFile = {
 	var name:String;
-	var content:String;
+	var content:ByteData;
 	var index:Int;
 }
\ No newline at end of file
diff --git a/src/checkstyle/Checker.hx b/src/checkstyle/Checker.hx
index ce851b07..1b63aea7 100644
--- a/src/checkstyle/Checker.hx
+++ b/src/checkstyle/Checker.hx
@@ -1,6 +1,5 @@
 package checkstyle;
 
-import byte.ByteData;
 import haxe.CallStack;
 import haxeparser.HaxeParser;
 import haxeparser.HaxeLexer;
@@ -15,7 +14,6 @@ import checkstyle.token.TokenTreeBuilder;
 class Checker {
 
 	public var file:CheckFile;
-	public var bytes:ByteData;
 	public var lines:Array<String>;
 	public var tokens:Array<Token>;
 	public var ast:Ast;
@@ -41,19 +39,19 @@ class Checker {
 
 	public function getTokenTree():TokenTree {
 		if (tokens == null) return null;
-		if (tokenTree == null) tokenTree = TokenTreeBuilder.buildTokenTree(tokens, bytes);
+		if (tokenTree == null) tokenTree = TokenTreeBuilder.buildTokenTree(tokens, file.content);
 		return tokenTree;
 	}
 
 	function makePosIndices() {
-		var code = file.content;
+		var code:Bytes = cast file.content;
 		linesIdx = [];
 
 		var last = 0;
 		var left = false;
 
 		for (i in 0...code.length) {
-			if (code.charAt(i) == "\n") {
+			if (code.get(i) == 0x0A) {
 				linesIdx.push({l:last, r:i});
 				last = i + 1;
 				left = false;
@@ -71,11 +69,16 @@ class Checker {
 	}
 
 	public function getString(off:Int, off2:Int):String {
-		return file.content.substr(off, off2 - off);
+		var code:Bytes = cast file.content;
+		var end:Int = (off2 > code.length) ? code.length : off2; // clamp to EOF, as String.substr did before this refactor
+		if ((off < 0) || (off >= end)) return ""; // Bytes.sub throws OutsideBounds on negative or inverted ranges
+		return code.sub(off, end - off).toString();
 	}
 
 	function findLineSeparator() {
-		var code = file.content;
+		var codeBytes:Bytes = cast file.content;
+		var code:String = codeBytes.toString();
+
 		for (i in 0...code.length) {
 			var char = code.charAt(i);
 			if (char == "\r" || char == "\n") {
@@ -92,15 +95,16 @@ class Checker {
 	}
 
 	function makeLines() {
-		var code = file.content;
-		lines = code.split(lineSeparator);
+		var code:Bytes = cast file.content;
+		var textCode:String = code.toString();
+		lines = textCode.split(lineSeparator);
 	}
 
 	function makeTokens() {
 		try {
 			tokens = [];
 			tokenTree = null;
-			var lexer = new HaxeLexer(bytes, file.name);
+			var lexer = new HaxeLexer(file.content, file.name);
 			var t:Token = lexer.token(HaxeLexer.tok);
 
 			while (t.tok != Eof) {
@@ -128,8 +132,7 @@ class Checker {
 	}
 
 	function makeAST(defines:Array<String>):Ast {
-		var code = file.content;
-		var parser = new HaxeParser(byte.ByteData.ofString(code), file.name);
+		var parser = new HaxeParser(file.content, file.name);
 		parser.define("cross");
 		parser.define("scriptable");
 		parser.define("unsafe");
@@ -176,7 +179,7 @@ class Checker {
 	public function loadFileContent(checkFile:CheckFile) {
 		// unittests set content before running Checker
 		// real checks load content here
-		if (checkFile.content == null) checkFile.content = File.getContent(checkFile.name);
+		if (checkFile.content == null) checkFile.content = cast File.getBytes(checkFile.name);
 	}
 
 	public function unloadFileContent(checkFile:CheckFile) {
@@ -185,7 +188,6 @@ class Checker {
 
 	public function createContext(checkFile:CheckFile):Bool {
 		file = checkFile;
-		bytes = byte.ByteData.ofString(file.content);
 		ReporterManager.INSTANCE.fileStart(file);
 		try {
 			findLineSeparator();
diff --git a/src/checkstyle/checks/block/ConditionalCompilationCheck.hx b/src/checkstyle/checks/block/ConditionalCompilationCheck.hx
index 5c7abb0d..c604c69a 100644
--- a/src/checkstyle/checks/block/ConditionalCompilationCheck.hx
+++ b/src/checkstyle/checks/block/ConditionalCompilationCheck.hx
@@ -51,8 +51,8 @@ class ConditionalCompilationCheck extends Check {
 	}
 
 	function checkMultiLine(tok:TokenTree, linePos:LinePos) {
-		var line:String = checker.lines[linePos.line];
-		var prefix:String = line.substr(0, linePos.ofs);
+		var line:Bytes = Bytes.ofString(checker.lines[linePos.line]);
+		var prefix:String = line.sub(0, linePos.ofs).toString();
 		if (checkLine(tok, linePos, line)) return;
 
 		switch (policy) {
@@ -68,8 +68,8 @@ class ConditionalCompilationCheck extends Check {
 			switch (childTok.tok) {
 				case Sharp("else"), Sharp("elseif"), Sharp("end"):
 					var childLinePos:LinePos = checker.getLinePos(childTok.pos.min);
-					var childLine:String = checker.lines[childLinePos.line];
-					var childPrefix:String = childLine.substr(0, childLinePos.ofs);
+					var childLine:Bytes = Bytes.ofString(checker.lines[childLinePos.line]);
+					var childPrefix:String = childLine.sub(0, childLinePos.ofs).toString();
 					if (checkLine(childTok, childLinePos, childLine)) continue;
 					if (childPrefix == prefix) continue;
 					logPos('Indentation of $childTok must match corresponding #if', childTok.pos);
@@ -78,9 +78,9 @@ class ConditionalCompilationCheck extends Check {
 		}
 	}
 
-	function checkLine(tok:TokenTree, linePos:LinePos, line:String):Bool {
+	function checkLine(tok:TokenTree, linePos:LinePos, line:Bytes):Bool {
 		var r:EReg = ~/^[ \t]*$/;
-		var prefix:String = line.substr(0, linePos.ofs);
+		var prefix:String = line.sub(0, linePos.ofs).toString();
 		if (!r.match(prefix)) {
 			logPos('only whitespace allowed before $tok', tok.pos);
 			return true;
@@ -89,7 +89,7 @@ class ConditionalCompilationCheck extends Check {
 		if (expr == null) return false;
 		var linePosAfter:LinePos = checker.getLinePos(expr.getPos().max);
 		if (linePosAfter.line == linePos.line) {
-			var postfix:String = line.substr(linePosAfter.ofs);
+			var postfix:String = line.sub(linePosAfter.ofs, line.length - linePosAfter.ofs).toString();
 			if (!r.match(postfix)) {
 				logPos('only whitespace allowed after $tok', tok.pos);
 				return true;
diff --git a/src/checkstyle/checks/block/RightCurlyCheck.hx b/src/checkstyle/checks/block/RightCurlyCheck.hx
index d9a9a011..8e3eb44e 100644
--- a/src/checkstyle/checks/block/RightCurlyCheck.hx
+++ b/src/checkstyle/checks/block/RightCurlyCheck.hx
@@ -125,8 +125,8 @@ class RightCurlyCheck extends Check {
 			var linePos:LinePos = checker.getLinePos(curlyPos.max);
 			var afterCurly:String = "";
 			if (!eof) {
-				var afterLine:String = checker.lines[linePos.line];
-				if (linePos.ofs < afterLine.length) afterCurly = afterLine.substr(linePos.ofs);
+				var afterLine:Bytes = Bytes.ofString(checker.lines[linePos.line]);
+				if (linePos.ofs < afterLine.length) afterCurly = afterLine.sub(linePos.ofs, afterLine.length - linePos.ofs).toString();
 			}
 			// only else and catch allowed on same line after a right curly
 			var sameRegex = ~/^\s*(else|catch)/;
diff --git a/src/checkstyle/checks/imports/UnusedImportCheck.hx b/src/checkstyle/checks/imports/UnusedImportCheck.hx
index c514478e..20f19516 100644
--- a/src/checkstyle/checks/imports/UnusedImportCheck.hx
+++ b/src/checkstyle/checks/imports/UnusedImportCheck.hx
@@ -36,7 +36,7 @@ class UnusedImportCheck extends Check {
 		var stringLiterals:Array<TokenTree> = root.filterCallback(function(token:TokenTree, depth:Int):FilterResult {
 			switch (token.tok) {
 				case Const(CString(text)):
-					if (checker.file.content.substr(token.pos.min, 1) != "'") return GO_DEEPER;
+					if (checker.getString(token.pos.min, token.pos.min + 1) != "'") return GO_DEEPER;
 					if (~/\$\{[^\}]+\.[^\}]+\}/.match (text)) return FOUND_GO_DEEPER;
 				default:
 			}
diff --git a/src/checkstyle/checks/literal/StringLiteralCheck.hx b/src/checkstyle/checks/literal/StringLiteralCheck.hx
index 266c45ee..7a62152b 100644
--- a/src/checkstyle/checks/literal/StringLiteralCheck.hx
+++ b/src/checkstyle/checks/literal/StringLiteralCheck.hx
@@ -38,7 +38,7 @@ class StringLiteralCheck extends Check {
 	}
 
 	function checkLiteral(s:String, pos:Position) {
-		var quote:String = checker.file.content.substr(pos.min, 1);
+		var quote:String = checker.getString(pos.min, pos.min + 1);
 		var singleQuote:Bool = quote == "'";
 		switch (policy) {
 			case ONLY_DOUBLE:
diff --git a/src/checkstyle/checks/whitespace/ArrayAccessCheck.hx b/src/checkstyle/checks/whitespace/ArrayAccessCheck.hx
index d9cec679..016e39fa 100644
--- a/src/checkstyle/checks/whitespace/ArrayAccessCheck.hx
+++ b/src/checkstyle/checks/whitespace/ArrayAccessCheck.hx
@@ -32,8 +32,8 @@ class ArrayAccessCheck extends Check {
 					}
 
 					if (!spaceInside) {
-						if (checker.file.content.substr(e2.pos.min - 1, 1) == " ") logPos("Space between [ and index", e.pos);
-						if (checker.file.content.substr(e2.pos.max, 1) == " ") logPos("Space between index and ]", e.pos);
+						if (checker.getString(e2.pos.min - 1, e2.pos.min) == " ") logPos("Space between [ and index", e.pos);
+						if (checker.getString(e2.pos.max, e2.pos.max + 1) == " ") logPos("Space between index and ]", e.pos);
 					}
 				default:
 			}
diff --git a/src/checkstyle/checks/whitespace/WhitespaceAfterCheck.hx b/src/checkstyle/checks/whitespace/WhitespaceAfterCheck.hx
index 14b92ace..471cf22b 100644
--- a/src/checkstyle/checks/whitespace/WhitespaceAfterCheck.hx
+++ b/src/checkstyle/checks/whitespace/WhitespaceAfterCheck.hx
@@ -87,7 +87,7 @@ class WhitespaceAfterCheck extends Check {
 			if (isPosSuppressed(tok.pos)) continue;
 			if (TokenTreeCheckUtils.filterOpSub(tok)) continue;
 
-			var contentAfter:String = checker.file.content.substr(tok.pos.max, 1);
+			var contentAfter:String = checker.getString(tok.pos.max, tok.pos.max + 1);
 			if (~/^(\s|)$/.match(contentAfter)) continue;
 
 			logPos('No whitespace after "$tok"', tok.pos);
diff --git a/src/checkstyle/checks/whitespace/WhitespaceAroundCheck.hx b/src/checkstyle/checks/whitespace/WhitespaceAroundCheck.hx
index 7be050ee..ed8d4b49 100644
--- a/src/checkstyle/checks/whitespace/WhitespaceAroundCheck.hx
+++ b/src/checkstyle/checks/whitespace/WhitespaceAroundCheck.hx
@@ -125,10 +125,11 @@ class WhitespaceAroundCheck extends Check {
 			if (TokenTreeCheckUtils.filterOpSub(tok)) continue;
 
 			var linePos:LinePos = checker.getLinePos(tok.pos.min);
-			var line:String = checker.lines[linePos.line];
-			var before:String = line.substr(0, linePos.ofs);
+			var line:Bytes = Bytes.ofString(checker.lines[linePos.line]);
+			var before:String = line.sub(0, linePos.ofs).toString();
 			var tokLen:Int = tok.toString().length;
-			var after:String = line.substr(linePos.ofs + tokLen);
+			var offs:Int = linePos.ofs + tokLen;
+			var after:String = line.sub(offs, line.length - offs).toString();
 
 			if (!(~/^.*\s$/.match(before))) {
 				logPos('No whitespace around "$tok"', tok.pos);
diff --git a/src/checkstyle/checks/whitespace/WhitespaceCheckBase.hx b/src/checkstyle/checks/whitespace/WhitespaceCheckBase.hx
index c9c8cf7e..5bbb40e0 100644
--- a/src/checkstyle/checks/whitespace/WhitespaceCheckBase.hx
+++ b/src/checkstyle/checks/whitespace/WhitespaceCheckBase.hx
@@ -69,9 +69,10 @@ class WhitespaceCheckBase extends Check {
 			linePos = checker.getLinePos(tok.pos.max - 3);
 			tokLen = 3;
 		}
-		var line:String = checker.lines[linePos.line];
-		var before:String = line.substr(0, linePos.ofs);
-		var after:String = line.substr(linePos.ofs + tokLen);
+		var line:Bytes = Bytes.ofString(checker.lines[linePos.line]);
+		var before:String = line.sub(0, linePos.ofs).toString();
+		var offs:Int = linePos.ofs + tokLen;
+		var after:String = line.sub(offs, line.length - offs).toString();
 
 		var whitespaceBefore:Bool = ~/^(.*\s|)$/.match(before);
 		var whitespaceAfter:Bool = ~/^(\s.*|)$/.match(after);
diff --git a/src/checkstyle/checks/whitespace/WrapCheckBase.hx b/src/checkstyle/checks/whitespace/WrapCheckBase.hx
index f14da649..446b491f 100644
--- a/src/checkstyle/checks/whitespace/WrapCheckBase.hx
+++ b/src/checkstyle/checks/whitespace/WrapCheckBase.hx
@@ -29,10 +29,11 @@ class WrapCheckBase extends Check {
 			if (TokenTreeCheckUtils.filterOpSub(tok)) continue;
 
 			var linePos:LinePos = checker.getLinePos(tok.pos.min);
-			var line:String = checker.lines[linePos.line];
-			var before:String = line.substr(0, linePos.ofs);
+			var line:Bytes = Bytes.ofString(checker.lines[linePos.line]);
+			var before:String = line.sub(0, linePos.ofs).toString();
 			var tokLen:Int = tok.toString().length;
-			var after:String = line.substr(linePos.ofs + tokLen);
+			var offs:Int = linePos.ofs + tokLen;
+			var after:String = line.sub(offs, line.length - offs).toString();
 
 			if (~/^\s*$/.match(before)) {
 				if (option != NL) {
diff --git a/src/checkstyle/import.hx b/src/checkstyle/import.hx
index 3428120c..b3f18fee 100644
--- a/src/checkstyle/import.hx
+++ b/src/checkstyle/import.hx
@@ -1,5 +1,7 @@
 package checkstyle;
 
+import haxe.io.Bytes;
+
 import haxe.macro.Expr;
 import haxeparser.Data;
 
diff --git a/src/checkstyle/utils/StringUtils.hx b/src/checkstyle/utils/StringUtils.hx
index 22af73cc..9b5b58ca 100644
--- a/src/checkstyle/utils/StringUtils.hx
+++ b/src/checkstyle/utils/StringUtils.hx
@@ -6,8 +6,9 @@ class StringUtils {
 		return s.indexOf(c) != -1;
 	}
 
-	public static function isStringInterpolation(s:String, fileContent:String, pos:Position):Bool {
-		var quote:String = fileContent.substr(pos.min, 1);
+	public static function isStringInterpolation(s:String, fileContent:byte.ByteData, pos:Position):Bool {
+		var code:Bytes = cast fileContent;
+		var quote:String = code.sub(pos.min, 1).toString();
 		if (quote != "'") return false;
 		var regex:EReg = ~/(^|[^$])\$(\{|[a-zA-Z0-9_]+)/;
 		return regex.match(s);
diff --git a/test/checks/CheckTestCase.hx b/test/checks/CheckTestCase.hx
index b323bcc2..c3db2106 100644
--- a/test/checks/CheckTestCase.hx
+++ b/test/checks/CheckTestCase.hx
@@ -1,5 +1,7 @@
 package checks;
 
+import byte.ByteData;
+
 import haxe.PosInfos;
 
 import checkstyle.CheckMessage;
@@ -60,7 +62,7 @@ class CheckTestCase<T:String> {
 
 		ReporterManager.INSTANCE.clear();
 		ReporterManager.INSTANCE.addReporter(reporter);
-		checker.process([{name:fileName, content:src, index:0}], null);
+		checker.process([{name:fileName, content:ByteData.ofString(src), index:0}], null);
 		return reporter.message;
 	}
 
diff --git a/test/misc/ThreadTest.hx b/test/misc/ThreadTest.hx
index 8d82a545..dc3a8868 100644
--- a/test/misc/ThreadTest.hx
+++ b/test/misc/ThreadTest.hx
@@ -1,5 +1,7 @@
 package misc;
 
+import byte.ByteData;
+
 import checkstyle.CheckFile;
 import checkstyle.reporter.ReporterManager;
 import checkstyle.Checker;
@@ -82,10 +84,11 @@ class ThreadTest {
 
 	function setupFiles(count:Int):Array<CheckFile> {
 		var files:Array<CheckFile> = [];
+		var content:ByteData = ByteData.ofString(IndentationCheckTests.CORRECT_TAB_INDENT);
 		for (i in 0...count) {
 			files.push({
 				name: 'test_$i.hx',
-				content: IndentationCheckTests.CORRECT_TAB_INDENT,
+				content: content,
 				index: i
 			});
 		}