From 7aa7a8538aa263ec5f2b44aee5154893fc396c7c Mon Sep 17 00:00:00 2001 From: AlexHaxe Date: Tue, 1 May 2018 17:28:02 +0200 Subject: [PATCH] refactored to use Bytes instead of String for file contents, see #98 (#402) * refactored to use Bytes instead of String for file contents, see #98 --- CHANGES.md | 1 + checkstyle.json | 2 +- src/checkstyle/CheckFile.hx | 4 ++- src/checkstyle/Checker.hx | 30 ++++++++++--------- .../block/ConditionalCompilationCheck.hx | 14 ++++----- .../checks/block/RightCurlyCheck.hx | 4 +-- .../checks/imports/UnusedImportCheck.hx | 2 +- .../checks/literal/StringLiteralCheck.hx | 2 +- .../checks/whitespace/ArrayAccessCheck.hx | 4 +-- .../checks/whitespace/WhitespaceAfterCheck.hx | 2 +- .../whitespace/WhitespaceAroundCheck.hx | 7 +++-- .../checks/whitespace/WhitespaceCheckBase.hx | 7 +++-- .../checks/whitespace/WrapCheckBase.hx | 7 +++-- src/checkstyle/import.hx | 2 ++ src/checkstyle/utils/StringUtils.hx | 5 ++-- test/checks/CheckTestCase.hx | 4 ++- test/misc/ThreadTest.hx | 5 +++- 17 files changed, 59 insertions(+), 43 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 726be202..b7e605ac 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -7,6 +7,7 @@ - Fixed BkOpen childs in token tree parser [#398](https://github.com/HaxeCheckstyle/haxe-checkstyle/issues/398) - Fixed bad offset crash with C++ build on7 Windows 10 [#398](https://github.com/HaxeCheckstyle/haxe-checkstyle/issues/398) - Fixed object declaration handling [#399](https://github.com/HaxeCheckstyle/haxe-checkstyle/issues/399) +- Refactored content handling to use Bytes instead of String (should fix [#98](https://github.com/HaxeCheckstyle/haxe-checkstyle/issues/98)) [#402](https://github.com/HaxeCheckstyle/haxe-checkstyle/issues/402) - Added unittests for ParserQueue and CheckerPool [#393](https://github.com/HaxeCheckstyle/haxe-checkstyle/issues/393) - Added unittests for TokenTree structure verification [#400](https://github.com/HaxeCheckstyle/haxe-checkstyle/issues/400) - Removed `.` from default settings in SeparatorWrapCheck [#400](https://github.com/HaxeCheckstyle/haxe-checkstyle/issues/400) diff --git a/checkstyle.json b/checkstyle.json index eae853e7..b50ac2b1 100644 --- a/checkstyle.json +++ b/checkstyle.json @@ -171,7 +171,7 @@ }, { "props": { - "ignoreNumbers": [-1, 0, 1, 2, 3, 4.0, 5, 8, 13, 21, 34, 100] + "ignoreNumbers": [-1, 0, 1, 2, 3, 4.0, 5, 8, 10, 13, 21, 34, 100] }, "type": "MagicNumber" }, diff --git a/src/checkstyle/CheckFile.hx b/src/checkstyle/CheckFile.hx index 0f93475d..d9875524 100644 --- a/src/checkstyle/CheckFile.hx +++ b/src/checkstyle/CheckFile.hx @@ -1,7 +1,9 @@ package checkstyle; +import byte.ByteData; + typedef CheckFile = { var name:String; - var content:String; + var content:ByteData; var index:Int; } \ No newline at end of file diff --git a/src/checkstyle/Checker.hx b/src/checkstyle/Checker.hx index ce851b07..1b63aea7 100644 --- a/src/checkstyle/Checker.hx +++ b/src/checkstyle/Checker.hx @@ -1,6 +1,5 @@ package checkstyle; -import byte.ByteData; import haxe.CallStack; import haxeparser.HaxeParser; import haxeparser.HaxeLexer; @@ -15,7 +14,6 @@ import checkstyle.token.TokenTreeBuilder; class Checker { public var file:CheckFile; - public var bytes:ByteData; public var lines:Array; public var tokens:Array; public var ast:Ast; @@ -41,19 +39,19 @@ class Checker { public function getTokenTree():TokenTree { if (tokens == null) return null; - if (tokenTree == null) tokenTree = TokenTreeBuilder.buildTokenTree(tokens, bytes); + if (tokenTree == null) tokenTree = TokenTreeBuilder.buildTokenTree(tokens, file.content); return tokenTree; } function makePosIndices() { - var code = file.content; + var code:Bytes = cast file.content; linesIdx = []; var last = 0; var left = false; for (i in 0...code.length) { - if (code.charAt(i) == "\n") { + if (code.get(i) == 0x0A) { linesIdx.push({l:last, r:i}); last = i + 1; left = false; @@ -71,11 +69,16 @@ class Checker { } public function getString(off:Int, off2:Int):String { - return file.content.substr(off, off2 - off); + var code:Bytes = cast file.content; + var len:Int = off2 - off; + if ((off >= code.length) || (off + len > code.length)) return ""; + return code.sub(off, off2 - off).toString(); } function findLineSeparator() { - var code = file.content; + var codeBytes:Bytes = cast file.content; + var code:String = codeBytes.toString(); + for (i in 0...code.length) { var char = code.charAt(i); if (char == "\r" || char == "\n") { @@ -92,15 +95,16 @@ class Checker { } function makeLines() { - var code = file.content; - lines = code.split(lineSeparator); + var code:Bytes = cast file.content; + var textCode:String = code.toString(); + lines = textCode.split(lineSeparator); } function makeTokens() { try { tokens = []; tokenTree = null; - var lexer = new HaxeLexer(bytes, file.name); + var lexer = new HaxeLexer(file.content, file.name); var t:Token = lexer.token(HaxeLexer.tok); while (t.tok != Eof) { @@ -128,8 +132,7 @@ class Checker { } function makeAST(defines:Array):Ast { - var code = file.content; - var parser = new HaxeParser(byte.ByteData.ofString(code), file.name); + var parser = new HaxeParser(file.content, file.name); parser.define("cross"); parser.define("scriptable"); parser.define("unsafe"); @@ -176,7 +179,7 @@ class Checker { public function loadFileContent(checkFile:CheckFile) { // unittests set content before running Checker // real checks load content here - if (checkFile.content == null) checkFile.content = File.getContent(checkFile.name); + if (checkFile.content == null) checkFile.content = cast File.getBytes(checkFile.name); } public function unloadFileContent(checkFile:CheckFile) { @@ -185,7 +188,6 @@ class Checker { public function createContext(checkFile:CheckFile):Bool { file = checkFile; - bytes = byte.ByteData.ofString(file.content); ReporterManager.INSTANCE.fileStart(file); try { findLineSeparator(); diff --git a/src/checkstyle/checks/block/ConditionalCompilationCheck.hx b/src/checkstyle/checks/block/ConditionalCompilationCheck.hx index 5c7abb0d..c604c69a 100644 --- a/src/checkstyle/checks/block/ConditionalCompilationCheck.hx +++ b/src/checkstyle/checks/block/ConditionalCompilationCheck.hx @@ -51,8 +51,8 @@ class ConditionalCompilationCheck extends Check { } function checkMultiLine(tok:TokenTree, linePos:LinePos) { - var line:String = checker.lines[linePos.line]; - var prefix:String = line.substr(0, linePos.ofs); + var line:Bytes = Bytes.ofString(checker.lines[linePos.line]); + var prefix:String = line.sub(0, linePos.ofs).toString(); if (checkLine(tok, linePos, line)) return; switch (policy) { @@ -68,8 +68,8 @@ class ConditionalCompilationCheck extends Check { switch (childTok.tok) { case Sharp("else"), Sharp("elseif"), Sharp("end"): var childLinePos:LinePos = checker.getLinePos(childTok.pos.min); - var childLine:String = checker.lines[childLinePos.line]; - var childPrefix:String = childLine.substr(0, childLinePos.ofs); + var childLine:Bytes = Bytes.ofString(checker.lines[childLinePos.line]); + var childPrefix:String = childLine.sub(0, childLinePos.ofs).toString(); if (checkLine(childTok, childLinePos, childLine)) continue; if (childPrefix == prefix) continue; logPos('Indentation of $childTok must match corresponding #if', childTok.pos); @@ -78,9 +78,9 @@ class ConditionalCompilationCheck extends Check { } } - function checkLine(tok:TokenTree, linePos:LinePos, line:String):Bool { + function checkLine(tok:TokenTree, linePos:LinePos, line:Bytes):Bool { var r:EReg = ~/^[ \t]*$/; - var prefix:String = line.substr(0, linePos.ofs); + var prefix:String = line.sub(0, linePos.ofs).toString(); if (!r.match(prefix)) { logPos('only whitespace allowed before $tok', tok.pos); return true; @@ -89,7 +89,7 @@ class ConditionalCompilationCheck extends Check { if (expr == null) return false; var linePosAfter:LinePos = checker.getLinePos(expr.getPos().max); if (linePosAfter.line == linePos.line) { - var postfix:String = line.substr(linePosAfter.ofs); + var postfix:String = line.sub(linePosAfter.ofs, line.length - linePosAfter.ofs).toString(); if (!r.match(postfix)) { logPos('only whitespace allowed after $tok', tok.pos); return true; diff --git a/src/checkstyle/checks/block/RightCurlyCheck.hx b/src/checkstyle/checks/block/RightCurlyCheck.hx index d9a9a011..8e3eb44e 100644 --- a/src/checkstyle/checks/block/RightCurlyCheck.hx +++ b/src/checkstyle/checks/block/RightCurlyCheck.hx @@ -125,8 +125,8 @@ class RightCurlyCheck extends Check { var linePos:LinePos = checker.getLinePos(curlyPos.max); var afterCurly:String = ""; if (!eof) { - var afterLine:String = checker.lines[linePos.line]; - if (linePos.ofs < afterLine.length) afterCurly = afterLine.substr(linePos.ofs); + var afterLine:Bytes = Bytes.ofString(checker.lines[linePos.line]); + if (linePos.ofs < afterLine.length) afterCurly = afterLine.sub(linePos.ofs, afterLine.length - linePos.ofs).toString(); } // only else and catch allowed on same line after a right curly var sameRegex = ~/^\s*(else|catch)/; diff --git a/src/checkstyle/checks/imports/UnusedImportCheck.hx b/src/checkstyle/checks/imports/UnusedImportCheck.hx index c514478e..20f19516 100644 --- a/src/checkstyle/checks/imports/UnusedImportCheck.hx +++ b/src/checkstyle/checks/imports/UnusedImportCheck.hx @@ -36,7 +36,7 @@ class UnusedImportCheck extends Check { var stringLiterals:Array = root.filterCallback(function(token:TokenTree, depth:Int):FilterResult { switch (token.tok) { case Const(CString(text)): - if (checker.file.content.substr(token.pos.min, 1) != "'") return GO_DEEPER; + if (checker.getString(token.pos.min, token.pos.min + 1) != "'") return GO_DEEPER; if (~/\$\{[^\}]+\.[^\}]+\}/.match (text)) return FOUND_GO_DEEPER; default: } diff --git a/src/checkstyle/checks/literal/StringLiteralCheck.hx b/src/checkstyle/checks/literal/StringLiteralCheck.hx index 266c45ee..7a62152b 100644 --- a/src/checkstyle/checks/literal/StringLiteralCheck.hx +++ b/src/checkstyle/checks/literal/StringLiteralCheck.hx @@ -38,7 +38,7 @@ class StringLiteralCheck extends Check { } function checkLiteral(s:String, pos:Position) { - var quote:String = checker.file.content.substr(pos.min, 1); + var quote:String = checker.getString(pos.min, pos.min + 1); var singleQuote:Bool = quote == "'"; switch (policy) { case ONLY_DOUBLE: diff --git a/src/checkstyle/checks/whitespace/ArrayAccessCheck.hx b/src/checkstyle/checks/whitespace/ArrayAccessCheck.hx index d9cec679..016e39fa 100644 --- a/src/checkstyle/checks/whitespace/ArrayAccessCheck.hx +++ b/src/checkstyle/checks/whitespace/ArrayAccessCheck.hx @@ -32,8 +32,8 @@ class ArrayAccessCheck extends Check { } if (!spaceInside) { - if (checker.file.content.substr(e2.pos.min - 1, 1) == " ") logPos("Space between [ and index", e.pos); - if (checker.file.content.substr(e2.pos.max, 1) == " ") logPos("Space between index and ]", e.pos); + if (checker.getString(e2.pos.min - 1, e2.pos.min) == " ") logPos("Space between [ and index", e.pos); + if (checker.getString(e2.pos.max, e2.pos.max + 1) == " ") logPos("Space between index and ]", e.pos); } default: } diff --git a/src/checkstyle/checks/whitespace/WhitespaceAfterCheck.hx b/src/checkstyle/checks/whitespace/WhitespaceAfterCheck.hx index 14b92ace..471cf22b 100644 --- a/src/checkstyle/checks/whitespace/WhitespaceAfterCheck.hx +++ b/src/checkstyle/checks/whitespace/WhitespaceAfterCheck.hx @@ -87,7 +87,7 @@ class WhitespaceAfterCheck extends Check { if (isPosSuppressed(tok.pos)) continue; if (TokenTreeCheckUtils.filterOpSub(tok)) continue; - var contentAfter:String = checker.file.content.substr(tok.pos.max, 1); + var contentAfter:String = checker.getString(tok.pos.max, tok.pos.max + 1); if (~/^(\s|)$/.match(contentAfter)) continue; logPos('No whitespace after "$tok"', tok.pos); diff --git a/src/checkstyle/checks/whitespace/WhitespaceAroundCheck.hx b/src/checkstyle/checks/whitespace/WhitespaceAroundCheck.hx index 7be050ee..ed8d4b49 100644 --- a/src/checkstyle/checks/whitespace/WhitespaceAroundCheck.hx +++ b/src/checkstyle/checks/whitespace/WhitespaceAroundCheck.hx @@ -125,10 +125,11 @@ class WhitespaceAroundCheck extends Check { if (TokenTreeCheckUtils.filterOpSub(tok)) continue; var linePos:LinePos = checker.getLinePos(tok.pos.min); - var line:String = checker.lines[linePos.line]; - var before:String = line.substr(0, linePos.ofs); + var line:Bytes = Bytes.ofString(checker.lines[linePos.line]); + var before:String = line.sub(0, linePos.ofs).toString(); var tokLen:Int = tok.toString().length; - var after:String = line.substr(linePos.ofs + tokLen); + var offs:Int = linePos.ofs + tokLen; + var after:String = line.sub(offs, line.length - offs).toString(); if (!(~/^.*\s$/.match(before))) { logPos('No whitespace around "$tok"', tok.pos); diff --git a/src/checkstyle/checks/whitespace/WhitespaceCheckBase.hx b/src/checkstyle/checks/whitespace/WhitespaceCheckBase.hx index c9c8cf7e..5bbb40e0 100644 --- a/src/checkstyle/checks/whitespace/WhitespaceCheckBase.hx +++ b/src/checkstyle/checks/whitespace/WhitespaceCheckBase.hx @@ -69,9 +69,10 @@ class WhitespaceCheckBase extends Check { linePos = checker.getLinePos(tok.pos.max - 3); tokLen = 3; } - var line:String = checker.lines[linePos.line]; - var before:String = line.substr(0, linePos.ofs); - var after:String = line.substr(linePos.ofs + tokLen); + var line:Bytes = Bytes.ofString(checker.lines[linePos.line]); + var before:String = line.sub(0, linePos.ofs).toString(); + var offs:Int = linePos.ofs + tokLen; + var after:String = line.sub(offs, line.length - offs).toString(); var whitespaceBefore:Bool = ~/^(.*\s|)$/.match(before); var whitespaceAfter:Bool = ~/^(\s.*|)$/.match(after); diff --git a/src/checkstyle/checks/whitespace/WrapCheckBase.hx b/src/checkstyle/checks/whitespace/WrapCheckBase.hx index f14da649..446b491f 100644 --- a/src/checkstyle/checks/whitespace/WrapCheckBase.hx +++ b/src/checkstyle/checks/whitespace/WrapCheckBase.hx @@ -29,10 +29,11 @@ class WrapCheckBase extends Check { if (TokenTreeCheckUtils.filterOpSub(tok)) continue; var linePos:LinePos = checker.getLinePos(tok.pos.min); - var line:String = checker.lines[linePos.line]; - var before:String = line.substr(0, linePos.ofs); + var line:Bytes = Bytes.ofString(checker.lines[linePos.line]); + var before:String = line.sub(0, linePos.ofs).toString(); var tokLen:Int = tok.toString().length; - var after:String = line.substr(linePos.ofs + tokLen); + var offs:Int = linePos.ofs + tokLen; + var after:String = line.sub(offs, line.length - offs).toString(); if (~/^\s*$/.match(before)) { if (option != NL) { diff --git a/src/checkstyle/import.hx b/src/checkstyle/import.hx index 3428120c..b3f18fee 100644 --- a/src/checkstyle/import.hx +++ b/src/checkstyle/import.hx @@ -1,5 +1,7 @@ package checkstyle; +import haxe.io.Bytes; + import haxe.macro.Expr; import haxeparser.Data; diff --git a/src/checkstyle/utils/StringUtils.hx b/src/checkstyle/utils/StringUtils.hx index 22af73cc..9b5b58ca 100644 --- a/src/checkstyle/utils/StringUtils.hx +++ b/src/checkstyle/utils/StringUtils.hx @@ -6,8 +6,9 @@ class StringUtils { return s.indexOf(c) != -1; } - public static function isStringInterpolation(s:String, fileContent:String, pos:Position):Bool { - var quote:String = fileContent.substr(pos.min, 1); + public static function isStringInterpolation(s:String, fileContent:byte.ByteData, pos:Position):Bool { + var code:Bytes = cast fileContent; + var quote:String = code.sub(pos.min, 1).toString(); if (quote != "'") return false; var regex:EReg = ~/(^|[^$])\$(\{|[a-zA-Z0-9_]+)/; return regex.match(s); diff --git a/test/checks/CheckTestCase.hx b/test/checks/CheckTestCase.hx index b323bcc2..c3db2106 100644 --- a/test/checks/CheckTestCase.hx +++ b/test/checks/CheckTestCase.hx @@ -1,5 +1,7 @@ package checks; +import byte.ByteData; + import haxe.PosInfos; import checkstyle.CheckMessage; @@ -60,7 +62,7 @@ class CheckTestCase { ReporterManager.INSTANCE.clear(); ReporterManager.INSTANCE.addReporter(reporter); - checker.process([{name:fileName, content:src, index:0}], null); + checker.process([{name:fileName, content:ByteData.ofString(src), index:0}], null); return reporter.message; } diff --git a/test/misc/ThreadTest.hx b/test/misc/ThreadTest.hx index 8d82a545..dc3a8868 100644 --- a/test/misc/ThreadTest.hx +++ b/test/misc/ThreadTest.hx @@ -1,5 +1,7 @@ package misc; +import byte.ByteData; + import checkstyle.CheckFile; import checkstyle.reporter.ReporterManager; import checkstyle.Checker; @@ -82,10 +84,11 @@ class ThreadTest { function setupFiles(count:Int):Array { var files:Array = []; + var content:ByteData = ByteData.ofString(IndentationCheckTests.CORRECT_TAB_INDENT); for (i in 0...count) { files.push({ name: 'test_$i.hx', - content: IndentationCheckTests.CORRECT_TAB_INDENT, + content: content, index: i }); }