-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[#22] Change parsing engine from regex to PEG (parboiled)
parboiled is a PEG (Parsing Expression Grammar) implementation. PEG is more concise than regex, and regex could not handle recursive structures well. GcEventNode is added to access the parsed data easily. Its concrete class is generated by the Auto Value library, which utilizes annotation processing. IDE setup is required, and the instructions can be found here: google/auto#106
- Loading branch information
1 parent
008ccf7
commit 5d570f8
Showing
8 changed files
with
375 additions
and
214 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
src/generated | ||
src/generated_tests |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,14 @@ | ||
apply plugin: 'idea' | ||
|
||
dependencies { | ||
compile project(':common') | ||
compile 'org.apache.commons:commons-lang3:3.4' | ||
} | ||
compile project(':common') | ||
compile 'org.apache.commons:commons-lang3:3.4' | ||
compile 'org.parboiled:parboiled-java:1.1.7' | ||
compile 'com.google.auto.value:auto-value:1.2' | ||
} | ||
|
||
idea { | ||
module { | ||
sourceDirs += file("${projectDir}/src/generated") | ||
} | ||
} |
196 changes: 196 additions & 0 deletions
196
parser/src/main/java/edu/kaist/algo/parser/CmsGcLogRule.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,196 @@ | ||
package edu.kaist.algo.parser; | ||
|
||
import org.apache.commons.lang3.StringUtils; | ||
import org.parboiled.BaseParser; | ||
import org.parboiled.Rule; | ||
import org.parboiled.annotations.BuildParseTree; | ||
import org.parboiled.annotations.Label; | ||
import org.parboiled.annotations.SuppressSubnodes; | ||
|
||
/** | ||
* PEG (Parsing Expression Grammar) for the CMS GC log. | ||
* | ||
* <p>This PEG parses a single line of a CMS GC log. The line must be complete, that is, | ||
* it must not be cut off or interleaved with output from another thread. | ||
* | ||
* <p>Beware: CMS-related logs are not supported yet. | ||
* | ||
* <p>The following options are required: | ||
* <ul> | ||
* <li>-XX:+UseConcMarkSweepGC</li> | ||
* <li>-XX:+UnlockDiagnosticVMOptions</li> | ||
* <li>-XX:+LogVMOutput</li> | ||
* <li>-XX:+PrintGCDetails</li> | ||
* <li>-XX:+PrintGCTimeStamps</li> | ||
* </ul> | ||
* | ||
* <p>PEG (whitespaces are ignored for conciseness): | ||
* <pre> | ||
* InputLine <- Event UserSysRealTimes | ||
* Event <- (Time ': ')? '[' TypeAndDetail (Event)* UsageAndElapsedTime ']' | ||
* Time <- Digits '.' Digits ' secs'? | ||
* Digits <- [0-9]+ | ||
* TypeAndDetail <- Type ('(' Detail ')')? ': '? | ||
* Type <- 'GC' / 'ParNew' / 'CMS' / 'Full GC' / 'Metaspace' / '1 CMS-initial-mark' | ||
* / 'YG occupancy' / 'Rescan (parallel)' / 'weak refs processing' / 'class unloading' | ||
* / 'scrub symbol table' / 'scrub string table' / '1 CMS-remark' | ||
* Detail <- 'System.gc()' / (!')' .)+ | ||
* UsageAndElapsedTime <- UsageChange? (', ' Event)? (', ' Time)? | ||
* UsageChange <- (Size '-&gt;')? UsageWithTotal | ||
* UsageWithTotal <- Size '(' Size ')' | ||
* Size <- Digits 'K ' | ||
* UserSysRealTimes <- '[ Times: user=' Time ' sys=' Time ', real=' Time ']' | ||
* </pre> | ||
*/ | ||
@BuildParseTree | ||
public class CmsGcLogRule extends BaseParser<Object> { | ||
|
||
// Entry rule for one complete log line. Pushes a fresh GcEventNode builder, lets Event() and | ||
// UserSysRealTimes() fill it in through the value stack, then replaces the builder on the | ||
// stack with the node returned by build(). | ||
Rule InputLine() { | ||
return Sequence( | ||
push(GcEventNode.builder()), | ||
Event(), | ||
UserSysRealTimes(), | ||
push(popAsNode().build()) | ||
); | ||
} | ||
|
||
// Matches one bracketed event: an optional "<time>: " prefix, "[", the type and detail, any | ||
// number of nested events, the usage / elapsed-time section and the closing "]". | ||
// The builder of the event being parsed must be on top of the value stack on entry; it is | ||
// on top again when the rule has matched. | ||
// Literals that end with a space (" ", ": ", "] ") go through fromStringLiteral(), so the | ||
// trailing space stands for optional whitespace. | ||
Rule Event() { | ||
return Sequence( | ||
Optional( | ||
TimeLong(), ": ", | ||
// TimeLong() left a Long above the builder; swap() brings the builder back on top so | ||
// that popAsNode() (evaluated first) gets the builder and popAsLong() gets the time. | ||
swap() && push(popAsNode().timestamp(popAsLong())) | ||
), | ||
"[", TypeAndDetail(), " ", | ||
ZeroOrMore( | ||
// Each nested event is collected in its own builder pushed above the parent's. | ||
push(GcEventNode.builder()), | ||
Event(), | ||
// Stack is [parent, child]; after swap() the parent is popped first and the built | ||
// child is handed to addChild(). The result is pushed back as the current builder. | ||
swap() && push(popAsNode().addChild(popAsNode().build())) | ||
), | ||
" ", UsageAndElapsedTime(), "] " | ||
); | ||
} | ||
|
||
// Matches the event type, an optional parenthesized detail and an optional ": ". | ||
// match() is called right after Type() / Detail() so that it yields the text of that rule; | ||
// the text is stored on the builder via type(...) / detail(...). | ||
Rule TypeAndDetail() { | ||
return Sequence( | ||
Type(), | ||
push(popAsNode().type(match())), | ||
Optional(" ", "(", Detail(), push(popAsNode().detail(match())), ")"), | ||
Optional(": ") | ||
); | ||
} | ||
|
||
// The fixed set of event type names, tried in the listed order. | ||
@SuppressSubnodes | ||
Rule Type() { | ||
return FirstOf("GC", "ParNew", "CMS", "Full GC", "Metaspace", "1 CMS-initial-mark", | ||
"YG occupancy", "Rescan (parallel)", "weak refs processing", "class unloading", | ||
"scrub symbol table", "scrub string table", "1 CMS-remark"); | ||
} | ||
|
||
// Detail text inside the parentheses after a type. "System.gc()" is listed explicitly | ||
// because it contains a ")" itself, which the general alternative (one or more characters | ||
// other than ")") would stop at. | ||
@SuppressSubnodes | ||
Rule Detail() { | ||
return FirstOf("System.gc()", OneOrMore(NoneOf(")"))); | ||
} | ||
|
||
// Tail of an event; every part is optional: a usage change, then ", " plus one more nested | ||
// event (added as a child of the current builder), then ", " plus the elapsed time. | ||
Rule UsageAndElapsedTime() { | ||
return Sequence( | ||
Optional(UsageChange()), | ||
Optional(", ", | ||
push(GcEventNode.builder()), | ||
Event(), // Metaspace | ||
// Same child-attachment pattern as in Event(): parent is popped first after swap(). | ||
swap() && push(popAsNode().addChild(popAsNode().build())) | ||
), | ||
Optional(", ", | ||
TimeDouble(), | ||
// Stack is [builder, Double]; swap() puts the builder on top for popAsNode(). | ||
swap() && push(popAsNode().elapsedTime(popAsDouble())) | ||
) | ||
); | ||
} | ||
|
||
// Matches an optional "<size>->" prefix, stored via prevUsage(...), followed by the | ||
// mandatory "<size>(<size>)" part handled by UsageWithTotal(). | ||
Rule UsageChange() { | ||
return Sequence( | ||
Optional( | ||
Size(), "->", | ||
swap() && push(popAsNode().prevUsage(popAsLong())) | ||
), | ||
UsageWithTotal() | ||
); | ||
} | ||
|
||
// Matches "<size>(<size>)". The two Size() calls leave [builder, first, second] on the | ||
// stack; swap3() reverses these three so the builder is on top, then the first size goes to | ||
// afterUsage(...) and the second to capacity(...). | ||
Rule UsageWithTotal() { | ||
return Sequence( | ||
Size(), | ||
"(", Size(), ")", | ||
swap3() && push(popAsNode().afterUsage(popAsLong())), | ||
push(popAsNode().capacity(popAsLong())) | ||
); | ||
} | ||
|
||
// Matches digits followed by optional whitespace and "K", and pushes the digits as a Long. | ||
// The push happens directly after Digits() so that match() is the digit text only. | ||
Rule Size() { | ||
return Sequence( | ||
Digits(), | ||
push(Long.valueOf(match())), | ||
WhiteSpace(), "K " | ||
); | ||
} | ||
|
||
// Matches "[Times: user=<t> sys=<t>, real=<t>]". The three TimeDouble() calls leave | ||
// [builder, user, sys, real] on the stack; swap4() reverses these four so the builder is on | ||
// top and the values are then popped in the order user, sys, real. | ||
Rule UserSysRealTimes() { | ||
return Sequence( | ||
"[", "Times: ", "user=", TimeDouble(), " sys=", TimeDouble(), ", real=", TimeDouble(), "]", | ||
swap4() && push(popAsNode().user(popAsDouble())), | ||
push(popAsNode().sys(popAsDouble())), | ||
push(popAsNode().real(popAsDouble())) | ||
); | ||
} | ||
|
||
// Matches "<digits>.<digits>" with an optional " secs" suffix and pushes the number as a | ||
// Double. The inner Sequence groups the number so that match() returns all of it, and the | ||
// push precedes the suffix so " secs" is not part of the parsed text. | ||
@Label("Time") | ||
@SuppressSubnodes | ||
Rule TimeDouble() { | ||
return Sequence( | ||
Sequence(Digits(), ".", Digits()), | ||
push(Double.valueOf(match())), | ||
Optional(" secs") | ||
); | ||
} | ||
|
||
// Matches "<digits>.<digits>" and pushes a Long built from the digits with the "." removed | ||
// (for example "1.234" becomes 1234). | ||
// NOTE(review): presumably milliseconds when the log prints three fractional digits - confirm. | ||
@Label("Time") | ||
@SuppressSubnodes | ||
Rule TimeLong() { | ||
return Sequence( | ||
Sequence(Digits(), ".", Digits()), | ||
push(Long.valueOf(StringUtils.remove(match(), "."))) | ||
); | ||
} | ||
|
||
// One or more decimal digits. | ||
@SuppressSubnodes | ||
Rule Digits() { | ||
return OneOrMore(Digit()); | ||
} | ||
|
||
// A single character in the range '0'..'9'. | ||
Rule Digit() { | ||
return CharRange('0', '9'); | ||
} | ||
|
||
// Zero or more spaces, tabs or form feeds; also matches when there is no whitespace at all. | ||
@SuppressSubnodes | ||
Rule WhiteSpace() { | ||
return ZeroOrMore(AnyOf(" \t\f")); | ||
} | ||
|
||
// Conversion applied to string literals used in the rules above. A literal ending with a | ||
// space matches the literal without that last space, followed by WhiteSpace(); any other | ||
// literal is matched verbatim. | ||
@Override | ||
protected Rule fromStringLiteral(String string) { | ||
return string.endsWith(" ") | ||
? Sequence(String(string.substring(0, string.length() - 1)), WhiteSpace()) | ||
: String(string); | ||
} | ||
|
||
// Pops the top of the value stack and casts it to Double; the cast fails at runtime if the | ||
// top entry has a different type. | ||
protected Double popAsDouble() { | ||
return (Double) pop(); | ||
} | ||
|
||
// Pops the top of the value stack and casts it to GcEventNode.Builder. | ||
protected GcEventNode.Builder popAsNode() { | ||
return (GcEventNode.Builder) pop(); | ||
} | ||
|
||
// Pops the top of the value stack and casts it to Long. | ||
protected Long popAsLong() { | ||
return (Long) pop(); | ||
} | ||
} |
Oops, something went wrong.