Skip to content

Commit

Permalink
[#22] Change parsing engine from regex to PEG (parboiled)
Browse files Browse the repository at this point in the history
parboiled is a PEG (Parsing Expression Grammar) implementation.
PEG is more concise than regex, and regex could not handle recursive
structure well.

GcEventNode is added to access the parsed data easily. Its concrete class is
generated by Auto Value library which utilizes annotation processing. IDE setup
is required and the instruction can be found here:
    google/auto#106
  • Loading branch information
ducky-hong committed May 17, 2016
1 parent 008ccf7 commit 5d570f8
Show file tree
Hide file tree
Showing 8 changed files with 375 additions and 214 deletions.
2 changes: 1 addition & 1 deletion common/src/main/proto/gc_model.proto
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ message GcEvent {
double sys_time = 7; // time spent in OS call or waiting for system event
double real_time = 8; // (user_time + sys_time) / threads# + alpha
double ref_time = 9; // reference processing time
}
}
2 changes: 2 additions & 0 deletions parser/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
src/generated
src/generated_tests
16 changes: 13 additions & 3 deletions parser/build.gradle
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
apply plugin: 'idea'

dependencies {
compile project(':common')
compile 'org.apache.commons:commons-lang3:3.4'
}
compile project(':common')
compile 'org.apache.commons:commons-lang3:3.4'
compile 'org.parboiled:parboiled-java:1.1.7'
compile 'com.google.auto.value:auto-value:1.2'
}

idea {
module {
sourceDirs += file("${projectDir}/src/generated")
}
}
196 changes: 196 additions & 0 deletions parser/src/main/java/edu/kaist/algo/parser/CmsGcLogRule.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
package edu.kaist.algo.parser;

import org.apache.commons.lang3.StringUtils;
import org.parboiled.BaseParser;
import org.parboiled.Rule;
import org.parboiled.annotations.BuildParseTree;
import org.parboiled.annotations.Label;
import org.parboiled.annotations.SuppressSubnodes;

/**
* PEG (Parsing Expression Grammar) for the CMS GC log.
*
* <p>This PEG parses a single line of a CMS GC log. The line must be complete; that is,
* it must not be cut off by another thread's interference.
*
* <p>Beware: CMS-related logs are not supported yet.
*
* <p>Following options are required:
* <ul>
* <li>-XX:+UseConcMarkSweepGC</li>
* <li>-XX:+UnlockDiagnosticVMOptions</li>
* <li>-XX:+LogVMOutput</li>
* <li>-XX:+PrintGCDetails</li>
* <li>-XX:+PrintGCTimeStamps</li>
* </ul>
*
* <p>PEG (whitespaces are ignored for conciseness):
* <pre>
* InputLine <- Event UserSysRealTimes
* Event <- (Time ': ')? '[' TypeAndDetail (Event)* UsageAndElapsedTime ']'
* Time <- Digits '.' Digits ' secs'?
* Digits <- [0-9]+
* TypeAndDetail <- Type ('(' Detail ')')? ': '?
* Type <- 'GC' / 'ParNew' / 'CMS' / 'Full GC' / 'Metaspace' / '1 CMS-initial-mark'
* / 'YG occupancy' / 'Rescan (parallel)' / 'weak refs processing' / 'class unloading'
* / 'scrub symbol table' / 'scrub string table' / '1 CMS-remark'
* Detail <- 'System.gc()' / !')'+
* UsageAndElapsedTime <- UsageChange? (', ' Event)? (', ' Time)?
* UsageChange <- (Size '-&gt;')? UsageWithTotal
* UsageWithTotal <- Size '(' Size ')'
* Size <- Digits 'K '
* UserSysRealTimes <- '[ Times: user=' Time ' sys=' Time ', real=' Time ']'
* </pre>
*/
@BuildParseTree
public class CmsGcLogRule extends BaseParser<Object> {

  /**
   * Matches one complete log line: an {@code Event} followed by the user/sys/real times block.
   *
   * <p>A fresh {@code GcEventNode} builder is pushed before parsing starts, the nested rules
   * fill it in, and the final action replaces the builder on the value stack with the result
   * of {@code build()}.
   *
   * @return the rule for a whole input line
   */
  Rule InputLine() {
    return Sequence(
        push(GcEventNode.builder()),
        Event(),
        UserSysRealTimes(),
        push(popAsNode().build())
    );
  }

  /**
   * Matches one bracketed event, optionally prefixed by a timestamp, with any number of nested
   * child events before the usage/elapsed-time section.
   *
   * <p>Expects the builder of the event being parsed on top of the value stack and leaves that
   * same builder on top when it succeeds.
   *
   * @return the rule for a single (possibly nested) event
   */
  Rule Event() {
    return Sequence(
        Optional(
            TimeLong(), ": ",
            // Stack is [builder, timestamp]; swap so the builder is popped first (the receiver
            // expression is evaluated before the argument), then push the builder back.
            swap() && push(popAsNode().timestamp(popAsLong()))
        ),
        "[", TypeAndDetail(), " ",
        ZeroOrMore(
            // Each nested event gets its own builder, which is built and attached to the parent.
            push(GcEventNode.builder()),
            Event(),
            // Stack is [parent, child]; swap so the parent is popped first, the child second.
            swap() && push(popAsNode().addChild(popAsNode().build()))
        ),
        " ", UsageAndElapsedTime(), "] "
    );
  }

  /**
   * Matches the event type, an optional parenthesised detail, and an optional trailing colon.
   * The matched type and detail texts are stored on the builder on top of the value stack.
   *
   * @return the rule for the type/detail prefix of an event
   */
  Rule TypeAndDetail() {
    return Sequence(
        Type(),
        // match() is the text matched by Type() immediately before this action.
        push(popAsNode().type(match())),
        // Here match() is the text matched by Detail(), without the surrounding parentheses.
        Optional(" ", "(", Detail(), push(popAsNode().detail(match())), ")"),
        Optional(": ")
    );
  }

  /**
   * Matches one of the known event type names. Alternatives are tried in the listed order.
   *
   * @return the rule for an event type
   */
  @SuppressSubnodes
  Rule Type() {
    return FirstOf("GC", "ParNew", "CMS", "Full GC", "Metaspace", "1 CMS-initial-mark",
        "YG occupancy", "Rescan (parallel)", "weak refs processing", "class unloading",
        "scrub symbol table", "scrub string table", "1 CMS-remark");
  }

  /**
   * Matches the detail text inside the parentheses following a type: either the literal
   * {@code System.gc()} (which itself contains parentheses) or any run of characters other
   * than {@code ')'}.
   *
   * @return the rule for an event detail
   */
  @SuppressSubnodes
  Rule Detail() {
    return FirstOf("System.gc()", OneOrMore(NoneOf(")")));
  }

  /**
   * Matches the tail of an event: an optional usage change, an optional comma-separated nested
   * event, and an optional comma-separated elapsed time. Every part is optional, so this rule
   * can match the empty string.
   *
   * @return the rule for the usage and elapsed-time section of an event
   */
  Rule UsageAndElapsedTime() {
    return Sequence(
        Optional(UsageChange()),
        Optional(", ",
            push(GcEventNode.builder()),
            Event(), // Metaspace
            // Stack is [parent, child]; attach the built child to the parent.
            swap() && push(popAsNode().addChild(popAsNode().build()))
        ),
        Optional(", ",
            TimeDouble(),
            // Stack is [builder, elapsed]; swap so the builder is popped first.
            swap() && push(popAsNode().elapsedTime(popAsDouble()))
        )
    );
  }

  /**
   * Matches a usage change such as {@code 1234K->567K(8910K)}; the leading
   * "previous usage" part including the arrow is optional.
   *
   * @return the rule for a usage change
   */
  Rule UsageChange() {
    return Sequence(
        Optional(
            // The log separates previous and current usage with a plain "->"; an HTML entity
            // here would never match real log text.
            Size(), "->",
            // Stack is [builder, prevUsage]; swap so the builder is popped first.
            swap() && push(popAsNode().prevUsage(popAsLong()))
        ),
        UsageWithTotal()
    );
  }

  /**
   * Matches a usage followed by the parenthesised capacity, e.g. {@code 567K(8910K)}, and
   * stores both values on the builder.
   *
   * @return the rule for a usage with its total capacity
   */
  Rule UsageWithTotal() {
    return Sequence(
        Size(),
        "(", Size(), ")",
        // Stack is [builder, usage, capacity]; swap3 reverses it to [capacity, usage, builder],
        // so the builder is popped first, then the usage, and finally the capacity.
        swap3() && push(popAsNode().afterUsage(popAsLong())),
        push(popAsNode().capacity(popAsLong()))
    );
  }

  /**
   * Matches a size in kilobytes (digits, optional whitespace, then {@code K}) and pushes the
   * numeric part as a {@code Long}.
   *
   * @return the rule for a size
   */
  Rule Size() {
    return Sequence(
        Digits(),
        push(Long.valueOf(match())),
        WhiteSpace(), "K "
    );
  }

  /**
   * Matches the {@code [Times: user=... sys=..., real=...]} block and stores the three values
   * on the builder.
   *
   * @return the rule for the user/sys/real times block
   */
  Rule UserSysRealTimes() {
    return Sequence(
        "[", "Times: ", "user=", TimeDouble(), " sys=", TimeDouble(), ", real=", TimeDouble(), "]",
        // Stack is [builder, user, sys, real]; swap4 reverses it to [real, sys, user, builder],
        // so the values are popped in the order user, sys, real.
        swap4() && push(popAsNode().user(popAsDouble())),
        push(popAsNode().sys(popAsDouble())),
        push(popAsNode().real(popAsDouble()))
    );
  }

  /**
   * Matches a decimal number with an optional {@code " secs"} suffix and pushes the number
   * (without the suffix) as a {@code Double}.
   *
   * @return the rule for a time expressed as a double
   */
  @Label("Time")
  @SuppressSubnodes
  Rule TimeDouble() {
    return Sequence(
        // The inner Sequence ensures match() covers only the number, not the optional suffix.
        Sequence(Digits(), ".", Digits()),
        push(Double.valueOf(match())),
        Optional(" secs")
    );
  }

  /**
   * Matches a decimal number and pushes it as a {@code Long} with the decimal point removed,
   * e.g. {@code "1.234"} is pushed as {@code 1234}.
   *
   * <p>NOTE(review): the unit of the pushed value depends on how many fractional digits the
   * log prints; presumably always three (milliseconds), to be confirmed.
   *
   * @return the rule for a time expressed as a long
   */
  @Label("Time")
  @SuppressSubnodes
  Rule TimeLong() {
    return Sequence(
        Sequence(Digits(), ".", Digits()),
        push(Long.valueOf(StringUtils.remove(match(), ".")))
    );
  }

  /**
   * Matches one or more decimal digits.
   *
   * @return the rule for a run of digits
   */
  @SuppressSubnodes
  Rule Digits() {
    return OneOrMore(Digit());
  }

  /**
   * Matches a single decimal digit.
   *
   * @return the rule for one digit
   */
  Rule Digit() {
    return CharRange('0', '9');
  }

  /**
   * Matches zero or more spaces, tabs or form feeds.
   *
   * @return the rule for optional whitespace
   */
  @SuppressSubnodes
  Rule WhiteSpace() {
    return ZeroOrMore(AnyOf(" \t\f"));
  }

  /**
   * Converts string literals used inside rules. A literal ending with a space matches the
   * literal without that last space, followed by optional whitespace; any other literal is
   * matched verbatim.
   *
   * @param string the literal written in a rule definition
   * @return the rule matching that literal
   */
  @Override
  protected Rule fromStringLiteral(String string) {
    return string.endsWith(" ")
        ? Sequence(String(string.substring(0, string.length() - 1)), WhiteSpace())
        : String(string);
  }

  /**
   * Pops the top of the value stack and casts it to {@code Double}.
   *
   * @return the popped value
   * @throws ClassCastException if the top value is not a {@code Double}
   */
  protected Double popAsDouble() {
    return (Double) pop();
  }

  /**
   * Pops the top of the value stack and casts it to a {@code GcEventNode.Builder}.
   *
   * @return the popped builder
   * @throws ClassCastException if the top value is not a {@code GcEventNode.Builder}
   */
  protected GcEventNode.Builder popAsNode() {
    return (GcEventNode.Builder) pop();
  }

  /**
   * Pops the top of the value stack and casts it to {@code Long}.
   *
   * @return the popped value
   * @throws ClassCastException if the top value is not a {@code Long}
   */
  protected Long popAsLong() {
    return (Long) pop();
  }
}
Loading

0 comments on commit 5d570f8

Please sign in to comment.