Skip to content

Commit

Permalink
more grammar debugging tools
Browse files Browse the repository at this point in the history
  • Loading branch information
pieter-bos committed May 29, 2024
1 parent 60e445f commit c5f3c1a
Show file tree
Hide file tree
Showing 2 changed files with 198 additions and 90 deletions.
64 changes: 54 additions & 10 deletions src/parsers/vct/parsers/debug/ATNTools.scala
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,15 @@ object ATNTools {
case other => other.target
}

edges += ((s, transitionLanguage(recognizer, trans), target))
val language =
trans match {
case rule: RuleTransition
if expandRules.contains(rule.ruleIndex) =>
Seqn()
case other => transitionLanguage(recognizer, other)
}

edges += ((s, language, target))

if (!explored.contains(target)) { toExplore += target }
}
Expand Down Expand Up @@ -118,8 +126,8 @@ object ATNTools {
val startOut = outEdge.getOrElseUpdate(start, mutable.Map())
val endIn = inEdge.getOrElseUpdate(end, mutable.Map())

startOut(end) = Alts(lang, startOut.getOrElse(end, Alts())).simplify
endIn(start) = Alts(lang, endIn.getOrElse(start, Alts())).simplify
startOut(end) = Alts(lang, startOut.getOrElse(end, Alts()))
endIn(start) = Alts(lang, endIn.getOrElse(start, Alts()))
}

def delete(state: ATNState): Unit = {
Expand Down Expand Up @@ -157,13 +165,31 @@ object ATNTools {
def compact(): Unit = {
output(Paths.get(s"tmp/${recognizer.getRuleNames()(s0.ruleIndex)}-0.dot"))

for ((state, i) <- inEdge.keys.toSeq.zipWithIndex) {
var i = 1

while (
inEdge.size > 2 || outEdge.size > 2 || (inEdge.keys ++ outEdge.keys)
.toSeq.distinct.size > 2
) {
val state = (inEdge.keys ++ outEdge.keys).minBy(state =>
(
state == s0 || state == accept,
(inEdge.getOrElse(state, EMPTY_MAP).size - 1) *
(outEdge.getOrElse(state, EMPTY_MAP).size - 1),
)
)

if (state != s0 && state != accept) { delete(state) }
else
???

output(
Paths
.get(s"tmp/${recognizer.getRuleNames()(s0.ruleIndex)}-${i + 1}.dot")
)
println(inEdge.size)

/*output(
Paths.get(s"tmp/${recognizer.getRuleNames()(s0.ruleIndex)}-$i.dot")
)*/

i += 1
}
}

Expand All @@ -180,7 +206,7 @@ object ATNTools {
val endAtReject = Star(
Alts(stayReject, Seqn(goAccept, Star(stayAccept), goReject))
)
Seqn(endAtReject, goAccept, Star(stayAccept)).simplify
Seqn(endAtReject, goAccept, Star(stayAccept))
}
}

Expand All @@ -196,16 +222,34 @@ object ATNTools {
* class of the parser to analyze (e.g. vct.antlr4.generated.CParser)
* Argument 2: parse rule to derive the ATN of (e.g. initializerList)
* Argument 3: output file in DOT/graphviz format (e.g. initializerList.dot)
* Argument 4: (optional) comma-separated list of rules to expand, or * to
* expand all rules
*/
def main(args: Array[String]): Unit = {
  // Fail early with a usage hint instead of an ArrayIndexOutOfBoundsException
  // when one of the three mandatory arguments is missing.
  require(
    args.length >= 3,
    "usage: <parser class> <rule> <output.dot> [<rule>,<rule>,... | *]",
  )

  val parserClass = getClass.getClassLoader.loadClass(args(0))
  // The parser is constructed with a null token stream; below it is only
  // queried for its rule tables and ATN.
  val parser = parserClass.getConstructor(classOf[TokenStream])
    .newInstance(null).asInstanceOf[Recognizer]

  // The rule index map is a Java map that yields null for unknown names;
  // turn that into a readable error instead of an NPE on unboxing.
  def indexOfRule(name: String): Int =
    Option(parser.getRuleIndexMap.get(name)).map(_.intValue).getOrElse(
      throw new IllegalArgumentException(
        s"Unknown parse rule '$name' in parser ${args(0)}"
      )
    )

  val ruleIndex = indexOfRule(args(1))

  // Optional fourth argument: which rule transitions to expand.
  val expand =
    args.lift(3) match {
      case None => Set.empty[Int]
      case Some("*") => parser.getRuleNames.indices.toSet
      case Some(list) =>
        // Tolerate whitespace around names and stray commas ("a, b,").
        list.split(",").map(_.trim).filter(_.nonEmpty).map(indexOfRule).toSet
    }

  val state = parser.getATN.ruleToStartState(ruleIndex)
  val edges = getEdges(parser, state, expandRules = expand)

  // Using(...) returns a Try; report a failed write instead of discarding it.
  // The regular-language rendering below is still attempted afterwards.
  Using(Files.newBufferedWriter(Paths.get(args(2)))) { w =>
    outputGraph(parser, edges, w)
  }.failed.foreach { e =>
    System.err.println(s"Could not write graph to ${args(2)}: $e")
  }

  val language =
    new LanguageGraph(
      parser,
      state,
      parser.getATN.ruleToStopState(ruleIndex),
      edges,
    )
  val lang = language.asRegLang()
  lang.render(System.out)
}
}
224 changes: 144 additions & 80 deletions src/parsers/vct/parsers/debug/Grammar.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,77 +7,53 @@ import scala.annotation.tailrec
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import scala.jdk.CollectionConverters.CollectionHasAsScala
import scala.runtime.ScalaRunTime

trait RenderExp {
def render: (String, Int)

def bind(minPrecedence: Int): String = {
val (text, precedence) = render

if (precedence < minPrecedence)
s"($text)"
else
text
def render(sb: Appendable): Unit
def precedence: Int

def bind(sb: Appendable, minPrecedence: Int): Unit = {
if (precedence < minPrecedence) {
sb.append("(")
render(sb)
sb.append(")")
} else
render(sb)
}

override def toString: String = render._1
/** Renders this expression into a fresh buffer and returns the text. */
override def toString: String = {
  // java.lang.StringBuilder implements Appendable itself, so it can be passed
  // to render directly; no adapter around the Scala StringBuilder is needed.
  val sb = new java.lang.StringBuilder()
  render(sb)
  sb.toString
}
}

/** Data structure to represent regular expressions with added node to represent
* arbitrary (potentially non-regular) antlr expressions.
*/
sealed trait RegLang extends RenderExp {
private def collectSeqn(buf: ArrayBuffer[RegLang]): Unit =
private[debug] def collectSeqn(buf: ArrayBuffer[RegLang]): Unit =
this match {
case Seqn(langs @ _*) => langs.foreach(_.simplify.collectSeqn(buf))
case Seqn(langs @ _*) => langs.foreach(_.collectSeqn(buf))
case other => buf += other
}

private def collectAlts(buf: mutable.Set[RegLang]): Unit =
private[debug] def collectAlts(buf: mutable.Set[RegLang]): Unit =
this match {
case Alts(langs @ _*) => langs.foreach(_.simplify.collectAlts(buf))
case Alts(langs @ _*) => langs.foreach(_.collectAlts(buf))
case other => buf += other
}

def flatSeqn: Seq[RegLang] = {
val buf = ArrayBuffer[RegLang]()
collectSeqn(buf)
buf.toSeq
}

def flatAlts: Seq[RegLang] = {
val buf = mutable.Set[RegLang]()
collectAlts(buf)
buf.toSeq
}

def simplify: RegLang =
this match {
case tok: Antlr => tok
case Seqn(_ @_*) =>
flatSeqn match {
case Nil => Seqn()
case one :: Nil => one
case more if more.contains(Alts()) => Alts()
case more => Seqn(more: _*)
}
case Alts(_ @_*) =>
flatAlts match {
case Nil => Alts()
case one :: Nil => one
case more => Alts(more: _*)
}
case Star(lang) =>
lang.simplify match {
case Seqn() => Seqn()
case Alts(alts @ _*) =>
alts.filter(_ != Seqn()) match {
case Nil => Seqn()
case more => Star(Alts(more: _*))
}
case other => Star(other)
}
}
}

object Antlr {
Expand Down Expand Up @@ -117,51 +93,139 @@ object Antlr {
}

case class Antlr(text: String, precedence: Int) extends RegLang {
override def render: (String, Int) = text -> precedence
override def render(sb: Appendable): Unit = sb.append(text)
}

object Seqn {

  /** Normalizing constructor for a sequence of languages.
    *
    * Nested sequences are flattened into one list of parts. An empty list
    * yields the empty sequence, a single part is returned as-is, and a list
    * that contains the empty alternation collapses to the empty alternation.
    */
  def apply(langs: RegLang*): RegLang = {
    val flattened = ArrayBuffer.empty[RegLang]
    for (lang <- langs) lang.collectSeqn(flattened)
    val parts = flattened.toSeq

    if (parts.isEmpty) new Seqn()
    else if (parts.size == 1) parts.head
    else if (parts.contains(new Alts())) new Alts()
    else new Seqn(parts: _*)
  }
}

case class Seqn(langs: RegLang*) extends RegLang {
override lazy val hashCode: Int = ScalaRunTime._hashCode(this)

@tailrec
private def renderSeqnWithPlus(s: StringBuilder, langs: Seq[RegLang]): Unit =
private def renderSeqnWithPlus(
sb: Appendable,
langs: Seq[RegLang],
safe: Boolean,
): Unit =
langs match {
case Nil =>
case Star(x) :: y :: tail if x == y =>
s.append(x.bind(50)).append("+ ")
renderSeqnWithPlus(s, tail)
case Star(x @ Seqn(xs @ _*)) :: tail if tail.startsWith(xs) =>
s.append(x.bind(50)).append("+ ")
renderSeqnWithPlus(s, tail.drop(xs.size))
case x :: tail =>
s.append(x.bind(50)).append(" ")
renderSeqnWithPlus(s, tail)
case Star(x) +: y +: tail if x == y =>
if (!safe)
sb.append(' ')
x.bind(sb, 50)
sb.append("+")
renderSeqnWithPlus(sb, tail, false)
case Star(x @ Seqn(xs @ _*)) +: tail if tail.startsWith(xs) =>
if (!safe)
sb.append(' ')
x.bind(sb, 50)
sb.append("+")
renderSeqnWithPlus(sb, tail.drop(xs.size), false)
case x +: tail =>
if (!safe)
sb.append(' ')
x.bind(sb, 50)
renderSeqnWithPlus(sb, tail, false)
}

override def render: (String, Int) =
override def render(sb: Appendable): Unit =
langs match {
case Nil => "()" -> 100
case langs =>
val sb = new StringBuilder()
renderSeqnWithPlus(sb, langs)
sb.setLength(sb.length() - 1)
sb.toString -> 50
case Nil => sb.append("()")
case langs => renderSeqnWithPlus(sb, langs, true)
}

override def precedence: Int =
langs match {
case Nil => 100
case _ => 50
}
}

object Alts {

  /** Normalizing constructor for a choice between languages.
    *
    * Nested alternations are flattened and duplicate alternatives are dropped
    * by collecting them into a set. No alternatives yields the empty
    * alternation, and a single alternative is returned as-is.
    */
  def apply(langs: RegLang*): RegLang = {
    val distinct = mutable.Set.empty[RegLang]
    langs.foreach(lang => lang.collectAlts(distinct))
    val options = distinct.toSeq

    options.size match {
      case 0 => new Alts()
      case 1 => options.head
      case _ => new Alts(options: _*)
    }
  }
}

case class Alts(langs: RegLang*) extends RegLang {
override def render: (String, Int) =
override lazy val hashCode: Int = ScalaRunTime._hashCode(this)

override def render(sb: Appendable): Unit =
langs match {
case Nil => sb.append("{false}?")
case lang +: Nil => lang.render(sb)
case langs if langs.contains(Seqn()) =>
langs.filterNot(_ == Seqn()) match {
case Nil => sb.append("()")
case lang +: Nil => lang.bind(sb, 80)
case langs =>
sb.append("(")
langs.head.bind(sb, 0)
for (lang <- langs.tail) {
sb.append(" | ")
lang.bind(sb, 0)
}
sb.append(")?")
}
case langs =>
langs.head.bind(sb, 0)
for (lang <- langs.tail) {
sb.append(" | ")
lang.bind(sb, 0)
}
}

override def precedence: Int =
langs match {
case Nil => "{false}?" -> 100
case lang :: Nil => lang.render
case Nil => 100
case lang +: Nil => lang.precedence
case langs if langs.contains(Seqn()) =>
langs.filterNot(_ == Seqn()) match {
case Nil => "()" -> 100
case lang :: Nil => s"${lang.bind(80)}?" -> 80
case langs => langs.map(_.bind(0)).mkString("(", " | ", ")?") -> 0
case Nil => 100
case _ +: Nil => 80
case _ => 0
}
case _ => 0
}
}

object Star {

  /** Normalizing constructor for zero-or-more repetition.
    *
    * Repeating the empty sequence is the empty sequence. Inside a repeated
    * alternation the empty-sequence alternative is redundant and is removed;
    * if nothing else remains the result is the empty sequence.
    */
  def apply(lang: RegLang): RegLang =
    lang match {
      case Seqn() => new Seqn()
      case Alts(alts @ _*) =>
        alts.filter(_ != new Seqn()) match {
          case Nil => new Seqn()
          // Rebuild through the normalizing Alts constructor so that a single
          // remaining alternative x gives Star(x) rather than Star(Alts(x)).
          // `new Star` is used on purpose: calling Star(...) again here would
          // recurse forever on an alternation with two or more alternatives.
          case more => new Star(Alts(more: _*))
        }
      case other => new Star(other)
    }
}

case class Star(lang: RegLang) extends RegLang {
override def render: (String, Int) = s"${lang.bind(80)}*" -> 80
override lazy val hashCode: Int = ScalaRunTime._hashCode(this)

override def render(sb: Appendable): Unit = {
lang.bind(sb, 80)
sb.append("*")
}

override def precedence: Int = 80
}

0 comments on commit c5f3c1a

Please sign in to comment.