From c5f3c1ae2ea2f9df1cc280da5681f0962ecf22fa Mon Sep 17 00:00:00 2001
From: Pieter Bos
Date: Wed, 29 May 2024 16:00:27 +0200
Subject: [PATCH] more grammar debugging tools
---
src/parsers/vct/parsers/debug/ATNTools.scala | 64 +++++-
src/parsers/vct/parsers/debug/Grammar.scala | 224 ++++++++++++-------
2 files changed, 198 insertions(+), 90 deletions(-)
diff --git a/src/parsers/vct/parsers/debug/ATNTools.scala b/src/parsers/vct/parsers/debug/ATNTools.scala
index ad3570fb5b..466221960f 100644
--- a/src/parsers/vct/parsers/debug/ATNTools.scala
+++ b/src/parsers/vct/parsers/debug/ATNTools.scala
@@ -88,7 +88,15 @@ object ATNTools {
case other => other.target
}
- edges += ((s, transitionLanguage(recognizer, trans), target))
+ val language =
+ trans match {
+ case rule: RuleTransition
+ if expandRules.contains(rule.ruleIndex) =>
+ Seqn()
+ case other => transitionLanguage(recognizer, other)
+ }
+
+ edges += ((s, language, target))
if (!explored.contains(target)) { toExplore += target }
}
@@ -118,8 +126,8 @@ object ATNTools {
val startOut = outEdge.getOrElseUpdate(start, mutable.Map())
val endIn = inEdge.getOrElseUpdate(end, mutable.Map())
- startOut(end) = Alts(lang, startOut.getOrElse(end, Alts())).simplify
- endIn(start) = Alts(lang, endIn.getOrElse(start, Alts())).simplify
+ startOut(end) = Alts(lang, startOut.getOrElse(end, Alts()))
+ endIn(start) = Alts(lang, endIn.getOrElse(start, Alts()))
}
def delete(state: ATNState): Unit = {
@@ -157,13 +165,31 @@ object ATNTools {
+ /** Repeatedly deletes a state other than `s0`/`accept` until at most two
+ * distinct states are left as keys of `inEdge`/`outEdge`. The graph as it
+ * is before any deletion is written to `tmp/<rule name>-0.dot` first.
+ *
+ * NOTE(review): termination relies on `delete` removing the state from both
+ * maps; `delete` is defined outside this hunk - confirm.
+ */
def compact(): Unit = {
output(Paths.get(s"tmp/${recognizer.getRuleNames()(s0.ruleIndex)}-0.dot"))
- for ((state, i) <- inEdge.keys.toSeq.zipWithIndex) {
+ // Step counter; currently only referenced by the commented-out output call
+ // further down.
+ var i = 1
+
+ // Keep going while more than two distinct states still occur as a key of
+ // either edge map.
+ while (
+ inEdge.size > 2 || outEdge.size > 2 || (inEdge.keys ++ outEdge.keys)
+ .toSeq.distinct.size > 2
+ ) {
+ // Choose the next state to delete. Tuples compare lexicographically, so
+ // states other than s0/accept (false) sort first; among those, the state
+ // with the smallest (inEdge entries - 1) * (outEdge entries - 1) wins.
+ val state = (inEdge.keys ++ outEdge.keys).minBy(state =>
+ (
+ state == s0 || state == accept,
+ (inEdge.getOrElse(state, EMPTY_MAP).size - 1) *
+ (outEdge.getOrElse(state, EMPTY_MAP).size - 1),
+ )
+ )
+
if (state != s0 && state != accept) { delete(state) }
+ // Should be unreachable: the loop condition guarantees more than two
+ // distinct states, so a state other than s0/accept exists and sorts first.
+ // If reached anyway, ??? throws NotImplementedError.
+ else
+ ???
- output(
- Paths
- .get(s"tmp/${recognizer.getRuleNames()(s0.ruleIndex)}-${i + 1}.dot")
- )
+ // Progress output: number of states that still have an inEdge entry.
+ println(inEdge.size)
+
+ // Per-step graph dump, disabled.
+ /*output(
+ Paths.get(s"tmp/${recognizer.getRuleNames()(s0.ruleIndex)}-$i.dot")
+ )*/
+
+ i += 1
}
}
@@ -180,7 +206,7 @@ object ATNTools {
val endAtReject = Star(
Alts(stayReject, Seqn(goAccept, Star(stayAccept), goReject))
)
- Seqn(endAtReject, goAccept, Star(stayAccept)).simplify
+ Seqn(endAtReject, goAccept, Star(stayAccept))
}
}
@@ -196,16 +222,34 @@ object ATNTools {
* class of the parser to analyze (e.g. vct.antlr4.generated.CParser)
* Argument 2: parse rule to derive the ATN of (e.g. initializerList)
* Argument 3: output file in DOT/graphviz format (e.g. initializerList.dot)
+ * Argument 4: (optional) comma-separated list of rules to expand, or * to
+ * expand all rules
*/
def main(args: Array[String]): Unit = {
val parserClass = getClass.getClassLoader.loadClass(args(0))
val parser = parserClass.getConstructor(classOf[TokenStream])
.newInstance(null).asInstanceOf[Recognizer]
val ruleIndex = parser.getRuleIndexMap.get(args(1))
+ // Indices of the rules selected by the optional fourth argument; handed to
+ // getEdges as expandRules.
+ val expand =
+ args.lift(3) match {
+ case None => Set.empty[Int]
+ case Some("*") => parser.getRuleNames.indices.toSet
+ case Some(list) =>
+ // getRuleIndexMap is a Java map and yields null for an unknown name.
+ // Report the offending name instead of failing with a bare
+ // NullPointerException while unboxing.
+ list.split(",").map { name =>
+ Option(parser.getRuleIndexMap.get(name)).map(_.intValue).getOrElse(
+ throw new IllegalArgumentException(s"Unknown parser rule: $name")
+ )
+ }.toSet
+ }
val state = parser.getATN.ruleToStartState(ruleIndex)
- val edges = getEdges(parser, state)
+ val edges = getEdges(parser, state, expandRules = expand)
Using(Files.newBufferedWriter(Paths.get(args(2)))) { w =>
outputGraph(parser, edges, w)
}
+ // Additionally reduce the same edges, from the rule's start state to its
+ // stop state, to a single expression and render it to standard output.
+ val language =
+ new LanguageGraph(
+ parser,
+ state,
+ parser.getATN.ruleToStopState(ruleIndex),
+ edges,
+ )
+ val lang = language.asRegLang()
+ lang.render(System.out)
}
}
diff --git a/src/parsers/vct/parsers/debug/Grammar.scala b/src/parsers/vct/parsers/debug/Grammar.scala
index eb56ac5f7a..6bcf787979 100644
--- a/src/parsers/vct/parsers/debug/Grammar.scala
+++ b/src/parsers/vct/parsers/debug/Grammar.scala
@@ -7,77 +7,53 @@ import scala.annotation.tailrec
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import scala.jdk.CollectionConverters.CollectionHasAsScala
+import scala.runtime.ScalaRunTime
trait RenderExp {
- def render: (String, Int)
-
- def bind(minPrecedence: Int): String = {
- val (text, precedence) = render
-
- if (precedence < minPrecedence)
- s"($text)"
- else
- text
+ /** Appends the textual form of this expression to `sb`. */
+ def render(sb: Appendable): Unit
+
+ /** Binding strength of the rendered text; `bind` compares it against the
+ * requested minimum to decide whether parentheses are needed.
+ */
+ def precedence: Int
+
+ /** Renders this expression into `sb`, wrapped in parentheses when
+ * `precedence` is lower than `minPrecedence`.
+ */
+ def bind(sb: Appendable, minPrecedence: Int): Unit = {
+ if (precedence < minPrecedence) {
+ sb.append("(")
+ render(sb)
+ sb.append(")")
+ } else
+ render(sb)
}
- override def toString: String = render._1
+ /** The rendered text of this expression, without surrounding parentheses. */
+ override def toString: String = {
+ // java.lang.StringBuilder already implements Appendable, so it can be
+ // handed to render directly; no adapter object is needed.
+ val sb = new java.lang.StringBuilder()
+ render(sb)
+ sb.toString
+ }
}
/** Data structure to represent regular expressions with added node to represent
* arbitrary (potentially non-regular) antlr expressions.
*/
sealed trait RegLang extends RenderExp {
- private def collectSeqn(buf: ArrayBuffer[RegLang]): Unit =
+ /** Appends this language to `buf` in order, recursively splicing in the
+ * elements of nested `Seqn` nodes; any other node is appended as-is.
+ */
+ private[debug] def collectSeqn(buf: ArrayBuffer[RegLang]): Unit =
this match {
- case Seqn(langs @ _*) => langs.foreach(_.simplify.collectSeqn(buf))
+ case Seqn(langs @ _*) => langs.foreach(_.collectSeqn(buf))
case other => buf += other
}
- private def collectAlts(buf: mutable.Set[RegLang]): Unit =
+ /** Adds this language to `buf`, recursively splicing in the alternatives of
+ * nested `Alts` nodes; any other node is added as-is. `buf` is a set, so
+ * equal alternatives are kept only once.
+ */
+ private[debug] def collectAlts(buf: mutable.Set[RegLang]): Unit =
this match {
- case Alts(langs @ _*) => langs.foreach(_.simplify.collectAlts(buf))
+ case Alts(langs @ _*) => langs.foreach(_.collectAlts(buf))
case other => buf += other
}
-
- def flatSeqn: Seq[RegLang] = {
- val buf = ArrayBuffer[RegLang]()
- collectSeqn(buf)
- buf.toSeq
- }
-
- def flatAlts: Seq[RegLang] = {
- val buf = mutable.Set[RegLang]()
- collectAlts(buf)
- buf.toSeq
- }
-
- def simplify: RegLang =
- this match {
- case tok: Antlr => tok
- case Seqn(_ @_*) =>
- flatSeqn match {
- case Nil => Seqn()
- case one :: Nil => one
- case more if more.contains(Alts()) => Alts()
- case more => Seqn(more: _*)
- }
- case Alts(_ @_*) =>
- flatAlts match {
- case Nil => Alts()
- case one :: Nil => one
- case more => Alts(more: _*)
- }
- case Star(lang) =>
- lang.simplify match {
- case Seqn() => Seqn()
- case Alts(alts @ _*) =>
- alts.filter(_ != Seqn()) match {
- case Nil => Seqn()
- case more => Star(Alts(more: _*))
- }
- case other => Star(other)
- }
- }
}
object Antlr {
@@ -117,51 +93,139 @@ object Antlr {
}
case class Antlr(text: String, precedence: Int) extends RegLang {
- override def render: (String, Int) = text -> precedence
+ // Emits the stored text verbatim. The `precedence` constructor parameter
+ // supplies the `precedence` member that RenderExp requires.
+ override def render(sb: Appendable): Unit = sb.append(text)
+}
+
+/** Normalizing constructor for sequences. */
+object Seqn {
+ /** Builds the sequence of `langs` with nested sequences flattened.
+ *
+ * Yields the empty sequence when nothing is left, the element itself when
+ * exactly one is left, and the empty alternation when any element is the
+ * empty alternation; otherwise a `Seqn` node over the flattened elements.
+ */
+ def apply(langs: RegLang*): RegLang = {
+ val flattened = ArrayBuffer.empty[RegLang]
+ for (lang <- langs) lang.collectSeqn(flattened)
+
+ if (flattened.isEmpty) new Seqn()
+ else if (flattened.size == 1) flattened.head
+ else if (flattened.contains(new Alts())) new Alts()
+ else new Seqn(flattened.toSeq: _*)
+ }
}
case class Seqn(langs: RegLang*) extends RegLang {
+ // Structural hash, computed once and cached; these values are hashed when
+ // they are collected into sets (see Alts.apply).
+ override lazy val hashCode: Int = ScalaRunTime._hashCode(this)
+
+ /** Renders `langs` separated by single spaces, folding a starred element
+ * that is directly followed by the same element(s) into one `x+`.
+ *
+ * @param safe
+ * true when nothing has been written for this sequence yet, so no
+ * separating space is needed before the next element
+ */
@tailrec
- private def renderSeqnWithPlus(s: StringBuilder, langs: Seq[RegLang]): Unit =
+ private def renderSeqnWithPlus(
+ sb: Appendable,
+ langs: Seq[RegLang],
+ safe: Boolean,
+ ): Unit =
langs match {
case Nil =>
- case Star(x) :: y :: tail if x == y =>
- s.append(x.bind(50)).append("+ ")
- renderSeqnWithPlus(s, tail)
- case Star(x @ Seqn(xs @ _*)) :: tail if tail.startsWith(xs) =>
- s.append(x.bind(50)).append("+ ")
- renderSeqnWithPlus(s, tail.drop(xs.size))
- case x :: tail =>
- s.append(x.bind(50)).append(" ")
- renderSeqnWithPlus(s, tail)
+ // x* x is rendered as x+
+ case Star(x) +: y +: tail if x == y =>
+ if (!safe)
+ sb.append(' ')
+ // Postfix + binds like postfix * (Star.render binds at 80), so the
+ // operand needs the same minimum precedence.
+ x.bind(sb, 80)
+ sb.append("+")
+ renderSeqnWithPlus(sb, tail, false)
+ // (a b)* a b is rendered as (a b)+
+ case Star(x @ Seqn(xs @ _*)) +: tail if tail.startsWith(xs) =>
+ if (!safe)
+ sb.append(' ')
+ // A sequence has precedence 50: binding at 50 would print "a b+",
+ // which repeats only b. Binding at 80 adds the parentheses.
+ x.bind(sb, 80)
+ sb.append("+")
+ renderSeqnWithPlus(sb, tail.drop(xs.size), false)
+ case x +: tail =>
+ if (!safe)
+ sb.append(' ')
+ x.bind(sb, 50)
+ renderSeqnWithPlus(sb, tail, false)
}
- override def render: (String, Int) =
+ /** Renders the empty sequence as `()` and any other sequence as its
+ * space-separated elements.
+ */
+ override def render(sb: Appendable): Unit =
langs match {
- case Nil => "()" -> 100
- case langs =>
- val sb = new StringBuilder()
- renderSeqnWithPlus(sb, langs)
- sb.setLength(sb.length() - 1)
- sb.toString -> 50
+ case Nil => sb.append("()")
+ case langs => renderSeqnWithPlus(sb, langs, true)
+ }
+
+ /** 100 for the empty sequence (rendered as `()`), 50 otherwise. */
+ override def precedence: Int =
+ langs match {
+ case Nil => 100
+ case _ => 50
+ }
+}
+
+/** Normalizing constructor for alternations. */
+object Alts {
+ /** Builds the alternation of `langs` with nested alternations flattened and
+ * equal alternatives kept once.
+ *
+ * Yields the empty alternation when nothing is left and the alternative
+ * itself when exactly one is left; otherwise an `Alts` node over the
+ * distinct alternatives, in the iteration order of the underlying set.
+ */
+ def apply(langs: RegLang*): RegLang = {
+ val unique = mutable.Set.empty[RegLang]
+ for (lang <- langs) lang.collectAlts(unique)
+
+ unique.toSeq match {
+ case Seq() => new Alts()
+ case Seq(single) => single
+ case several => new Alts(several: _*)
}
+ }
}
case class Alts(langs: RegLang*) extends RegLang {
- override def render: (String, Int) =
+ // Structural hash, computed once and cached; these values are hashed when
+ // they are collected into sets (see Alts.apply).
+ override lazy val hashCode: Int = ScalaRunTime._hashCode(this)
+
+ /** Renders this alternation.
+ *
+ * The empty alternation is written as `{false}?` and a single alternative
+ * as that alternative. When the empty sequence is one of the alternatives,
+ * the remaining ones are written as an optional: `x?` or `(x | y)?`.
+ * Otherwise the alternatives are joined with ` | `.
+ */
+ override def render(sb: Appendable): Unit =
+ langs match {
+ case Nil => sb.append("{false}?")
+ case lang +: Nil => lang.render(sb)
+ case langs if langs.contains(Seqn()) =>
+ langs.filterNot(_ == Seqn()) match {
+ case Nil => sb.append("()")
+ case lang +: Nil =>
+ lang.bind(sb, 80)
+ // "empty or x" is the optional x?; the suffix is what marks it as
+ // optional and matches the precedence of 80 reported below.
+ sb.append("?")
+ case langs =>
+ sb.append("(")
+ langs.head.bind(sb, 0)
+ for (lang <- langs.tail) {
+ sb.append(" | ")
+ lang.bind(sb, 0)
+ }
+ sb.append(")?")
+ }
+ case langs =>
+ langs.head.bind(sb, 0)
+ for (lang <- langs.tail) {
+ sb.append(" | ")
+ lang.bind(sb, 0)
+ }
+ }
+
+ /** Precedence of the text written by `render`, case by case in the same
+ * order as `render`.
+ */
+ override def precedence: Int =
langs match {
- case Nil => "{false}?" -> 100
- case lang :: Nil => lang.render
+ case Nil => 100
+ case lang +: Nil => lang.precedence
case langs if langs.contains(Seqn()) =>
langs.filterNot(_ == Seqn()) match {
- case Nil => "()" -> 100
- case lang :: Nil => s"${lang.bind(80)}?" -> 80
- case langs => langs.map(_.bind(0)).mkString("(", " | ", ")?") -> 0
+ case Nil => 100
+ case _ +: Nil => 80
+ case _ => 0
+ }
+ case _ => 0
+ }
+}
+
+/** Normalizing constructor for repetition. */
+object Star {
+ /** Builds the repetition of `lang`.
+ *
+ * Repeating the empty sequence yields the empty sequence. Inside an
+ * alternation the empty sequence is dropped first, since repetition already
+ * allows zero occurrences.
+ */
+ def apply(lang: RegLang): RegLang =
+ lang match {
+ case Seqn() => new Seqn()
+ case Alts(alts @ _*) =>
+ alts.filter(_ != new Seqn()) match {
+ case Nil => new Seqn()
+ // A lone survivor is starred directly: wrapping it in a one-element
+ // Alts would create a node that Alts.apply never produces, and the
+ // result would not be equal to Star(one).
+ case one +: Nil => new Star(one)
+ case more => new Star(new Alts(more: _*))
+ }
- case langs => langs.mkString(" | ") -> 0
+ case other => new Star(other)
}
}
case class Star(lang: RegLang) extends RegLang {
- override def render: (String, Int) = s"${lang.bind(80)}*" -> 80
+ // Structural hash, computed once and cached on first use.
+ override lazy val hashCode: Int = ScalaRunTime._hashCode(this)
+
+ /** Writes the operand, parenthesized when its precedence is below 80,
+ * followed by `*`.
+ */
+ override def render(sb: Appendable): Unit = {
+ lang.bind(sb, 80)
+ sb.append("*")
+ }
+
+ /** Precedence of the postfix `*` form written by `render`. */
+ override def precedence: Int = 80
}