From c5f3c1ae2ea2f9df1cc280da5681f0962ecf22fa Mon Sep 17 00:00:00 2001 From: Pieter Bos Date: Wed, 29 May 2024 16:00:27 +0200 Subject: [PATCH] more grammar debugging tools --- src/parsers/vct/parsers/debug/ATNTools.scala | 64 +++++- src/parsers/vct/parsers/debug/Grammar.scala | 224 ++++++++++++------- 2 files changed, 198 insertions(+), 90 deletions(-) diff --git a/src/parsers/vct/parsers/debug/ATNTools.scala b/src/parsers/vct/parsers/debug/ATNTools.scala index ad3570fb5b..466221960f 100644 --- a/src/parsers/vct/parsers/debug/ATNTools.scala +++ b/src/parsers/vct/parsers/debug/ATNTools.scala @@ -88,7 +88,15 @@ object ATNTools { case other => other.target } - edges += ((s, transitionLanguage(recognizer, trans), target)) + val language = + trans match { + case rule: RuleTransition + if expandRules.contains(rule.ruleIndex) => + Seqn() + case other => transitionLanguage(recognizer, other) + } + + edges += ((s, language, target)) if (!explored.contains(target)) { toExplore += target } } @@ -118,8 +126,8 @@ object ATNTools { val startOut = outEdge.getOrElseUpdate(start, mutable.Map()) val endIn = inEdge.getOrElseUpdate(end, mutable.Map()) - startOut(end) = Alts(lang, startOut.getOrElse(end, Alts())).simplify - endIn(start) = Alts(lang, endIn.getOrElse(start, Alts())).simplify + startOut(end) = Alts(lang, startOut.getOrElse(end, Alts())) + endIn(start) = Alts(lang, endIn.getOrElse(start, Alts())) } def delete(state: ATNState): Unit = { @@ -157,13 +165,31 @@ object ATNTools { def compact(): Unit = { output(Paths.get(s"tmp/${recognizer.getRuleNames()(s0.ruleIndex)}-0.dot")) - for ((state, i) <- inEdge.keys.toSeq.zipWithIndex) { + var i = 1 + + while ( + inEdge.size > 2 || outEdge.size > 2 || (inEdge.keys ++ outEdge.keys) + .toSeq.distinct.size > 2 + ) { + val state = (inEdge.keys ++ outEdge.keys).minBy(state => + ( + state == s0 || state == accept, + (inEdge.getOrElse(state, EMPTY_MAP).size - 1) * + (outEdge.getOrElse(state, EMPTY_MAP).size - 1), + ) + ) + if (state != s0 && 
state != accept) { delete(state) } + else + ??? - output( - Paths - .get(s"tmp/${recognizer.getRuleNames()(s0.ruleIndex)}-${i + 1}.dot") - ) + println(inEdge.size) + + /*output( + Paths.get(s"tmp/${recognizer.getRuleNames()(s0.ruleIndex)}-$i.dot") + )*/ + + i += 1 } } @@ -180,7 +206,7 @@ object ATNTools { val endAtReject = Star( Alts(stayReject, Seqn(goAccept, Star(stayAccept), goReject)) ) - Seqn(endAtReject, goAccept, Star(stayAccept)).simplify + Seqn(endAtReject, goAccept, Star(stayAccept)) } } @@ -196,16 +222,34 @@ object ATNTools { * class of the parser to analyze (e.g. vct.antlr4.generated.CParser) * Argument 2: parse rule to derive the ATN of (e.g. initializerList) * Argument 3: output file in DOT/graphviz format (e.g. initializerList.dot) + * Arguments 4: (optional) rules to expand separated by comma, or * to expand + * all rules */ def main(args: Array[String]): Unit = { val parserClass = getClass.getClassLoader.loadClass(args(0)) val parser = parserClass.getConstructor(classOf[TokenStream]) .newInstance(null).asInstanceOf[Recognizer] val ruleIndex = parser.getRuleIndexMap.get(args(1)) + val expand = + args.lift(3) match { + case None => Set.empty[Int] + case Some("*") => parser.getRuleNames.indices.toSet + case Some(list) => + list.split(",").map(parser.getRuleIndexMap.get(_).toInt).toSet + } val state = parser.getATN.ruleToStartState(ruleIndex) - val edges = getEdges(parser, state) + val edges = getEdges(parser, state, expandRules = expand) Using(Files.newBufferedWriter(Paths.get(args(2)))) { w => outputGraph(parser, edges, w) } + val language = + new LanguageGraph( + parser, + state, + parser.getATN.ruleToStopState(ruleIndex), + edges, + ) + val lang = language.asRegLang() + lang.render(System.out) } } diff --git a/src/parsers/vct/parsers/debug/Grammar.scala b/src/parsers/vct/parsers/debug/Grammar.scala index eb56ac5f7a..6bcf787979 100644 --- a/src/parsers/vct/parsers/debug/Grammar.scala +++ b/src/parsers/vct/parsers/debug/Grammar.scala @@ -7,77 +7,53 
@@ import scala.annotation.tailrec import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.jdk.CollectionConverters.CollectionHasAsScala +import scala.runtime.ScalaRunTime trait RenderExp { - def render: (String, Int) - - def bind(minPrecedence: Int): String = { - val (text, precedence) = render - - if (precedence < minPrecedence) - s"($text)" - else - text + def render(sb: Appendable): Unit + def precedence: Int + + def bind(sb: Appendable, minPrecedence: Int): Unit = { + if (precedence < minPrecedence) { + sb.append("(") + render(sb) + sb.append(")") + } else + render(sb) } - override def toString: String = render._1 + override def toString: String = { + val sb = new StringBuilder() + render(new Appendable { + override def append(charSequence: CharSequence): Appendable = { + sb.append(charSequence); this + } + override def append( + charSequence: CharSequence, + i: Int, + i1: Int, + ): Appendable = { sb.append(charSequence.subSequence(i, i1)); this } + override def append(c: Char): Appendable = { sb.append(c); this } + }) + sb.toString() + } } /** Data structure to represent regular expressions with added node to represent * arbitrary (potentially non-regular) antlr expressions. 
*/ sealed trait RegLang extends RenderExp { - private def collectSeqn(buf: ArrayBuffer[RegLang]): Unit = + private[debug] def collectSeqn(buf: ArrayBuffer[RegLang]): Unit = this match { - case Seqn(langs @ _*) => langs.foreach(_.simplify.collectSeqn(buf)) + case Seqn(langs @ _*) => langs.foreach(_.collectSeqn(buf)) case other => buf += other } - private def collectAlts(buf: mutable.Set[RegLang]): Unit = + private[debug] def collectAlts(buf: mutable.Set[RegLang]): Unit = this match { - case Alts(langs @ _*) => langs.foreach(_.simplify.collectAlts(buf)) + case Alts(langs @ _*) => langs.foreach(_.collectAlts(buf)) case other => buf += other } - - def flatSeqn: Seq[RegLang] = { - val buf = ArrayBuffer[RegLang]() - collectSeqn(buf) - buf.toSeq - } - - def flatAlts: Seq[RegLang] = { - val buf = mutable.Set[RegLang]() - collectAlts(buf) - buf.toSeq - } - - def simplify: RegLang = - this match { - case tok: Antlr => tok - case Seqn(_ @_*) => - flatSeqn match { - case Nil => Seqn() - case one :: Nil => one - case more if more.contains(Alts()) => Alts() - case more => Seqn(more: _*) - } - case Alts(_ @_*) => - flatAlts match { - case Nil => Alts() - case one :: Nil => one - case more => Alts(more: _*) - } - case Star(lang) => - lang.simplify match { - case Seqn() => Seqn() - case Alts(alts @ _*) => - alts.filter(_ != Seqn()) match { - case Nil => Seqn() - case more => Star(Alts(more: _*)) - } - case other => Star(other) - } - } } object Antlr { @@ -117,51 +93,139 @@ object Antlr { } case class Antlr(text: String, precedence: Int) extends RegLang { - override def render: (String, Int) = text -> precedence + override def render(sb: Appendable): Unit = sb.append(text) +} + +object Seqn { + def apply(langs: RegLang*): RegLang = { + val buf = ArrayBuffer[RegLang]() + langs.foreach(_.collectSeqn(buf)) + buf.toSeq match { + case Nil => new Seqn() + case one +: Nil => one + case more if more.contains(new Alts()) => new Alts() + case more => new Seqn(more: _*) + } + } } case class 
Seqn(langs: RegLang*) extends RegLang { + override lazy val hashCode: Int = ScalaRunTime._hashCode(this) + @tailrec - private def renderSeqnWithPlus(s: StringBuilder, langs: Seq[RegLang]): Unit = + private def renderSeqnWithPlus( + sb: Appendable, + langs: Seq[RegLang], + safe: Boolean, + ): Unit = langs match { case Nil => - case Star(x) :: y :: tail if x == y => - s.append(x.bind(50)).append("+ ") - renderSeqnWithPlus(s, tail) - case Star(x @ Seqn(xs @ _*)) :: tail if tail.startsWith(xs) => - s.append(x.bind(50)).append("+ ") - renderSeqnWithPlus(s, tail.drop(xs.size)) - case x :: tail => - s.append(x.bind(50)).append(" ") - renderSeqnWithPlus(s, tail) + case Star(x) +: y +: tail if x == y => + if (!safe) + sb.append(' ') + x.bind(sb, 50) + sb.append("+") + renderSeqnWithPlus(sb, tail, false) + case Star(x @ Seqn(xs @ _*)) +: tail if tail.startsWith(xs) => + if (!safe) + sb.append(' ') + x.bind(sb, 50) + sb.append("+") + renderSeqnWithPlus(sb, tail.drop(xs.size), false) + case x +: tail => + if (!safe) + sb.append(' ') + x.bind(sb, 50) + renderSeqnWithPlus(sb, tail, false) } - override def render: (String, Int) = + override def render(sb: Appendable): Unit = langs match { - case Nil => "()" -> 100 - case langs => - val sb = new StringBuilder() - renderSeqnWithPlus(sb, langs) - sb.setLength(sb.length() - 1) - sb.toString -> 50 + case Nil => sb.append("()") + case langs => renderSeqnWithPlus(sb, langs, true) + } + + override def precedence: Int = + langs match { + case Nil => 100 + case _ => 50 + } +} + +object Alts { + def apply(langs: RegLang*): RegLang = { + val buf = mutable.Set[RegLang]() + langs.foreach(_.collectAlts(buf)) + buf.toSeq match { + case Nil => new Alts() + case one +: Nil => one + case more => new Alts(more: _*) } + } } case class Alts(langs: RegLang*) extends RegLang { - override def render: (String, Int) = + override lazy val hashCode: Int = ScalaRunTime._hashCode(this) + + override def render(sb: Appendable): Unit = + langs match { + case Nil 
=> sb.append("{false}?") + case lang +: Nil => lang.render(sb) + case langs if langs.contains(Seqn()) => + langs.filterNot(_ == Seqn()) match { + case Nil => sb.append("()") + case lang +: Nil => lang.bind(sb, 80); sb.append("?") /* the empty sequence next to exactly one other alternative means that alternative is optional, so keep the ? suffix the old renderer emitted */ + case langs => + sb.append("(") + langs.head.bind(sb, 0) + for (lang <- langs.tail) { + sb.append(" | ") + lang.bind(sb, 0) + } + sb.append(")?") + } + case langs => + langs.head.bind(sb, 0) + for (lang <- langs.tail) { + sb.append(" | ") + lang.bind(sb, 0) + } + } + + override def precedence: Int = langs match { - case Nil => "{false}?" -> 100 - case lang :: Nil => lang.render + case Nil => 100 + case lang +: Nil => lang.precedence case langs if langs.contains(Seqn()) => langs.filterNot(_ == Seqn()) match { - case Nil => "()" -> 100 - case lang :: Nil => s"${lang.bind(80)}?" -> 80 - case langs => langs.map(_.bind(0)).mkString("(", " | ", ")?") -> 0 + case Nil => 100 + case _ +: Nil => 80 + case _ => 0 + } + case _ => 0 + } +} + +object Star { + def apply(lang: RegLang): RegLang = + lang match { + case Seqn() => new Seqn() + case Alts(alts @ _*) => + alts.filter(_ != new Seqn()) match { + case Nil => new Seqn() + case more => new Star(new Alts(more: _*)) } - case langs => langs.mkString(" | ") -> 0 + case other => new Star(other) } } case class Star(lang: RegLang) extends RegLang { - override def render: (String, Int) = s"${lang.bind(80)}*" -> 80 + override lazy val hashCode: Int = ScalaRunTime._hashCode(this) + + override def render(sb: Appendable): Unit = { + lang.bind(sb, 80) + sb.append("*") + } + + override def precedence: Int = 80 }