From eab567b8715a3219e2494b17780b1d471d94787b Mon Sep 17 00:00:00 2001 From: Emmanuel Bastien Date: Fri, 29 Sep 2023 17:35:51 +0200 Subject: [PATCH] fix: recurse only on referential nodes --- examples/openapi.yaml | 6 +- oal-compiler/src/compile.rs | 8 ++- oal-compiler/src/compile_tests.rs | 30 +++++++++ oal-compiler/src/defgraph.rs | 63 ------------------- oal-compiler/src/eval_tests.rs | 41 ++++++++++-- oal-compiler/src/lib.rs | 1 - oal-compiler/src/resolve.rs | 71 +++++++++++++++++---- oal-compiler/src/resolve_tests.rs | 35 +++++++---- oal-compiler/src/typecheck.rs | 97 +++++++++++++++++++---------- oal-compiler/src/typecheck_tests.rs | 7 ++- 10 files changed, 223 insertions(+), 136 deletions(-) delete mode 100644 oal-compiler/src/defgraph.rs diff --git a/examples/openapi.yaml b/examples/openapi.yaml index ce894f9..2f90d49 100644 --- a/examples/openapi.yaml +++ b/examples/openapi.yaml @@ -207,7 +207,7 @@ components: - example: /some/path/_id_number_/template type: string format: uri-reference - - $ref: '#/components/schemas/hash-d60d1f8c1c82157cf19abfacf4d4aeae665becd17938a08d6436fbfc3ca277f2' + - $ref: '#/components/schemas/hash-949ca8b37ea72edb5d2a99579d7f8e77bab63f43a8e2dc6a7788ae9404b493bd' obj2: allOf: - $ref: '#/components/schemas/obj1' @@ -217,7 +217,7 @@ components: type: integer minimum: 0 maximum: 999 - hash-d60d1f8c1c82157cf19abfacf4d4aeae665becd17938a08d6436fbfc3ca277f2: + hash-949ca8b37ea72edb5d2a99579d7f8e77bab63f43a8e2dc6a7788ae9404b493bd: type: object properties: name: @@ -227,4 +227,4 @@ components: children: type: array items: - $ref: '#/components/schemas/hash-d60d1f8c1c82157cf19abfacf4d4aeae665becd17938a08d6436fbfc3ca277f2' + $ref: '#/components/schemas/hash-949ca8b37ea72edb5d2a99579d7f8e77bab63f43a8e2dc6a7788ae9404b493bd' diff --git a/oal-compiler/src/compile.rs b/oal-compiler/src/compile.rs index 47f2ec1..a34f232 100644 --- a/oal-compiler/src/compile.rs +++ b/oal-compiler/src/compile.rs @@ -2,13 +2,13 @@ use crate::errors::Result; use crate::inference::{constrain, substitute, tag}; use crate::module::ModuleSet; use crate::resolve::resolve; -use crate::typecheck::type_check; +use crate::typecheck::{cycles_check, type_check}; use oal_model::locator::Locator; /// Runs all compilation phases. pub fn compile(mods: &ModuleSet, loc: &Locator) -> Result<()> { - // Resolve variable and function references. - resolve(mods, loc)?; + // Resolve variable and function references. Returns the graph of definitions. + let graph = resolve(mods, loc)?; // Tag expressions with concrete and variable types. let _nvars = tag(mods, loc)?; // Collect the set of type inference equations. @@ -17,6 +17,8 @@ pub fn compile(mods: &ModuleSet, loc: &Locator) -> Result<()> { let set = eqs.unify()?; // Substitute tags in each class of equivalence with the representative tag. substitute(mods, loc, &set)?; + // Validates points of recursion in the graph of definitions. + cycles_check(graph, mods)?; // Check type tags against expectations. type_check(mods, loc)?; Ok(()) diff --git a/oal-compiler/src/compile_tests.rs b/oal-compiler/src/compile_tests.rs index 66704d5..3ff6ab7 100644 --- a/oal-compiler/src/compile_tests.rs +++ b/oal-compiler/src/compile_tests.rs @@ -1,6 +1,9 @@ use crate::compile::compile; use crate::module::ModuleSet; +use crate::tests::mods_from; +use oal_model::grammar::AbstractSyntaxNode; use oal_model::locator::Locator; +use oal_syntax::parser::Program; #[test] fn compile_modules() -> anyhow::Result<()> { @@ -25,3 +28,30 @@ fn compile_modules() -> anyhow::Result<()> { Ok(()) } + +#[test] +fn compile_cycles() -> anyhow::Result<()> { + let mods = mods_from( + r#" + let a = { 'b b }; // mutually recursive + let b = { 'a a }; // ... + let c = { 'a a, 'b b }; // non-recursive + let d = { 'd d }; // self-recursive +"#, + )?; + + compile(&mods, mods.base()).expect("should compile"); + + let prog = Program::cast(mods.main().root()).expect("expected a program"); + let a = prog.declarations().nth(0).expect("expected a declaration"); + let b = prog.declarations().nth(1).expect("expected a declaration"); + let c = prog.declarations().nth(2).expect("expected a declaration"); + let d = prog.declarations().nth(3).expect("expected a declaration"); + + assert!(a.node().syntax().core_ref().is_recursive); + assert!(b.node().syntax().core_ref().is_recursive); + assert!(!c.node().syntax().core_ref().is_recursive); + assert!(d.node().syntax().core_ref().is_recursive); + + Ok(()) +} diff --git a/oal-compiler/src/defgraph.rs b/oal-compiler/src/defgraph.rs deleted file mode 100644 index 9790d67..0000000 --- a/oal-compiler/src/defgraph.rs +++ /dev/null @@ -1,63 +0,0 @@ -use crate::definition::External; -use crate::module::ModuleSet; -use petgraph::graph::NodeIndex; -use petgraph::Graph; -use std::collections::{hash_map, HashMap}; - -/// A graph of dependencies between variable definitions. -#[derive(Debug, Default)] -pub struct DefGraph { - /// The current (i.e. opened) definition. - current: Option, - /// The map from definitions to graph node indices. - externals: HashMap, - /// The graph of definitions. - graph: Graph, -} - -impl DefGraph { - /// Inserts a new definition. - fn insert(&mut self, ext: External) -> NodeIndex { - match self.externals.entry(ext.clone()) { - hash_map::Entry::Occupied(e) => *e.get(), - hash_map::Entry::Vacant(e) => *e.insert(self.graph.add_node(ext)), - } - } - - /// Opens a definition, becoming the current definition. - pub fn open(&mut self, from: External) { - self.current = Some(from); - } - - /// Closes a definition. - pub fn close(&mut self) { - self.current = None; - } - - /// Connects the current definition to another definition. - pub fn connect(&mut self, to: External) { - if let Some(from) = &self.current { - let from_idx = self.insert(from.clone()); - let to_idx = self.insert(to); - self.graph.add_edge(from_idx, to_idx, ()); - } - } - - /// Identifies points of recursion in the graph of definitions. - pub fn identify_recursion(&self, mods: &ModuleSet) { - let sccs = petgraph::algo::kosaraju_scc(&self.graph); - for component in sccs { - // A trivial component contains a single vertex which is not connected to itself. - // All non-trivial components contain self or mutually recursive definitions. - let is_trivial = component.len() == 1 && { - let idx = *component.first().unwrap(); - self.graph.find_edge(idx, idx).is_none() - }; - for index in component { - let ext = self.graph.node_weight(index).expect("should exist"); - let node = ext.node(mods); - node.syntax().core_mut().is_recursive = !is_trivial; - } - } - } -} diff --git a/oal-compiler/src/eval_tests.rs b/oal-compiler/src/eval_tests.rs index f50b601..95d1bb4 100644 --- a/oal-compiler/src/eval_tests.rs +++ b/oal-compiler/src/eval_tests.rs @@ -3,13 +3,13 @@ use crate::inference::{check_complete, constrain, substitute, tag}; use crate::resolve::resolve; use crate::spec::{Object, Reference, SchemaExpr, Spec, UriSegment}; use crate::tests::mods_from; -use crate::typecheck::type_check; +use crate::typecheck::{cycles_check, type_check}; use oal_syntax::atom::{HttpStatus, Method, VariadicOperator}; fn eval(code: &str, check: bool) -> anyhow::Result { let mods = mods_from(code)?; let loc = mods.base(); - resolve(&mods, loc)?; + let graph = resolve(&mods, loc)?; let _nvars = tag(&mods, loc)?; let eqs = constrain(&mods, loc)?; let set = eqs.unify()?; @@ -17,6 +17,7 @@ fn eval(code: &str, check: bool) -> anyhow::Result { if check { check_complete(&mods, loc)?; } + cycles_check(graph, &mods)?; type_check(&mods, loc)?; // Uncomment for debugging purpose: @@ -772,14 +773,44 @@ fn eval_unique_recursive_identifiers() -> anyhow::Result<()> { #[test] #[ignore = "not implemented"] fn eval_recursive_uri() -> anyhow::Result<()> { + let code = r#" + let a = concat /a a; + res a; + "#; + + assert!(matches!( + eval_check(code) + .expect_err(format!("expected error evaluating: {}", code).as_str()) + .downcast_ref::() + .expect("expected compiler error") + .kind, + errors::Kind::InvalidType + )); + let s = eval_check( r#" - let a = concat /a a; // Should not work - let x = /b?{ 'p b }; - let b = concat /a x; // Should work + let a = /b?{ 'p b }; + let b = concat /a a; + res b; "#, )?; assert_eq!(s.rels.len(), 1); + assert_eq!(s.refs.len(), 1); + + Ok(()) +} + +#[test] +fn eval_recursive_content() -> anyhow::Result<()> { + let s = eval_check( + r#" + let c = ; + let r = / on get -> c; + res r; + "#, + )?; + assert_eq!(s.rels.len(), 1); + assert_eq!(s.refs.len(), 1); Ok(()) } diff --git a/oal-compiler/src/lib.rs b/oal-compiler/src/lib.rs index f9ce72f..28a88ca 100644 --- a/oal-compiler/src/lib.rs +++ b/oal-compiler/src/lib.rs @@ -1,6 +1,5 @@ mod annotation; pub mod compile; -mod defgraph; pub mod definition; mod env; pub mod errors; diff --git a/oal-compiler/src/resolve.rs b/oal-compiler/src/resolve.rs index 4e99204..ec262c7 100644 --- a/oal-compiler/src/resolve.rs +++ b/oal-compiler/src/resolve.rs @@ -1,4 +1,3 @@ -use crate::defgraph::DefGraph; use crate::definition::{Definition, External}; use crate::env::{Entry, Env}; use crate::errors::{Error, Kind, Result}; @@ -8,8 +7,58 @@ use crate::tree::Core; use oal_model::grammar::{AbstractSyntaxNode, NodeCursor}; use oal_model::locator::Locator; use oal_syntax::parser::{Declaration, Import, Program, Recursion, Variable}; +use petgraph::graph::NodeIndex; +use petgraph::stable_graph::StableDiGraph; +use std::collections::{hash_map, HashMap}; -fn define_variable(env: &mut Env, defg: &mut DefGraph, var: Variable<'_, Core>) -> Result<()> { +pub type Graph = StableDiGraph; + +/// A builder for the graph of dependencies between variable definitions. +#[derive(Debug, Default)] +pub struct Builder { + /// The current (i.e. opened) definition. + current: Option, + /// The map from definitions to graph node indices. + externals: HashMap, + /// The graph of definitions. + graph: Graph, +} + +impl Builder { + /// Inserts a new definition. + fn insert(&mut self, ext: External) -> NodeIndex { + match self.externals.entry(ext.clone()) { + hash_map::Entry::Occupied(e) => *e.get(), + hash_map::Entry::Vacant(e) => *e.insert(self.graph.add_node(ext)), + } + } + + /// Opens a definition, becoming the current definition. + pub fn open(&mut self, from: External) { + self.current = Some(from); + } + + /// Closes a definition. + pub fn close(&mut self) { + self.current = None; + } + + /// Connects the current definition to another definition. + pub fn connect(&mut self, to: External) { + if let Some(from) = &self.current { + let from_idx = self.insert(from.clone()); + let to_idx = self.insert(to); + self.graph.add_edge(from_idx, to_idx, ()); + } + } + + /// Returns the graph of definitions. + pub fn graph(self) -> Graph { + self.graph + } +} + +fn define_variable(env: &mut Env, defg: &mut Builder, var: Variable<'_, Core>) -> Result<()> { let qualifier = var.qualifier().map(|q| q.ident()); let entry = Entry::new(var.ident(), qualifier); if let Some(definition) = env.lookup(&entry) { @@ -57,7 +106,7 @@ fn declare_variable(env: &mut Env, decl: Declaration<'_, Core>) -> Result<()> { } } -fn open_declaration(env: &mut Env, defg: &mut DefGraph, decl: Declaration<'_, Core>) -> Result<()> { +fn open_declaration(env: &mut Env, defg: &mut Builder, decl: Declaration<'_, Core>) -> Result<()> { env.open(); defg.open(External::new(decl.node())); for binding in decl.bindings() { @@ -68,7 +117,7 @@ fn open_declaration(env: &mut Env, defg: &mut DefGraph, decl: Declaration<'_, Co Ok(()) } -fn close_declaration(env: &mut Env, defg: &mut DefGraph) -> Result<()> { +fn close_declaration(env: &mut Env, defg: &mut Builder) -> Result<()> { defg.close(); env.close(); Ok(()) @@ -88,8 +137,8 @@ fn close_recursion(env: &mut Env) -> Result<()> { Ok(()) } -pub fn resolve(mods: &ModuleSet, loc: &Locator) -> Result<()> { - let defg = &mut DefGraph::default(); +pub fn resolve(mods: &ModuleSet, loc: &Locator) -> Result { + let mut defg = Builder::default(); let env = &mut Env::new(); stdlib::import(env)?; @@ -107,16 +156,16 @@ pub fn resolve(mods: &ModuleSet, loc: &Locator) -> Result<()> { match cursor { NodeCursor::Start(node) => { if let Some(decl) = Declaration::cast(node) { - open_declaration(env, defg, decl)?; + open_declaration(env, &mut defg, decl)?; } else if let Some(var) = Variable::cast(node) { - define_variable(env, defg, var)?; + define_variable(env, &mut defg, var)?; } else if let Some(rec) = Recursion::cast(node) { open_recursion(env, rec)?; } } NodeCursor::End(node) => { if Declaration::cast(node).is_some() { - close_declaration(env, defg)?; + close_declaration(env, &mut defg)?; } else if Recursion::cast(node).is_some() { close_recursion(env)?; } @@ -124,7 +173,5 @@ pub fn resolve(mods: &ModuleSet, loc: &Locator) -> Result<()> { } } - defg.identify_recursion(mods); - - Ok(()) + Ok(defg.graph()) } diff --git a/oal-compiler/src/resolve_tests.rs b/oal-compiler/src/resolve_tests.rs index ce5b896..35fb270 100644 --- a/oal-compiler/src/resolve_tests.rs +++ b/oal-compiler/src/resolve_tests.rs @@ -9,6 +9,7 @@ use oal_syntax::parser as syn; use oal_syntax::parser::{ Application, Binding, Declaration, Primitive, Program, Terminal, Variable, }; +use petgraph::dot::{Config, Dot}; fn definition<'a>(mods: &'a ModuleSet, node: NRef<'a>) -> NRef<'a> { let core = node.syntax().core_ref(); @@ -116,7 +117,7 @@ fn resolve_not_in_scope() -> anyhow::Result<()> { } #[test] -fn resolve_recursion() -> anyhow::Result<()> { +fn resolve_graph() -> anyhow::Result<()> { let mods = mods_from( r#" let a = { 'b b }; // mutually recursive @@ -126,18 +127,26 @@ fn resolve_recursion() -> anyhow::Result<()> { "#, )?; - resolve(&mods, mods.base()).expect("expected resolution"); - - let prog = Program::cast(mods.main().root()).expect("expected a program"); - let a = prog.declarations().nth(0).expect("expected a declaration"); - let b = prog.declarations().nth(1).expect("expected a declaration"); - let c = prog.declarations().nth(2).expect("expected a declaration"); - let d = prog.declarations().nth(3).expect("expected a declaration"); - - assert!(a.node().syntax().core_ref().is_recursive); - assert!(b.node().syntax().core_ref().is_recursive); - assert!(!c.node().syntax().core_ref().is_recursive); - assert!(d.node().syntax().core_ref().is_recursive); + let graph = resolve(&mods, mods.base()).expect("should return a graph"); + let graphviz = format!( + "{:?}", + Dot::with_config(&graph, &[Config::EdgeNoLabel, Config::NodeNoLabel]) + ); + assert_eq!( + graphviz, + r#"digraph { + 0 [ ] + 1 [ ] + 2 [ ] + 3 [ ] + 0 -> 1 [ ] + 1 -> 0 [ ] + 2 -> 0 [ ] + 2 -> 1 [ ] + 3 -> 3 [ ] +} +"# + ); Ok(()) } diff --git a/oal-compiler/src/typecheck.rs b/oal-compiler/src/typecheck.rs index 4e5c80a..dae16aa 100644 --- a/oal-compiler/src/typecheck.rs +++ b/oal-compiler/src/typecheck.rs @@ -1,19 +1,18 @@ use crate::errors::{Error, Kind, Result}; use crate::inference::tag::Tag; use crate::module::ModuleSet; +use crate::resolve::Graph; use crate::tree::{Core, NRef}; use oal_model::grammar::AbstractSyntaxNode; use oal_model::locator::Locator; use oal_syntax::atom; use oal_syntax::parser as syn; +use petgraph::visit::EdgeRef; +use petgraph::Direction::Incoming; -struct TagWrap(Tag, bool); +struct TagWrap(Tag); impl TagWrap { - fn is_recursive(&self) -> bool { - self.1 - } - fn is_variable(&self) -> bool { matches!(self.0, Tag::Var(_)) } @@ -31,13 +30,6 @@ impl TagWrap { ) } - fn is_referential(&self) -> bool { - matches!(self.0, |Tag::Relation| Tag::Object - | Tag::Array - | Tag::Any - | Tag::Var(_)) - } - fn is_content_like(&self) -> bool { self.is_schema() || self.0 == Tag::Content } @@ -76,7 +68,7 @@ impl TagWrap { } fn get_tag(n: NRef) -> TagWrap { - TagWrap(crate::tree::get_tag(n), n.syntax().core_ref().is_recursive) + TagWrap(crate::tree::get_tag(n)) } fn check_variadic_operation(op: syn::VariadicOp) -> Result<()> { @@ -199,21 +191,6 @@ fn check_declaration(decl: syn::Declaration) -> Result<()> { Error::new(Kind::InvalidType, "ill-formed reference, not a schema").with(&decl), ); } - if get_tag(decl.node()).is_recursive() { - if !rhs.is_schema() { - return Err( - Error::new(Kind::InvalidType, "ill-formed recursion, not a schema").with(&decl), - ); - } - if !rhs.is_referential() { - return Err( - Error::new(Kind::InvalidType, "ill-formed recursion, not referential").with(&decl), - ); - } - if decl.has_bindings() { - return Err(Error::new(Kind::InvalidType, "ill-formed lambda, recursive").with(&decl)); - } - } Ok(()) } @@ -226,14 +203,10 @@ fn check_resource(res: syn::Resource) -> Result<()> { fn check_recursion(rec: syn::Recursion) -> Result<()> { let tag = get_tag(rec.node()); - if !tag.is_schema() { + // TODO: support for recursive URI definitions (i.e. self-reference via query string) + if !tag.is_schema() || tag.is_uri() { return Err(Error::new(Kind::InvalidType, "ill-formed recursion, not a schema").with(&rec)); } - if !tag.is_referential() { - return Err( - Error::new(Kind::InvalidType, "ill-formed recursion, not referential").with(&rec), - ); - } Ok(()) } @@ -274,3 +247,59 @@ pub fn type_check(mods: &ModuleSet, loc: &Locator) -> Result<()> { Ok(()) } + +/// Validates points of recursion in the graph of definitions. +pub fn cycles_check(mut graph: Graph, mods: &ModuleSet) -> Result<()> { + let mut has_changed = true; // whether the graph has changed and another iteration is required. + let mut inbounds = Vec::new(); // to avoid an allocation on each iteration. + + while has_changed { + has_changed = false; + // Iterate over the strongly connected components of the definition graph. + for component in petgraph::algo::kosaraju_scc(&graph) { + // A trivial component contains a single vertex which is not connected to itself. + // All non-trivial components contain self or mutually recursive definitions. + let is_trivial = component.len() == 1 && { + let idx = *component.first().unwrap(); + graph.find_edge(idx, idx).is_none() + }; + // Trivial components are not relevant to cycle detection. + if is_trivial { + continue; + } + // Check each non-trivial strongly connected component for referential nodes. + for index in component.iter() { + let ext = graph.node_weight(*index).expect("node should exist"); + let node = ext.node(mods); + let tag = get_tag(node); + // Flag incoming edges to referential definitions for removal, + // as those definitions evaluate to references. + // TODO: support for recursive URI definitions (i.e. self-reference via query string) + if tag.is_schema() && !tag.is_uri() { + node.syntax().core_mut().is_recursive = true; + for e in graph.edges_directed(*index, Incoming) { + inbounds.push(e.id()) + } + } + } + if inbounds.is_empty() { + // The program is invalid if there are non-trivial strongly connected components + // that cannot be eliminated with references, i.e. a component without a referential node. + let index = component.first().expect("component should not be empty"); + let ext = graph.node_weight(*index).expect("node should exist"); + let node = ext.node(mods); + return Err(Error::new(Kind::InvalidType, "ill-formed recursion").at(node.span())); + } + } + if !inbounds.is_empty() { + // Try again after removing edges. + has_changed = true; + } + // Remove the incoming edges to referential definitions, + // to get rid of non-trivial strongly connected components. + for e in inbounds.drain(..) { + graph.remove_edge(e); + } + } + Ok(()) +} diff --git a/oal-compiler/src/typecheck_tests.rs b/oal-compiler/src/typecheck_tests.rs index f7bfd9f..f8da71b 100644 --- a/oal-compiler/src/typecheck_tests.rs +++ b/oal-compiler/src/typecheck_tests.rs @@ -3,16 +3,17 @@ use crate::inference::{check_complete, constrain, substitute, tag}; use crate::module::ModuleSet; use crate::resolve::resolve; use crate::tests::mods_from; -use crate::typecheck::type_check; +use crate::typecheck::{cycles_check, type_check}; fn compile(code: &str) -> anyhow::Result { let mods = mods_from(code)?; - resolve(&mods, mods.base())?; + let graph = resolve(&mods, mods.base())?; let _nvars = tag(&mods, mods.base())?; let eqs = constrain(&mods, mods.base())?; let set = eqs.unify()?; substitute(&mods, mods.base(), &set)?; check_complete(&mods, mods.base())?; + cycles_check(graph, &mods)?; type_check(&mods, mods.base())?; Ok(mods) } @@ -57,6 +58,8 @@ fn typecheck_error() { "res num;", "let a = str !;", "res / on (rec x (get -> { 'self uri }));", + "let f a = {} & (f { 'p a });", + "let a = rec x (concat /a x);", ]; for c in cases {