From e1fb61024101bff91bef732b1e6c7408f7df3b43 Mon Sep 17 00:00:00 2001 From: suscd Date: Tue, 13 Jul 2021 06:55:25 -0700 Subject: [PATCH] lang/syn: parse entire crate for IDL Adding a utility to load all files for a crate by following module declarations if they reference another file. Adapted existing parsing functions to use the new utility to find all relevant items within the crate being processed. --- lang/syn/src/idl/file.rs | 284 +++++++++++++++------------------ lang/syn/src/parser/context.rs | 178 +++++++++++++++++++++ lang/syn/src/parser/mod.rs | 1 + 3 files changed, 308 insertions(+), 155 deletions(-) create mode 100644 lang/syn/src/parser/context.rs diff --git a/lang/syn/src/idl/file.rs b/lang/syn/src/idl/file.rs index 7eb4935163..d6a1db8f17 100644 --- a/lang/syn/src/idl/file.rs +++ b/lang/syn/src/idl/file.rs @@ -1,12 +1,11 @@ use crate::idl::*; +use crate::parser::context::CrateContext; use crate::parser::{self, accounts, error, program}; use crate::{AccountField, AccountsStruct, StateIx}; use anyhow::Result; use heck::MixedCase; use quote::ToTokens; use std::collections::{HashMap, HashSet}; -use std::fs::File; -use std::io::Read; use std::path::Path; const DERIVE_NAME: &str = "Accounts"; @@ -15,16 +14,11 @@ const ERROR_CODE_OFFSET: u32 = 300; // Parse an entire interface file. pub fn parse(filename: impl AsRef) -> Result { - let mut file = File::open(&filename)?; + let ctx = CrateContext::parse(filename)?; - let mut src = String::new(); - file.read_to_string(&mut src).expect("Unable to read file"); + let p = program::parse(parse_program_mod(&ctx))?; - let f = syn::parse_file(&src).expect("Unable to parse file"); - - let p = program::parse(parse_program_mod(&f))?; - - let accs = parse_account_derives(&f); + let accs = parse_account_derives(&ctx); let state = match p.state { None => None, @@ -129,7 +123,7 @@ pub fn parse(filename: impl AsRef) -> Result { } }, }; - let error = parse_error_enum(&f).map(|mut e| error::parse(&mut e, None)); + let error = parse_error_enum(&ctx).map(|mut e| error::parse(&mut e, None)); let error_codes = error.as_ref().map(|e| { e.codes .iter() @@ -169,7 +163,7 @@ pub fn parse(filename: impl AsRef) -> Result { }) .collect::>(); - let events = parse_events(&f) + let events = parse_events(&ctx) .iter() .map(|e: &&syn::ItemStruct| { let fields = match &e.fields { @@ -202,9 +196,9 @@ pub fn parse(filename: impl AsRef) -> Result { // All user defined types. let mut accounts = vec![]; let mut types = vec![]; - let ty_defs = parse_ty_defs(&f)?; + let ty_defs = parse_ty_defs(&ctx)?; - let account_structs = parse_accounts(&f); + let account_structs = parse_accounts(&ctx); let account_names: HashSet = account_structs .iter() .map(|a| a.ident.to_string()) @@ -242,10 +236,10 @@ pub fn parse(filename: impl AsRef) -> Result { } // Parse the main program mod. -fn parse_program_mod(f: &syn::File) -> syn::ItemMod { - let mods = f - .items - .iter() +fn parse_program_mod(ctx: &CrateContext) -> syn::ItemMod { + let root = ctx.root_module(); + let mods = root + .items() .filter_map(|i| match i { syn::Item::Mod(item_mod) => { let mod_count = item_mod @@ -267,173 +261,153 @@ fn parse_program_mod(f: &syn::File) -> syn::ItemMod { mods[0].clone() } -fn parse_error_enum(f: &syn::File) -> Option { - f.items - .iter() - .filter_map(|i| match i { - syn::Item::Enum(item_enum) => { - let attrs_count = item_enum - .attrs - .iter() - .filter(|attr| { - let segment = attr.path.segments.last().unwrap(); - segment.ident == "error" - }) - .count(); - match attrs_count { - 0 => None, - 1 => Some(item_enum), - _ => panic!("Invalid syntax: one error attribute allowed"), - } +fn parse_error_enum(ctx: &CrateContext) -> Option { + ctx.enums() + .filter_map(|item_enum| { + let attrs_count = item_enum + .attrs + .iter() + .filter(|attr| { + let segment = attr.path.segments.last().unwrap(); + segment.ident == "error" + }) + .count(); + match attrs_count { + 0 => None, + 1 => Some(item_enum), + _ => panic!("Invalid syntax: one error attribute allowed"), } - _ => None, }) .next() .cloned() } -fn parse_events(f: &syn::File) -> Vec<&syn::ItemStruct> { - f.items - .iter() - .filter_map(|i| match i { - syn::Item::Struct(item_strct) => { - let attrs_count = item_strct - .attrs - .iter() - .filter(|attr| { - let segment = attr.path.segments.last().unwrap(); - segment.ident == "event" - }) - .count(); - match attrs_count { - 0 => None, - 1 => Some(item_strct), - _ => panic!("Invalid syntax: one event attribute allowed"), - } +fn parse_events(ctx: &CrateContext) -> Vec<&syn::ItemStruct> { + ctx.structs() + .filter_map(|item_strct| { + let attrs_count = item_strct + .attrs + .iter() + .filter(|attr| { + let segment = attr.path.segments.last().unwrap(); + segment.ident == "event" + }) + .count(); + match attrs_count { + 0 => None, + 1 => Some(item_strct), + _ => panic!("Invalid syntax: one event attribute allowed"), } - _ => None, }) .collect() } -fn parse_accounts(f: &syn::File) -> Vec<&syn::ItemStruct> { - f.items - .iter() - .filter_map(|i| match i { - syn::Item::Struct(item_strct) => { - let attrs_count = item_strct - .attrs - .iter() - .filter(|attr| { - let segment = attr.path.segments.last().unwrap(); - segment.ident == "account" || segment.ident == "associated" - }) - .count(); - match attrs_count { - 0 => None, - 1 => Some(item_strct), - _ => panic!("Invalid syntax: one event attribute allowed"), - } +fn parse_accounts(ctx: &CrateContext) -> Vec<&syn::ItemStruct> { + ctx.structs() + .filter_map(|item_strct| { + let attrs_count = item_strct + .attrs + .iter() + .filter(|attr| { + let segment = attr.path.segments.last().unwrap(); + segment.ident == "account" || segment.ident == "associated" + }) + .count(); + match attrs_count { + 0 => None, + 1 => Some(item_strct), + _ => panic!("Invalid syntax: one event attribute allowed"), } - _ => None, }) .collect() } // Parse all structs implementing the `Accounts` trait. -fn parse_account_derives(f: &syn::File) -> HashMap { - f.items - .iter() - .filter_map(|i| match i { - syn::Item::Struct(i_strct) => { - for attr in &i_strct.attrs { - if attr.tokens.to_string().contains(DERIVE_NAME) { - let strct = accounts::parse(i_strct).expect("Code not parseable"); - return Some((strct.ident.to_string(), strct)); - } +fn parse_account_derives(ctx: &CrateContext) -> HashMap { + // TODO: parse manual implementations. Currently we only look + // for derives. + ctx.structs() + .filter_map(|i_strct| { + for attr in &i_strct.attrs { + if attr.tokens.to_string().contains(DERIVE_NAME) { + let strct = accounts::parse(i_strct).expect("Code not parseable"); + return Some((strct.ident.to_string(), strct)); } - None } - // TODO: parse manual implementations. Currently we only look - // for derives. - _ => None, + None }) .collect() } // Parse all user defined types in the file. -fn parse_ty_defs(f: &syn::File) -> Result> { - f.items - .iter() - .filter_map(|i| match i { - syn::Item::Struct(item_strct) => { - for attr in &item_strct.attrs { - if attr.tokens.to_string().contains(DERIVE_NAME) { - return None; - } +fn parse_ty_defs(ctx: &CrateContext) -> Result> { + ctx.structs() + .filter_map(|item_strct| { + for attr in &item_strct.attrs { + if attr.tokens.to_string().contains(DERIVE_NAME) { + return None; } - if let syn::Visibility::Public(_) = &item_strct.vis { - let name = item_strct.ident.to_string(); - let fields = match &item_strct.fields { - syn::Fields::Named(fields) => fields - .named - .iter() - .map(|f: &syn::Field| { - let mut tts = proc_macro2::TokenStream::new(); - f.ty.to_tokens(&mut tts); - Ok(IdlField { - name: f.ident.as_ref().unwrap().to_string().to_mixed_case(), - ty: tts.to_string().parse()?, - }) + } + if let syn::Visibility::Public(_) = &item_strct.vis { + let name = item_strct.ident.to_string(); + let fields = match &item_strct.fields { + syn::Fields::Named(fields) => fields + .named + .iter() + .map(|f: &syn::Field| { + let mut tts = proc_macro2::TokenStream::new(); + f.ty.to_tokens(&mut tts); + Ok(IdlField { + name: f.ident.as_ref().unwrap().to_string().to_mixed_case(), + ty: tts.to_string().parse()?, }) - .collect::>>(), - _ => panic!("Only named structs are allowed."), - }; + }) + .collect::>>(), + f => panic!("Only named structs are allowed. Got {} {:?}", &name, f), + }; - return Some(fields.map(|fields| IdlTypeDefinition { - name, - ty: IdlTypeDefinitionTy::Struct { fields }, - })); - } - None - } - syn::Item::Enum(enm) => { - let name = enm.ident.to_string(); - let variants = enm - .variants - .iter() - .map(|variant: &syn::Variant| { - let name = variant.ident.to_string(); - let fields = match &variant.fields { - syn::Fields::Unit => None, - syn::Fields::Unnamed(fields) => { - let fields: Vec = - fields.unnamed.iter().map(to_idl_type).collect(); - Some(EnumFields::Tuple(fields)) - } - syn::Fields::Named(fields) => { - let fields: Vec = fields - .named - .iter() - .map(|f: &syn::Field| { - let name = f.ident.as_ref().unwrap().to_string(); - let ty = to_idl_type(f); - IdlField { name, ty } - }) - .collect(); - Some(EnumFields::Named(fields)) - } - }; - IdlEnumVariant { name, fields } - }) - .collect::>(); - Some(Ok(IdlTypeDefinition { + return Some(fields.map(|fields| IdlTypeDefinition { name, - ty: IdlTypeDefinitionTy::Enum { variants }, - })) + ty: IdlTypeDefinitionTy::Struct { fields }, + })); } - _ => None, + None }) + .chain(ctx.enums().filter_map(|enm| { + let name = enm.ident.to_string(); + let variants = enm + .variants + .iter() + .map(|variant: &syn::Variant| { + let name = variant.ident.to_string(); + let fields = match &variant.fields { + syn::Fields::Unit => None, + syn::Fields::Unnamed(fields) => { + let fields: Vec = + fields.unnamed.iter().map(to_idl_type).collect(); + Some(EnumFields::Tuple(fields)) + } + syn::Fields::Named(fields) => { + let fields: Vec = fields + .named + .iter() + .map(|f: &syn::Field| { + let name = f.ident.as_ref().unwrap().to_string(); + let ty = to_idl_type(f); + IdlField { name, ty } + }) + .collect(); + Some(EnumFields::Named(fields)) + } + }; + IdlEnumVariant { name, fields } + }) + .collect::>(); + Some(Ok(IdlTypeDefinition { + name, + ty: IdlTypeDefinitionTy::Enum { variants }, + })) + })) .collect() } diff --git a/lang/syn/src/parser/context.rs b/lang/syn/src/parser/context.rs new file mode 100644 index 0000000000..6801e8b5b8 --- /dev/null +++ b/lang/syn/src/parser/context.rs @@ -0,0 +1,178 @@ +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +use syn::parse::{Error as ParseError, Result as ParseResult}; + +/// Crate parse context +/// +/// Keeps track of modules defined within a crate. +pub struct CrateContext { + modules: HashMap, +} + +impl CrateContext { + pub fn structs(&self) -> impl Iterator { + self.modules.iter().flat_map(|(_, ctx)| ctx.structs()) + } + + pub fn enums(&self) -> impl Iterator { + self.modules.iter().flat_map(|(_, ctx)| ctx.enums()) + } + + pub fn modules(&self) -> impl Iterator { + self.modules + .iter() + .map(move |(_, detail)| ModuleContext { detail }) + } + + pub fn root_module(&self) -> ModuleContext { + ModuleContext { + detail: self.modules.get("crate").unwrap(), + } + } + + pub fn parse(root: impl AsRef) -> Result { + Ok(CrateContext { + modules: ParsedModule::parse_recursive(root.as_ref())?, + }) + } +} + +/// Module parse context +/// +/// Keeps track of items defined within a module. +#[derive(Copy, Clone)] +pub struct ModuleContext<'krate> { + detail: &'krate ParsedModule, +} + +impl<'krate> ModuleContext<'krate> { + pub fn items(&self) -> impl Iterator { + self.detail.items.iter() + } +} +struct ParsedModule { + name: String, + file: PathBuf, + path: String, + items: Vec, +} + +impl ParsedModule { + fn parse_recursive(root: &Path) -> Result, anyhow::Error> { + let mut modules = HashMap::new(); + + let root_content = std::fs::read_to_string(root)?; + let root_file = syn::parse_file(&root_content)?; + let root_mod = Self::new( + String::new(), + root.to_owned(), + "crate".to_owned(), + root_file.items, + ); + + struct UnparsedModule { + file: PathBuf, + path: String, + name: String, + item: syn::ItemMod, + } + + let mut unparsed = root_mod + .submodules() + .map(|item| UnparsedModule { + file: root_mod.file.clone(), + path: root_mod.path.clone(), + name: item.ident.to_string(), + item: item.clone(), + }) + .collect::>(); + + while let Some(to_parse) = unparsed.pop() { + let path = format!("{}::{}", to_parse.path, to_parse.name); + let name = to_parse.name; + let module = Self::from_item_mod(&to_parse.file, &path, to_parse.item)?; + + unparsed.extend(module.submodules().map(|item| UnparsedModule { + item: item.clone(), + file: module.file.clone(), + path: module.path.clone(), + name: name.clone(), + })); + modules.insert(name.clone(), module); + } + + modules.insert(root_mod.name.clone(), root_mod); + + Ok(modules) + } + + fn from_item_mod( + parent_file: &Path, + parent_path: &str, + item: syn::ItemMod, + ) -> ParseResult { + let path = format!("{}::{}", parent_path, item.ident); + + Ok(match item.content { + Some((_, items)) => { + // The module content is within the parent file being parsed + Self::new(path, parent_file.to_owned(), item.ident.to_string(), items) + } + None => { + // The module is referencing some other file, so we need to load that + // to parse the items it has. + let parent_dir = parent_file.parent().unwrap(); + let parent_filename = parent_file.file_stem().unwrap().to_str().unwrap(); + let parent_mod_dir = parent_dir.join(parent_filename); + + let possible_file_paths = vec![ + parent_dir.join(format!("{}.rs", item.ident)), + parent_dir.join(format!("{}/mod.rs", item.ident)), + parent_mod_dir.join(format!("{}.rs", item.ident)), + parent_mod_dir.join(format!("{}/mod.rs", item.ident)), + ]; + + let mod_file_path = possible_file_paths + .into_iter() + .find(|p| p.exists()) + .ok_or_else(|| ParseError::new_spanned(&item, "could not find file"))?; + let mod_file_content = std::fs::read_to_string(&mod_file_path) + .map_err(|_| ParseError::new_spanned(&item, "could not read file"))?; + let mod_file = syn::parse_file(&mod_file_content)?; + + Self::new(path, mod_file_path, item.ident.to_string(), mod_file.items) + } + }) + } + + fn new(path: String, file: PathBuf, name: String, items: Vec) -> Self { + Self { + name, + file, + path, + items, + } + } + + fn submodules(&self) -> impl Iterator { + self.items.iter().filter_map(|i| match i { + syn::Item::Mod(item) => Some(item), + _ => None, + }) + } + + fn structs(&self) -> impl Iterator { + self.items.iter().filter_map(|i| match i { + syn::Item::Struct(item) => Some(item), + _ => None, + }) + } + + fn enums(&self) -> impl Iterator { + self.items.iter().filter_map(|i| match i { + syn::Item::Enum(item) => Some(item), + _ => None, + }) + } +} diff --git a/lang/syn/src/parser/mod.rs b/lang/syn/src/parser/mod.rs index 118f45f346..cb20cbe472 100644 --- a/lang/syn/src/parser/mod.rs +++ b/lang/syn/src/parser/mod.rs @@ -1,4 +1,5 @@ pub mod accounts; +pub mod context; pub mod error; pub mod program;