From e773caea8010b87726ea524d31798fb2e43e12f4 Mon Sep 17 00:00:00 2001 From: Minijackson Date: Sat, 21 Dec 2019 12:13:21 +0100 Subject: newtype in types, more generator config, parsing -> parser --- src/parsing/clang/parsing.rs | 748 ------------------------------------------- 1 file changed, 748 deletions(-) delete mode 100644 src/parsing/clang/parsing.rs (limited to 'src/parsing/clang/parsing.rs') diff --git a/src/parsing/clang/parsing.rs b/src/parsing/clang/parsing.rs deleted file mode 100644 index 5359253..0000000 --- a/src/parsing/clang/parsing.rs +++ /dev/null @@ -1,748 +0,0 @@ -use super::config::Config; -use super::entities::*; -use crate::types::Entity; - -use anyhow::{anyhow, Context, Error, Result}; -use clang::{Clang, CompilationDatabase, Index, TranslationUnit, Usr}; -use codemap::CodeMap; -use thiserror::Error; - -use std::collections::BTreeMap; -use std::convert::{TryFrom, TryInto}; -use std::path::{Path, PathBuf}; - -#[derive(Debug, Default)] -struct TopLevel { - namespaces: BTreeMap>, - variables: BTreeMap>, - structs: BTreeMap>, - functions: BTreeMap>, -} - -/* -enum TopLevelEntry<'a, T> { - Vacant { - parent: &'a mut Described, - }, - /// Vacant, but no semantic parent - TopLevel, - Occupied { - entity: &'a mut Described, - }, - Error, -} -*/ - -impl TopLevel { - // Somehow has a lifetime issue I can't get my head around - /* - fn entry<'a, T>(&'a mut self, path: &::clang::Entity) -> Result> - where - T: ClangEntity + FromNamespaceParent + FromTopLevel, - { - let usr = path.get_usr().ok_or_else(|| anyhow!("no usr"))?; - if let Some(parent_path) = parent(&path) { - let parent_entry = self.entry::(&parent_path)?; - if let TopLevelEntry::Occupied { - entity: namespace_parent, - } = parent_entry - { - Ok(match T::from_namespace_parent(namespace_parent, &usr) { - None => TopLevelEntry::Vacant { - parent: namespace_parent, - }, - Some(entity) => TopLevelEntry::Occupied { entity }, - }) - } else { - panic!("Wut"); - } - } else { - Ok(match T::from_toplevel(self, &usr) { - Some(entity) => TopLevelEntry::Occupied { entity }, - None => TopLevelEntry::TopLevel, - }) - } - } - */ - - fn get_entity_mut(&mut self, path: clang::Entity) -> Option<&mut dyn ClangEntity> { - let usr = path.get_usr()?; - if let Some(parent_path) = parent(path) { - let parent = self.get_entity_mut(parent_path)?; - Some(match path.get_kind().try_into().ok()? { - ClangEntityKind::Namespace => { - &mut parent.get_member_namespaces()?.get_mut(&usr)?.entity - } - ClangEntityKind::Variable(_) => { - &mut parent.get_member_variables()?.get_mut(&usr)?.entity - } - ClangEntityKind::Function(_) => { - &mut parent.get_member_functions()?.get_mut(&usr)?.entity - } - ClangEntityKind::Struct(_) => { - &mut parent.get_member_structs()?.get_mut(&usr)?.entity - } - }) - } else { - Some(match path.get_kind().try_into().ok()? { - ClangEntityKind::Namespace => &mut self.namespaces.get_mut(&usr)?.entity, - ClangEntityKind::Variable(_) => &mut self.variables.get_mut(&usr)?.entity, - ClangEntityKind::Struct(_) => &mut self.structs.get_mut(&usr)?.entity, - ClangEntityKind::Function(_) => &mut self.functions.get_mut(&usr)?.entity, - }) - } - } - - fn get_namespace_mut(&mut self, path: clang::Entity) -> Option<&mut Described> { - let usr = path.get_usr()?; - - if let Some(parent_path) = parent(path) { - let parent = self.get_entity_mut(parent_path)?; - parent.get_member_namespaces()?.get_mut(&usr) - } else { - self.namespaces.get_mut(&usr) - } - } - - fn insert(&mut self, path: clang::Entity, entity: Described) -> Result<()> - where - T: ClangEntity + std::fmt::Debug, - Self: TopLevelManipulation, - Namespace: NamespaceParentManipulation, - { - let usr = path.get_usr().ok_or_else(|| anyhow!("no usr"))?; - if let Some(parent_path) = parent(path) { - if let Some(parent_namespace) = self.get_namespace_mut(parent_path) { - parent_namespace - .entity - .get_members_mut() - // Namespace should be able to contain every kind of entity - .unwrap() - .insert(usr, entity); - Ok(()) - } else { - Err(anyhow!("has parent but no parent in tree")) - } - } else { - self.insert_toplevel(usr, entity); - Ok(()) - } - } -} - -// Like .get_semantic_parent(), but return none if the parent is the translation unit -fn parent(libclang_entity: clang::Entity) -> Option { - match libclang_entity.get_semantic_parent() { - Some(parent) => { - if parent.get_kind() != clang::EntityKind::TranslationUnit { - Some(parent) - } else { - None - } - } - None => { - warn!("get_semantic_parent() returned None"); - None - } - } -} - -trait TopLevelManipulation { - fn insert_toplevel(&mut self, usr: Usr, entity: Described); -} - -impl TopLevelManipulation for TopLevel { - fn insert_toplevel(&mut self, usr: Usr, entity: Described) { - self.namespaces.insert(usr, entity); - } -} - -impl TopLevelManipulation for TopLevel { - fn insert_toplevel(&mut self, usr: Usr, entity: Described) { - self.variables.insert(usr, entity); - } -} - -impl TopLevelManipulation for TopLevel { - fn insert_toplevel(&mut self, usr: Usr, entity: Described) { - self.functions.insert(usr, entity); - } -} - -impl TopLevelManipulation for TopLevel { - fn insert_toplevel(&mut self, usr: Usr, entity: Described) { - self.structs.insert(usr, entity); - } -} - -/* -trait FromTopLevel: ClangEntity + Sized { - fn from_toplevel<'a>(toplevel: &'a mut TopLevel, usr: &Usr) -> Option<&'a mut Described>; -} - -impl FromTopLevel for Namespace { - fn from_toplevel<'a>(toplevel: &'a mut TopLevel, usr: &Usr) -> Option<&'a mut Described> { - toplevel.namespaces.get_mut(usr) - } -} - -impl FromTopLevel for Variable { - fn from_toplevel<'a>(toplevel: &'a mut TopLevel, usr: &Usr) -> Option<&'a mut Described> { - toplevel.variables.get_mut(usr) - } -} - -impl FromTopLevel for Function { - fn from_toplevel<'a>(toplevel: &'a mut TopLevel, usr: &Usr) -> Option<&'a mut Described> { - toplevel.functions.get_mut(usr) - } -} - -impl FromTopLevel for Struct { - fn from_toplevel<'a>(toplevel: &'a mut TopLevel, usr: &Usr) -> Option<&'a mut Described> { - toplevel.structs.get_mut(usr) - } -} -*/ - -pub(crate) fn parse_compile_commands( - config: &Config, - codemap: &mut CodeMap, -) -> Result> { - let clang = Clang::new().unwrap(); - let index = Index::new( - &clang, /* exclude from pch = */ false, /* print diagnostics = */ false, - ); - - debug!("Extra libclang argument: {:?}", config.extra_args); - - debug!( - "Loading compile commands from: {:?}", - config.compile_commands_location - ); - let database = - CompilationDatabase::from_directory(&config.compile_commands_location).map_err(|()| { - CompileCommandsLoadError { - path: config.compile_commands_location.clone(), - } - })?; - - let toplevel_directory = std::env::current_dir().context("Cannot read current directory")?; - - let mut entities = TopLevel::default(); - - for command in database.get_all_compile_commands().get_commands() { - let directory = command.get_directory(); - trace!("Changing directory to: {:?}", directory); - std::env::set_current_dir(&directory) - .with_context(|| format!("Cannot change current directory to: {:?}", directory))?; - - let filename = command.get_filename(); - - let file_map = codemap.add_file( - filename - .to_str() - .context("File is not valid UTF-8")? - .to_owned(), - std::fs::read_to_string(&filename) - .with_context(|| format!("Cannot readfile: {:?}", filename))?, - ); - - trace!("Parsing file: {:?}", filename); - // The file name is passed as an argument in the compile commands - let mut parser = index.parser(""); - parser.skip_function_bodies(true); - - let mut clang_arguments = command.get_arguments(); - clang_arguments.extend_from_slice(&config.extra_args); - trace!("Parsing with libclang arguments: {:?}", clang_arguments); - parser.arguments(&clang_arguments); - - parse_unit( - &parser - .parse() - .with_context(|| format!("Could not parse file: {:?}", filename))?, - &mut entities, - &toplevel_directory, - file_map.span, - &codemap, - )?; - - trace!("Changing directory to: {:?}", directory); - std::env::set_current_dir(&toplevel_directory).with_context(|| { - format!( - "Cannot change current directory to: {:?}", - toplevel_directory - ) - })?; - } - - let normalized_entities = entities - .namespaces - .into_iter() - .map(|(usr, entity)| (usr.0, entity.into())) - .chain(entities.variables.into_iter().map(|(usr, entity)| (usr.0, entity.into()))) - .chain(entities.structs.into_iter().map(|(usr, entity)| (usr.0, entity.into()))) - .chain(entities.functions.into_iter().map(|(usr, entity)| (usr.0, entity.into()))) - .collect(); - - Ok(normalized_entities) -} - -/* -pub(crate) fn parse_file(path: T, extra_args: &[S]) -> EntitiesManager -where - T: Into, - T: AsRef, - T: ToString, - S: AsRef, - S: std::fmt::Debug, -{ - let mut codemap = CodeMap::new(); - let file_map = codemap.add_file(path.to_string(), std::fs::read_to_string(&path).unwrap()); - let file_span = file_map.span; - - let clang = Clang::new().unwrap(); - let index = Index::new(&clang, true, false); - let mut parser = index.parser(path); - parser.skip_function_bodies(true); - - parser.arguments(&extra_args); - - if log_enabled!(log::Level::Debug) { - for extra_arg in extra_args { - debug!("Extra libclang argument: {:?}", extra_arg); - } - } - - let trans_unit = parser.parse().unwrap(); - let mut entities = EntitiesManager::new(); - - parse_unit( - &trans_unit, - &mut entities, - &std::env::current_dir().unwrap(), - file_span, - &codemap, - ) - .unwrap(); - - entities -} -*/ - -fn parse_unit( - trans_unit: &TranslationUnit, - entities: &mut TopLevel, - base_dir: impl AsRef, - file_span: codemap::Span, - codemap: &CodeMap, -) -> Result<()> { - trans_unit.get_entity().visit_children(|entity, _parent| { - if is_in_system_header(entity, &base_dir) { - trace!( - "Entity is in system header, skipping: USR = {:?}", - entity.get_display_name() - ); - return clang::EntityVisitResult::Continue; - } - - // TODO: wrap this callback in another function so that we can use the - // "?" operator instead of all these `match`es - let usr = match entity.get_usr() { - Some(usr) => usr, - None => return clang::EntityVisitResult::Continue, - }; - trace!("Entity with USR = {:?}", usr); - debug!("Parsing toplevel entity: {:?}", entity); - - add_entity(entity, entities, file_span, codemap) - }); - - /* - use codemap_diagnostic::{ColorConfig, Emitter}; - - let mut emitter = Emitter::stderr(ColorConfig::Auto, Some(&codemap)); - - for diagnostic in trans_unit.get_diagnostics().iter() { - let main_diag = match clang_diag_to_codemap_diag(&diagnostic, file_span) { - Some(diag) => diag, - None => continue, - }; - - let sub_diags = diagnostic - .get_children() - .into_iter() - .filter_map(|diagnostic| clang_diag_to_codemap_diag(&diagnostic, file_span)); - - let fix_it_diags = diagnostic - .get_fix_its() - .into_iter() - .map(|fix_it| clang_fix_it_to_codemap_diag(&fix_it, file_span)); - - emitter.emit( - &std::iter::once(main_diag) - .chain(sub_diags) - .chain(fix_it_diags) - .collect::>(), - ); - } - */ - - Ok(()) -} - -fn is_in_system_header(entity: clang::Entity, base_dir: impl AsRef) -> bool { - if entity.is_in_system_header() { - true - } else if let Some(location) = entity.get_location() { - if let Some(file) = location.get_file_location().file { - !file - .get_path() - .canonicalize() - .unwrap() - .starts_with(base_dir) - } else { - // Not defined in a file? probably shouldn't document - true - } - } else { - // Not defined anywhere? probably shouldn't document - true - } -} - -// Entries encountered in the toplevel lexical context -fn add_entity( - libclang_entity: clang::Entity, - toplevel: &mut TopLevel, - file_span: codemap::Span, - codemap: &CodeMap, -) -> clang::EntityVisitResult { - if libclang_entity.get_usr().is_none() { - return clang::EntityVisitResult::Continue; - }; - - let kind = match ClangEntityKind::try_from(libclang_entity.get_kind()) { - Ok(kind) => kind, - Err(err) => { - use codemap_diagnostic::{ - ColorConfig, Diagnostic, Emitter, Level, SpanLabel, SpanStyle, - }; - let spans = if let Some(range) = libclang_entity.get_range() { - // TODO: add every file parsed in this translation unit to the - // codemap, so we can properly report errors - if !range.is_in_main_file() { - vec![] - } else { - let begin = range.get_start().get_file_location().offset as u64; - let end = range.get_end().get_file_location().offset as u64; - - vec![SpanLabel { - span: file_span.subspan(begin, end), - label: None, - style: SpanStyle::Primary, - }] - } - } else { - vec![] - }; - - let diag = Diagnostic { - level: Level::Warning, - message: format!("{}", err), - code: None, - spans, - }; - - let mut emitter = Emitter::stderr(ColorConfig::Auto, Some(codemap)); - emitter.emit(&[diag]); - - return clang::EntityVisitResult::Continue; - } - }; - - if let Some(in_tree_entity) = toplevel.get_entity_mut(libclang_entity) { - // if current.has_documentation && !tree.has_documentation { - // append_documentation - // } - } else if libclang_entity.is_definition() { - // TODO: This probably means that you can't put documentation in forward declarations. - // - // This seems restrictive, but since there can be multiple declarations but only one definition, - // you should probably put your documentation on the definition anyway? - // - // Also, skipping forward declarations allows us to not have to insert, then update the tree - // when we see the definition. - - let result = match kind { - ClangEntityKind::Namespace => Described::::try_from(libclang_entity) - .and_then(|namespace| toplevel.insert(libclang_entity, namespace)), - ClangEntityKind::Variable(_) => Described::::try_from(libclang_entity) - .and_then(|variable| toplevel.insert(libclang_entity, variable)), - ClangEntityKind::Struct(_) => Described::::try_from(libclang_entity) - .and_then(|r#struct| toplevel.insert(libclang_entity, r#struct)), - ClangEntityKind::Function(_) => Described::::try_from(libclang_entity) - .and_then(|function| toplevel.insert(libclang_entity, function)), - }; - // TODO: check result - } - - if kind == ClangEntityKind::Namespace { - // Recurse here since namespace definitions are allowed to change between translation units. - ::clang::EntityVisitResult::Recurse - } else { - ::clang::EntityVisitResult::Continue - } -} - -impl<'a, T> TryFrom> for Described -where - T: TryFrom, Error = Error>, -{ - type Error = Error; - - fn try_from(entity: clang::Entity<'a>) -> Result { - Ok(Described:: { - description: get_description(entity)?, - entity: T::try_from(entity)?, - }) - } -} - -impl<'a> TryFrom> for Namespace { - type Error = Error; - - fn try_from(entity: clang::Entity) -> Result { - match entity.get_kind().try_into() { - Ok(ClangEntityKind::Namespace) => {} - _ => panic!("Trying to parse a non-variable into a variable"), - } - debug!("Parsing Namespace: {:?}", entity); - - // Do not recurse here, but recurse in the main loop, since namespace - // definitions is allowed to change between translation units - - Ok(Namespace { - member_namespaces: Default::default(), - member_variables: Default::default(), - member_structs: Default::default(), - member_functions: Default::default(), - }) - } -} - -impl<'a> TryFrom> for Variable { - type Error = Error; - - fn try_from(entity: clang::Entity) -> Result { - let variable_kind; - match entity.get_kind().try_into() { - Ok(ClangEntityKind::Variable(kind)) => { - variable_kind = kind; - } - _ => panic!("Trying to parse a non-variable into a variable"), - } - debug!("Parsing Variable: {:?}", entity); - - let r#type = entity.get_type().unwrap().get_display_name(); - trace!("Variable has type: {:?}", r#type); - - Ok(Variable { - r#type, - kind: variable_kind, - }) - } -} - -impl<'a> TryFrom> for Struct { - type Error = Error; - - fn try_from(entity: clang::Entity) -> Result { - let struct_kind; - match entity.get_kind().try_into() { - Ok(ClangEntityKind::Struct(kind)) => { - struct_kind = kind; - } - _ => panic!("Trying to parse a non-class into a class"), - } - debug!("Parsing Struct: {:?}", entity); - - let mut member_variables = BTreeMap::new(); - let mut member_structs = BTreeMap::new(); - let mut member_functions = BTreeMap::new(); - - for child in entity.get_children() { - trace!("Struct has child: {:?}", child); - - match child.get_kind().try_into() { - Ok(ClangEntityKind::Variable(_)) => { - let child_usr = child.get_usr().ok_or_else(|| anyhow!("no usr"))?; - member_variables.insert(child_usr, Described::::try_from(child)?); - } - Ok(ClangEntityKind::Struct(_)) => { - let child_usr: Usr = child.get_usr().ok_or_else(|| anyhow!("no usr"))?; - member_structs.insert(child_usr, Described::::try_from(child)?); - } - Ok(ClangEntityKind::Function(_)) => { - let child_usr = child.get_usr().ok_or_else(|| anyhow!("no usr"))?; - member_functions.insert(child_usr, Described::::try_from(child)?); - } - _ => trace!("Skipping child"), - } - } - - Ok(Struct { - kind: struct_kind, - member_functions, - member_structs, - member_variables, - }) - } -} - -impl<'a> TryFrom> for Function { - type Error = Error; - - fn try_from(entity: clang::Entity) -> Result { - let function_kind; - match entity.get_kind().try_into() { - Ok(ClangEntityKind::Function(kind)) => { - function_kind = kind; - } - _ => panic!("Trying to parse a non-function into a function"), - } - debug!("Parsing Function: {:?}", entity); - - let return_type = entity.get_result_type().unwrap().get_display_name(); - trace!("Function has return type: {:?}", return_type); - let arguments = entity - .get_arguments() - .unwrap() - .into_iter() - .map(|arg| { - let name = arg - .get_display_name() - .unwrap_or_else(|| String::from("unnamed")); - let r#type = arg.get_type().unwrap().get_display_name(); - trace!("Function has argument {:?} of type {:?}", name, r#type); - FunctionArgument { name, r#type } - }) - .collect(); - - Ok(Function { - kind: function_kind, - arguments, - return_type, - }) - } -} - -fn get_description(entity: clang::Entity) -> Result { - let name = entity - .get_display_name() - .ok_or_else(|| anyhow!("Entity has no name: {:?}", entity))?; - - // TODO: is that the best? - if let (Some(brief), Some(comment)) = (entity.get_comment_brief(), entity.get_comment()) { - Ok(Description { - name, - brief, - detailed: parse_comment(comment), - }) - } else { - Ok(Description { - name, - brief: String::new(), - detailed: String::new(), - }) - } -} - -pub fn parse_comment(raw: String) -> String { - #[derive(Debug)] - enum CommentStyle { - // Comments of type `/**` or `/*!` - Starred, - // Comments of type `///` - SingleLine, - } - - let mut chars = raw.chars(); - let style = match &chars.as_str()[..3] { - "/*!" | "/**" => CommentStyle::Starred, - "///" => CommentStyle::SingleLine, - _ => panic!("Comment is empty or doesn't start with either `///`, `/**`, or `/*!`"), - }; - - chars.nth(2); - - let mut result = String::new(); - - 'parse_loop: loop { - let maybe_space = chars.next(); - let mut empty_line = false; - match maybe_space { - // TODO: Warn on empty comments - None => break, - Some(' ') => {} - Some('\n') => { - empty_line = true; - result.push('\n'); - } - Some(ch) => result.push(ch), - } - - if !empty_line { - let rest = chars.as_str(); - match rest.find('\n') { - None => { - result.push_str(rest); - break; - } - Some(position) => { - result.push_str(&rest[..=position]); - chars.nth(position); - } - } - } - - // Beginning of the line - let first_non_ws_ch = 'ws_loop: loop { - let maybe_whitespace = chars.next(); - match maybe_whitespace { - None => break 'parse_loop, - Some(ch) if ch.is_whitespace() => continue, - Some(ch) => break 'ws_loop ch, - } - }; - - match style { - CommentStyle::Starred if first_non_ws_ch == '*' => { - if &chars.as_str()[..1] == "/" { - break; - } - } - CommentStyle::Starred => result.push(first_non_ws_ch), - CommentStyle::SingleLine => { - assert!(first_non_ws_ch == '/'); - let rest = chars.as_str(); - if &rest[..2] == "//" { - chars.nth(1); - } else if &rest[..1] == "/" { - chars.nth(0); - } else { - panic!("Could not parse comment"); - } - } - } - } - - result -} - -#[derive(Debug, Clone, Error)] -#[error("Failed to load 'compile_commands.json' at path: {:?}", path)] -pub(crate) struct CompileCommandsLoadError { - path: PathBuf, -} -- cgit v1.2.3