From de896baff7e97fac4dde79078c9a2fa1c652576b Mon Sep 17 00:00:00 2001 From: Minijackson Date: Wed, 18 Dec 2019 20:56:53 +0100 Subject: Big refactoring - entities should be more coherent when parsing multiple files - well defined, language agnostic entity tree - each module has its own configuration - less dead code --- src/parsing/clang/parsing.rs | 748 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 748 insertions(+) create mode 100644 src/parsing/clang/parsing.rs (limited to 'src/parsing/clang/parsing.rs') diff --git a/src/parsing/clang/parsing.rs b/src/parsing/clang/parsing.rs new file mode 100644 index 0000000..5359253 --- /dev/null +++ b/src/parsing/clang/parsing.rs @@ -0,0 +1,748 @@ +use super::config::Config; +use super::entities::*; +use crate::types::Entity; + +use anyhow::{anyhow, Context, Error, Result}; +use clang::{Clang, CompilationDatabase, Index, TranslationUnit, Usr}; +use codemap::CodeMap; +use thiserror::Error; + +use std::collections::BTreeMap; +use std::convert::{TryFrom, TryInto}; +use std::path::{Path, PathBuf}; + +#[derive(Debug, Default)] +struct TopLevel { + namespaces: BTreeMap>, + variables: BTreeMap>, + structs: BTreeMap>, + functions: BTreeMap>, +} + +/* +enum TopLevelEntry<'a, T> { + Vacant { + parent: &'a mut Described, + }, + /// Vacant, but no semantic parent + TopLevel, + Occupied { + entity: &'a mut Described, + }, + Error, +} +*/ + +impl TopLevel { + // Somehow has a lifetime issue I can't get my head around + /* + fn entry<'a, T>(&'a mut self, path: &::clang::Entity) -> Result> + where + T: ClangEntity + FromNamespaceParent + FromTopLevel, + { + let usr = path.get_usr().ok_or_else(|| anyhow!("no usr"))?; + if let Some(parent_path) = parent(&path) { + let parent_entry = self.entry::(&parent_path)?; + if let TopLevelEntry::Occupied { + entity: namespace_parent, + } = parent_entry + { + Ok(match T::from_namespace_parent(namespace_parent, &usr) { + None => TopLevelEntry::Vacant { + parent: namespace_parent, + }, + Some(entity) => TopLevelEntry::Occupied { entity }, + }) + } else { + panic!("Wut"); + } + } else { + Ok(match T::from_toplevel(self, &usr) { + Some(entity) => TopLevelEntry::Occupied { entity }, + None => TopLevelEntry::TopLevel, + }) + } + } + */ + + fn get_entity_mut(&mut self, path: clang::Entity) -> Option<&mut dyn ClangEntity> { + let usr = path.get_usr()?; + if let Some(parent_path) = parent(path) { + let parent = self.get_entity_mut(parent_path)?; + Some(match path.get_kind().try_into().ok()? { + ClangEntityKind::Namespace => { + &mut parent.get_member_namespaces()?.get_mut(&usr)?.entity + } + ClangEntityKind::Variable(_) => { + &mut parent.get_member_variables()?.get_mut(&usr)?.entity + } + ClangEntityKind::Function(_) => { + &mut parent.get_member_functions()?.get_mut(&usr)?.entity + } + ClangEntityKind::Struct(_) => { + &mut parent.get_member_structs()?.get_mut(&usr)?.entity + } + }) + } else { + Some(match path.get_kind().try_into().ok()? { + ClangEntityKind::Namespace => &mut self.namespaces.get_mut(&usr)?.entity, + ClangEntityKind::Variable(_) => &mut self.variables.get_mut(&usr)?.entity, + ClangEntityKind::Struct(_) => &mut self.structs.get_mut(&usr)?.entity, + ClangEntityKind::Function(_) => &mut self.functions.get_mut(&usr)?.entity, + }) + } + } + + fn get_namespace_mut(&mut self, path: clang::Entity) -> Option<&mut Described> { + let usr = path.get_usr()?; + + if let Some(parent_path) = parent(path) { + let parent = self.get_entity_mut(parent_path)?; + parent.get_member_namespaces()?.get_mut(&usr) + } else { + self.namespaces.get_mut(&usr) + } + } + + fn insert(&mut self, path: clang::Entity, entity: Described) -> Result<()> + where + T: ClangEntity + std::fmt::Debug, + Self: TopLevelManipulation, + Namespace: NamespaceParentManipulation, + { + let usr = path.get_usr().ok_or_else(|| anyhow!("no usr"))?; + if let Some(parent_path) = parent(path) { + if let Some(parent_namespace) = self.get_namespace_mut(parent_path) { + parent_namespace + .entity + .get_members_mut() + // Namespace should be able to contain every kind of entity + .unwrap() + .insert(usr, entity); + Ok(()) + } else { + Err(anyhow!("has parent but no parent in tree")) + } + } else { + self.insert_toplevel(usr, entity); + Ok(()) + } + } +} + +// Like .get_semantic_parent(), but return none if the parent is the translation unit +fn parent(libclang_entity: clang::Entity) -> Option { + match libclang_entity.get_semantic_parent() { + Some(parent) => { + if parent.get_kind() != clang::EntityKind::TranslationUnit { + Some(parent) + } else { + None + } + } + None => { + warn!("get_semantic_parent() returned None"); + None + } + } +} + +trait TopLevelManipulation { + fn insert_toplevel(&mut self, usr: Usr, entity: Described); +} + +impl TopLevelManipulation for TopLevel { + fn insert_toplevel(&mut self, usr: Usr, entity: Described) { + self.namespaces.insert(usr, entity); + } +} + +impl TopLevelManipulation for TopLevel { + fn insert_toplevel(&mut self, usr: Usr, entity: Described) { + self.variables.insert(usr, entity); + } +} + +impl TopLevelManipulation for TopLevel { + fn insert_toplevel(&mut self, usr: Usr, entity: Described) { + self.functions.insert(usr, entity); + } +} + +impl TopLevelManipulation for TopLevel { + fn insert_toplevel(&mut self, usr: Usr, entity: Described) { + self.structs.insert(usr, entity); + } +} + +/* +trait FromTopLevel: ClangEntity + Sized { + fn from_toplevel<'a>(toplevel: &'a mut TopLevel, usr: &Usr) -> Option<&'a mut Described>; +} + +impl FromTopLevel for Namespace { + fn from_toplevel<'a>(toplevel: &'a mut TopLevel, usr: &Usr) -> Option<&'a mut Described> { + toplevel.namespaces.get_mut(usr) + } +} + +impl FromTopLevel for Variable { + fn from_toplevel<'a>(toplevel: &'a mut TopLevel, usr: &Usr) -> Option<&'a mut Described> { + toplevel.variables.get_mut(usr) + } +} + +impl FromTopLevel for Function { + fn from_toplevel<'a>(toplevel: &'a mut TopLevel, usr: &Usr) -> Option<&'a mut Described> { + toplevel.functions.get_mut(usr) + } +} + +impl FromTopLevel for Struct { + fn from_toplevel<'a>(toplevel: &'a mut TopLevel, usr: &Usr) -> Option<&'a mut Described> { + toplevel.structs.get_mut(usr) + } +} +*/ + +pub(crate) fn parse_compile_commands( + config: &Config, + codemap: &mut CodeMap, +) -> Result> { + let clang = Clang::new().unwrap(); + let index = Index::new( + &clang, /* exclude from pch = */ false, /* print diagnostics = */ false, + ); + + debug!("Extra libclang argument: {:?}", config.extra_args); + + debug!( + "Loading compile commands from: {:?}", + config.compile_commands_location + ); + let database = + CompilationDatabase::from_directory(&config.compile_commands_location).map_err(|()| { + CompileCommandsLoadError { + path: config.compile_commands_location.clone(), + } + })?; + + let toplevel_directory = std::env::current_dir().context("Cannot read current directory")?; + + let mut entities = TopLevel::default(); + + for command in database.get_all_compile_commands().get_commands() { + let directory = command.get_directory(); + trace!("Changing directory to: {:?}", directory); + std::env::set_current_dir(&directory) + .with_context(|| format!("Cannot change current directory to: {:?}", directory))?; + + let filename = command.get_filename(); + + let file_map = codemap.add_file( + filename + .to_str() + .context("File is not valid UTF-8")? + .to_owned(), + std::fs::read_to_string(&filename) + .with_context(|| format!("Cannot readfile: {:?}", filename))?, + ); + + trace!("Parsing file: {:?}", filename); + // The file name is passed as an argument in the compile commands + let mut parser = index.parser(""); + parser.skip_function_bodies(true); + + let mut clang_arguments = command.get_arguments(); + clang_arguments.extend_from_slice(&config.extra_args); + trace!("Parsing with libclang arguments: {:?}", clang_arguments); + parser.arguments(&clang_arguments); + + parse_unit( + &parser + .parse() + .with_context(|| format!("Could not parse file: {:?}", filename))?, + &mut entities, + &toplevel_directory, + file_map.span, + &codemap, + )?; + + trace!("Changing directory to: {:?}", directory); + std::env::set_current_dir(&toplevel_directory).with_context(|| { + format!( + "Cannot change current directory to: {:?}", + toplevel_directory + ) + })?; + } + + let normalized_entities = entities + .namespaces + .into_iter() + .map(|(usr, entity)| (usr.0, entity.into())) + .chain(entities.variables.into_iter().map(|(usr, entity)| (usr.0, entity.into()))) + .chain(entities.structs.into_iter().map(|(usr, entity)| (usr.0, entity.into()))) + .chain(entities.functions.into_iter().map(|(usr, entity)| (usr.0, entity.into()))) + .collect(); + + Ok(normalized_entities) +} + +/* +pub(crate) fn parse_file(path: T, extra_args: &[S]) -> EntitiesManager +where + T: Into, + T: AsRef, + T: ToString, + S: AsRef, + S: std::fmt::Debug, +{ + let mut codemap = CodeMap::new(); + let file_map = codemap.add_file(path.to_string(), std::fs::read_to_string(&path).unwrap()); + let file_span = file_map.span; + + let clang = Clang::new().unwrap(); + let index = Index::new(&clang, true, false); + let mut parser = index.parser(path); + parser.skip_function_bodies(true); + + parser.arguments(&extra_args); + + if log_enabled!(log::Level::Debug) { + for extra_arg in extra_args { + debug!("Extra libclang argument: {:?}", extra_arg); + } + } + + let trans_unit = parser.parse().unwrap(); + let mut entities = EntitiesManager::new(); + + parse_unit( + &trans_unit, + &mut entities, + &std::env::current_dir().unwrap(), + file_span, + &codemap, + ) + .unwrap(); + + entities +} +*/ + +fn parse_unit( + trans_unit: &TranslationUnit, + entities: &mut TopLevel, + base_dir: impl AsRef, + file_span: codemap::Span, + codemap: &CodeMap, +) -> Result<()> { + trans_unit.get_entity().visit_children(|entity, _parent| { + if is_in_system_header(entity, &base_dir) { + trace!( + "Entity is in system header, skipping: USR = {:?}", + entity.get_display_name() + ); + return clang::EntityVisitResult::Continue; + } + + // TODO: wrap this callback in another function so that we can use the + // "?" operator instead of all these `match`es + let usr = match entity.get_usr() { + Some(usr) => usr, + None => return clang::EntityVisitResult::Continue, + }; + trace!("Entity with USR = {:?}", usr); + debug!("Parsing toplevel entity: {:?}", entity); + + add_entity(entity, entities, file_span, codemap) + }); + + /* + use codemap_diagnostic::{ColorConfig, Emitter}; + + let mut emitter = Emitter::stderr(ColorConfig::Auto, Some(&codemap)); + + for diagnostic in trans_unit.get_diagnostics().iter() { + let main_diag = match clang_diag_to_codemap_diag(&diagnostic, file_span) { + Some(diag) => diag, + None => continue, + }; + + let sub_diags = diagnostic + .get_children() + .into_iter() + .filter_map(|diagnostic| clang_diag_to_codemap_diag(&diagnostic, file_span)); + + let fix_it_diags = diagnostic + .get_fix_its() + .into_iter() + .map(|fix_it| clang_fix_it_to_codemap_diag(&fix_it, file_span)); + + emitter.emit( + &std::iter::once(main_diag) + .chain(sub_diags) + .chain(fix_it_diags) + .collect::>(), + ); + } + */ + + Ok(()) +} + +fn is_in_system_header(entity: clang::Entity, base_dir: impl AsRef) -> bool { + if entity.is_in_system_header() { + true + } else if let Some(location) = entity.get_location() { + if let Some(file) = location.get_file_location().file { + !file + .get_path() + .canonicalize() + .unwrap() + .starts_with(base_dir) + } else { + // Not defined in a file? probably shouldn't document + true + } + } else { + // Not defined anywhere? probably shouldn't document + true + } +} + +// Entries encountered in the toplevel lexical context +fn add_entity( + libclang_entity: clang::Entity, + toplevel: &mut TopLevel, + file_span: codemap::Span, + codemap: &CodeMap, +) -> clang::EntityVisitResult { + if libclang_entity.get_usr().is_none() { + return clang::EntityVisitResult::Continue; + }; + + let kind = match ClangEntityKind::try_from(libclang_entity.get_kind()) { + Ok(kind) => kind, + Err(err) => { + use codemap_diagnostic::{ + ColorConfig, Diagnostic, Emitter, Level, SpanLabel, SpanStyle, + }; + let spans = if let Some(range) = libclang_entity.get_range() { + // TODO: add every file parsed in this translation unit to the + // codemap, so we can properly report errors + if !range.is_in_main_file() { + vec![] + } else { + let begin = range.get_start().get_file_location().offset as u64; + let end = range.get_end().get_file_location().offset as u64; + + vec![SpanLabel { + span: file_span.subspan(begin, end), + label: None, + style: SpanStyle::Primary, + }] + } + } else { + vec![] + }; + + let diag = Diagnostic { + level: Level::Warning, + message: format!("{}", err), + code: None, + spans, + }; + + let mut emitter = Emitter::stderr(ColorConfig::Auto, Some(codemap)); + emitter.emit(&[diag]); + + return clang::EntityVisitResult::Continue; + } + }; + + if let Some(in_tree_entity) = toplevel.get_entity_mut(libclang_entity) { + // if current.has_documentation && !tree.has_documentation { + // append_documentation + // } + } else if libclang_entity.is_definition() { + // TODO: This probably means that you can't put documentation in forward declarations. + // + // This seems restrictive, but since there can be multiple declarations but only one definition, + // you should probably put your documentation on the definition anyway? + // + // Also, skipping forward declarations allows us to not have to insert, then update the tree + // when we see the definition. + + let result = match kind { + ClangEntityKind::Namespace => Described::::try_from(libclang_entity) + .and_then(|namespace| toplevel.insert(libclang_entity, namespace)), + ClangEntityKind::Variable(_) => Described::::try_from(libclang_entity) + .and_then(|variable| toplevel.insert(libclang_entity, variable)), + ClangEntityKind::Struct(_) => Described::::try_from(libclang_entity) + .and_then(|r#struct| toplevel.insert(libclang_entity, r#struct)), + ClangEntityKind::Function(_) => Described::::try_from(libclang_entity) + .and_then(|function| toplevel.insert(libclang_entity, function)), + }; + // TODO: check result + } + + if kind == ClangEntityKind::Namespace { + // Recurse here since namespace definitions are allowed to change between translation units. + ::clang::EntityVisitResult::Recurse + } else { + ::clang::EntityVisitResult::Continue + } +} + +impl<'a, T> TryFrom> for Described +where + T: TryFrom, Error = Error>, +{ + type Error = Error; + + fn try_from(entity: clang::Entity<'a>) -> Result { + Ok(Described:: { + description: get_description(entity)?, + entity: T::try_from(entity)?, + }) + } +} + +impl<'a> TryFrom> for Namespace { + type Error = Error; + + fn try_from(entity: clang::Entity) -> Result { + match entity.get_kind().try_into() { + Ok(ClangEntityKind::Namespace) => {} + _ => panic!("Trying to parse a non-variable into a variable"), + } + debug!("Parsing Namespace: {:?}", entity); + + // Do not recurse here, but recurse in the main loop, since namespace + // definitions is allowed to change between translation units + + Ok(Namespace { + member_namespaces: Default::default(), + member_variables: Default::default(), + member_structs: Default::default(), + member_functions: Default::default(), + }) + } +} + +impl<'a> TryFrom> for Variable { + type Error = Error; + + fn try_from(entity: clang::Entity) -> Result { + let variable_kind; + match entity.get_kind().try_into() { + Ok(ClangEntityKind::Variable(kind)) => { + variable_kind = kind; + } + _ => panic!("Trying to parse a non-variable into a variable"), + } + debug!("Parsing Variable: {:?}", entity); + + let r#type = entity.get_type().unwrap().get_display_name(); + trace!("Variable has type: {:?}", r#type); + + Ok(Variable { + r#type, + kind: variable_kind, + }) + } +} + +impl<'a> TryFrom> for Struct { + type Error = Error; + + fn try_from(entity: clang::Entity) -> Result { + let struct_kind; + match entity.get_kind().try_into() { + Ok(ClangEntityKind::Struct(kind)) => { + struct_kind = kind; + } + _ => panic!("Trying to parse a non-class into a class"), + } + debug!("Parsing Struct: {:?}", entity); + + let mut member_variables = BTreeMap::new(); + let mut member_structs = BTreeMap::new(); + let mut member_functions = BTreeMap::new(); + + for child in entity.get_children() { + trace!("Struct has child: {:?}", child); + + match child.get_kind().try_into() { + Ok(ClangEntityKind::Variable(_)) => { + let child_usr = child.get_usr().ok_or_else(|| anyhow!("no usr"))?; + member_variables.insert(child_usr, Described::::try_from(child)?); + } + Ok(ClangEntityKind::Struct(_)) => { + let child_usr: Usr = child.get_usr().ok_or_else(|| anyhow!("no usr"))?; + member_structs.insert(child_usr, Described::::try_from(child)?); + } + Ok(ClangEntityKind::Function(_)) => { + let child_usr = child.get_usr().ok_or_else(|| anyhow!("no usr"))?; + member_functions.insert(child_usr, Described::::try_from(child)?); + } + _ => trace!("Skipping child"), + } + } + + Ok(Struct { + kind: struct_kind, + member_functions, + member_structs, + member_variables, + }) + } +} + +impl<'a> TryFrom> for Function { + type Error = Error; + + fn try_from(entity: clang::Entity) -> Result { + let function_kind; + match entity.get_kind().try_into() { + Ok(ClangEntityKind::Function(kind)) => { + function_kind = kind; + } + _ => panic!("Trying to parse a non-function into a function"), + } + debug!("Parsing Function: {:?}", entity); + + let return_type = entity.get_result_type().unwrap().get_display_name(); + trace!("Function has return type: {:?}", return_type); + let arguments = entity + .get_arguments() + .unwrap() + .into_iter() + .map(|arg| { + let name = arg + .get_display_name() + .unwrap_or_else(|| String::from("unnamed")); + let r#type = arg.get_type().unwrap().get_display_name(); + trace!("Function has argument {:?} of type {:?}", name, r#type); + FunctionArgument { name, r#type } + }) + .collect(); + + Ok(Function { + kind: function_kind, + arguments, + return_type, + }) + } +} + +fn get_description(entity: clang::Entity) -> Result { + let name = entity + .get_display_name() + .ok_or_else(|| anyhow!("Entity has no name: {:?}", entity))?; + + // TODO: is that the best? + if let (Some(brief), Some(comment)) = (entity.get_comment_brief(), entity.get_comment()) { + Ok(Description { + name, + brief, + detailed: parse_comment(comment), + }) + } else { + Ok(Description { + name, + brief: String::new(), + detailed: String::new(), + }) + } +} + +pub fn parse_comment(raw: String) -> String { + #[derive(Debug)] + enum CommentStyle { + // Comments of type `/**` or `/*!` + Starred, + // Comments of type `///` + SingleLine, + } + + let mut chars = raw.chars(); + let style = match &chars.as_str()[..3] { + "/*!" | "/**" => CommentStyle::Starred, + "///" => CommentStyle::SingleLine, + _ => panic!("Comment is empty or doesn't start with either `///`, `/**`, or `/*!`"), + }; + + chars.nth(2); + + let mut result = String::new(); + + 'parse_loop: loop { + let maybe_space = chars.next(); + let mut empty_line = false; + match maybe_space { + // TODO: Warn on empty comments + None => break, + Some(' ') => {} + Some('\n') => { + empty_line = true; + result.push('\n'); + } + Some(ch) => result.push(ch), + } + + if !empty_line { + let rest = chars.as_str(); + match rest.find('\n') { + None => { + result.push_str(rest); + break; + } + Some(position) => { + result.push_str(&rest[..=position]); + chars.nth(position); + } + } + } + + // Beginning of the line + let first_non_ws_ch = 'ws_loop: loop { + let maybe_whitespace = chars.next(); + match maybe_whitespace { + None => break 'parse_loop, + Some(ch) if ch.is_whitespace() => continue, + Some(ch) => break 'ws_loop ch, + } + }; + + match style { + CommentStyle::Starred if first_non_ws_ch == '*' => { + if &chars.as_str()[..1] == "/" { + break; + } + } + CommentStyle::Starred => result.push(first_non_ws_ch), + CommentStyle::SingleLine => { + assert!(first_non_ws_ch == '/'); + let rest = chars.as_str(); + if &rest[..2] == "//" { + chars.nth(1); + } else if &rest[..1] == "/" { + chars.nth(0); + } else { + panic!("Could not parse comment"); + } + } + } + } + + result +} + +#[derive(Debug, Clone, Error)] +#[error("Failed to load 'compile_commands.json' at path: {:?}", path)] +pub(crate) struct CompileCommandsLoadError { + path: PathBuf, +} -- cgit v1.2.3