diff options
author | Minijackson <minijackson@riseup.net> | 2021-11-07 23:09:34 +0100 |
---|---|---|
committer | Minijackson <minijackson@riseup.net> | 2021-11-07 23:09:34 +0100 |
commit | 517cabe8ec54d0bf5f5f9cc9089d76a1fad7bb6a (patch) | |
tree | 3140cf7b16727f4752633f4e6f5d098f23889af0 /src/build.rs | |
download | pandoc-docbook-517cabe8ec54d0bf5f5f9cc9089d76a1fad7bb6a.tar.gz pandoc-docbook-517cabe8ec54d0bf5f5f9cc9089d76a1fad7bb6a.zip |
initial commit with PoC
Diffstat (limited to 'src/build.rs')
-rw-r--r-- | src/build.rs | 451 |
1 files changed, 451 insertions, 0 deletions
diff --git a/src/build.rs b/src/build.rs new file mode 100644 index 0000000..0b0c646 --- /dev/null +++ b/src/build.rs | |||
@@ -0,0 +1,451 @@ | |||
1 | use std::path::{Path, PathBuf}; | ||
2 | |||
3 | use eyre::{eyre, ContextCompat, Result, WrapErr}; | ||
4 | use log::{debug, error, log_enabled, trace, warn}; | ||
5 | use pandoc_ast::MutVisitor; | ||
6 | |||
7 | use crate::{ | ||
8 | filters, | ||
9 | utils::{AutoIdentifier, PandocMeta, PandocOutputExt}, | ||
10 | }; | ||
11 | |||
12 | pub fn do_build(config: &crate::config::Config) -> Result<()> { | ||
13 | let summary = Summary::try_from_file(&config.book.summary)?; | ||
14 | let source_root = Path::new(&config.book.summary) | ||
15 | .parent() | ||
16 | .expect("Summary has no parent"); | ||
17 | let files = summary.collect_source_files(source_root)?; | ||
18 | |||
19 | let build_dir = Path::new(&config.build.build_dir); | ||
20 | trace!("Creating build directory: '{}'", build_dir.display()); | ||
21 | std::fs::create_dir_all(build_dir).wrap_err_with(|| { | ||
22 | format!( | ||
23 | "Could not create build directory: '{}'", | ||
24 | build_dir.display() | ||
25 | ) | ||
26 | })?; | ||
27 | |||
28 | // Pre-create files so that we know which links to relativize | ||
29 | for SourceFile { path, .. } in &files { | ||
30 | let output_file = build_dir.join(path.with_extension("html")); | ||
31 | |||
32 | let product_dir = build_dir.join(path.parent().expect("Source file has no parent")); | ||
33 | trace!("Creating product directory: '{}'", product_dir.display()); | ||
34 | std::fs::create_dir_all(&product_dir).wrap_err_with(|| { | ||
35 | format!( | ||
36 | "Could not create build output directory: '{}'", | ||
37 | product_dir.display() | ||
38 | ) | ||
39 | })?; | ||
40 | |||
41 | std::fs::OpenOptions::new() | ||
42 | .write(true) | ||
43 | .create(true) | ||
44 | .open(&output_file) | ||
45 | .wrap_err_with(|| { | ||
46 | format!("Failed to create output file: '{}'", output_file.display()) | ||
47 | })?; | ||
48 | } | ||
49 | |||
50 | for SourceFile { path, source } in &files { | ||
51 | let mut pandoc_command = pandoc::new(); | ||
52 | |||
53 | let output_file = build_dir.join(path.with_extension("html")); | ||
54 | |||
55 | debug!("Generating file: '{}'", output_file.display()); | ||
56 | |||
57 | // To be captured in the filter | ||
58 | let config_clone = config.clone(); | ||
59 | let source_dir = path | ||
60 | .parent() | ||
61 | .expect("Source file has no parent") | ||
62 | .to_path_buf(); | ||
63 | let build_dir_clone = build_dir.to_path_buf(); | ||
64 | let summary_clone = summary.source.clone(); | ||
65 | |||
66 | pandoc_command | ||
67 | .set_input(pandoc::InputKind::Pipe(source.to_json())) | ||
68 | .set_input_format(pandoc::InputFormat::Json, vec![]) | ||
69 | .set_output(pandoc::OutputKind::File(output_file)) | ||
70 | .set_output_format(pandoc::OutputFormat::Html5, vec![]) | ||
71 | .add_options(&[pandoc::PandocOption::SelfContained]) | ||
72 | .add_filter(move |source| { | ||
73 | let level = source_dir | ||
74 | .components() | ||
75 | .skip_while(|c| matches!(c, std::path::Component::CurDir)) | ||
76 | .count(); | ||
77 | |||
78 | let mut insert_summary_filter = filters::InsertSummary { | ||
79 | level, | ||
80 | summary: &summary_clone, | ||
81 | }; | ||
82 | |||
83 | let mut relativize_urls_filter = filters::RelativizeUrls { | ||
84 | config: &config_clone, | ||
85 | // TODO: other output formats | ||
86 | extension: "html", | ||
87 | build_dir: &build_dir_clone, | ||
88 | source_dir: &source_dir, | ||
89 | }; | ||
90 | |||
91 | let mut source = pandoc_ast::Pandoc::from_json(&source); | ||
92 | insert_summary_filter.walk_pandoc(&mut source); | ||
93 | relativize_urls_filter.walk_pandoc(&mut source); | ||
94 | source.to_json() | ||
95 | }); | ||
96 | |||
97 | if log_enabled!(log::Level::Trace) { | ||
98 | pandoc_command.set_show_cmdline(true); | ||
99 | } | ||
100 | |||
101 | pandoc_command | ||
102 | .execute() | ||
103 | .wrap_err_with(|| format!("Failed to generate output of: '{}'", path.display()))?; | ||
104 | } | ||
105 | |||
106 | Ok(()) | ||
107 | } | ||
108 | |||
109 | // TODO: move that into generated.rs | ||
110 | fn generate_source( | ||
111 | title: Vec<pandoc_ast::Inline>, | ||
112 | children: Vec<(PandocMeta, PathBuf)>, | ||
113 | level: usize, | ||
114 | ) -> Result<pandoc_ast::Pandoc> { | ||
115 | // TODO: make that text configurable | ||
116 | let mut content = vec![pandoc_ast::Block::Para(vec![pandoc_ast::Inline::Str( | ||
117 | "Here are the articles in this section:".to_string(), | ||
118 | )])]; | ||
119 | |||
120 | for (mut child, file) in children { | ||
121 | let title = match child.remove("title") { | ||
122 | None => { | ||
123 | warn!("Missing title for file: '{}'", file.display()); | ||
124 | vec![pandoc_ast::Inline::Str("Untitled page".to_string())] | ||
125 | } | ||
126 | Some(pandoc_ast::MetaValue::MetaInlines(inlines)) => inlines, | ||
127 | Some(pandoc_ast::MetaValue::MetaString(s)) => { | ||
128 | vec![pandoc_ast::Inline::Str(s)] | ||
129 | } | ||
130 | // TODO: check that other values are actually invalid | ||
131 | _ => { | ||
132 | error!("Invalid value for title"); | ||
133 | vec![pandoc_ast::Inline::Str("Untitled page".to_string())] | ||
134 | } | ||
135 | }; | ||
136 | |||
137 | let link_target = std::iter::repeat(std::path::Component::ParentDir) | ||
138 | .take(level) | ||
139 | .collect::<PathBuf>() | ||
140 | .join(file); | ||
141 | |||
142 | content.push(pandoc_ast::Block::Para(vec![pandoc_ast::Inline::Link( | ||
143 | // TODO: attribute to recognize big links? | ||
144 | (String::new(), vec![], vec![]), | ||
145 | title, | ||
146 | ( | ||
147 | link_target | ||
148 | .to_str() | ||
149 | .expect("Filename contains invalid unicode") | ||
150 | .to_string(), | ||
151 | String::new(), | ||
152 | ), | ||
153 | )])); | ||
154 | } | ||
155 | |||
156 | let mut meta = PandocMeta::new(); | ||
157 | meta.insert( | ||
158 | "title".to_string(), | ||
159 | pandoc_ast::MetaValue::MetaInlines(title), | ||
160 | ); | ||
161 | |||
162 | Ok(pandoc_ast::Pandoc { | ||
163 | meta, | ||
164 | blocks: content, | ||
165 | pandoc_api_version: vec![1, 22], | ||
166 | }) | ||
167 | } | ||
168 | |||
169 | fn list_content(block: &mut pandoc_ast::Block) -> Result<&mut Vec<Vec<pandoc_ast::Block>>> { | ||
170 | match block { | ||
171 | pandoc_ast::Block::OrderedList(_, list) => Ok(list), | ||
172 | pandoc_ast::Block::BulletList(list) => Ok(list), | ||
173 | _ => Err(eyre!("Expected list in summary, found something else")), | ||
174 | } | ||
175 | } | ||
176 | |||
177 | fn try_into_node_vec(vec: &mut Vec<Vec<pandoc_ast::Block>>) -> Result<Vec<Node>> { | ||
178 | vec.iter_mut().map(Node::try_from_vec_block).collect() | ||
179 | } | ||
180 | |||
181 | // TODO: support separators like these: | ||
182 | // --------- | ||
183 | |||
184 | #[derive(Debug)] | ||
185 | pub struct Summary { | ||
186 | source: pandoc_ast::Pandoc, | ||
187 | nodes: Vec<Node>, | ||
188 | } | ||
189 | |||
190 | #[derive(Debug)] | ||
191 | struct SourceFile { | ||
192 | path: PathBuf, | ||
193 | source: pandoc_ast::Pandoc, | ||
194 | } | ||
195 | |||
196 | // TODO: move that into summary.rs | ||
197 | impl Summary { | ||
198 | fn try_from_file(file: &str) -> Result<Self> { | ||
199 | debug!("Parsing summary"); | ||
200 | let mut pandoc_command = pandoc::new(); | ||
201 | pandoc_command | ||
202 | .add_input(file) | ||
203 | .set_output_format(pandoc::OutputFormat::Json, vec![]) | ||
204 | .set_output(pandoc::OutputKind::Pipe); | ||
205 | |||
206 | trace!("Launching pandoc command"); | ||
207 | |||
208 | if log_enabled!(log::Level::Trace) { | ||
209 | pandoc_command.set_show_cmdline(true); | ||
210 | } | ||
211 | |||
212 | let output = pandoc_command | ||
213 | .execute() | ||
214 | .wrap_err("Could not execute pandoc")? | ||
215 | .buffer(); | ||
216 | |||
217 | let document = pandoc_ast::Pandoc::from_json(&output); | ||
218 | |||
219 | let summary: Self = document.try_into()?; | ||
220 | if summary.has_files_missing( | ||
221 | Path::new(file) | ||
222 | .parent() | ||
223 | .expect("Summary file has no parent"), | ||
224 | ) { | ||
225 | return Err(eyre!("Files from the summary are missing, aborting")); | ||
226 | } | ||
227 | |||
228 | Ok(summary) | ||
229 | } | ||
230 | |||
231 | fn has_files_missing(&self, root: &Path) -> bool { | ||
232 | // Do not use `.any()` to prevent short-circuiting, we want to report all missing files | ||
233 | self.nodes.iter().fold(false, |acc, node| { | ||
234 | let missing = node.has_files_missing(root); | ||
235 | acc || missing | ||
236 | }) | ||
237 | } | ||
238 | |||
239 | /// Get a list of source files. | ||
240 | /// | ||
241 | /// If a file is a generated file, generate it and store it in memory. | ||
242 | fn collect_source_files(&self, root: &Path) -> Result<Vec<SourceFile>> { | ||
243 | let mut result = Vec::new(); | ||
244 | |||
245 | for node in &self.nodes { | ||
246 | node.collect_source_files(&mut result, root, Path::new("."), 0)?; | ||
247 | } | ||
248 | |||
249 | Ok(result) | ||
250 | } | ||
251 | } | ||
252 | |||
253 | impl TryFrom<pandoc_ast::Pandoc> for Summary { | ||
254 | type Error = eyre::Error; | ||
255 | |||
256 | fn try_from(mut document: pandoc_ast::Pandoc) -> Result<Self, Self::Error> { | ||
257 | if document.blocks.len() != 1 { | ||
258 | return Err(eyre!("Summary does not contain a single list")); | ||
259 | } | ||
260 | |||
261 | let root = &mut document.blocks[0]; | ||
262 | |||
263 | let list = list_content(root)?; | ||
264 | |||
265 | let nodes = list | ||
266 | .iter_mut() | ||
267 | .map(Node::try_from_vec_block) | ||
268 | .collect::<Result<_>>()?; | ||
269 | |||
270 | Ok(Summary { | ||
271 | source: document, | ||
272 | nodes, | ||
273 | }) | ||
274 | } | ||
275 | } | ||
276 | |||
277 | #[derive(Debug)] | ||
278 | pub enum Node { | ||
279 | Provided { | ||
280 | file: String, | ||
281 | children: Vec<Node>, | ||
282 | }, | ||
283 | Generated { | ||
284 | file: String, | ||
285 | title: Vec<pandoc_ast::Inline>, | ||
286 | children: Vec<Node>, | ||
287 | }, | ||
288 | } | ||
289 | |||
290 | impl Node { | ||
291 | fn children(&self) -> &[Node] { | ||
292 | match self { | ||
293 | Node::Provided { children, .. } => children, | ||
294 | Node::Generated { children, .. } => children, | ||
295 | } | ||
296 | } | ||
297 | |||
298 | fn has_files_missing(&self, root: &Path) -> bool { | ||
299 | if let Node::Provided { file, .. } = self { | ||
300 | if !root.join(file).exists() { | ||
301 | error!("File '{}' specified in summary does not exists", file); | ||
302 | return true; | ||
303 | } | ||
304 | } | ||
305 | |||
306 | // Do not use `.any()` to prevent short-circuiting, we want to report all missing files | ||
307 | self.children().iter().fold(false, |acc, node| { | ||
308 | let missing = node.has_files_missing(root); | ||
309 | acc || missing | ||
310 | }) | ||
311 | } | ||
312 | |||
313 | fn collect_source_files( | ||
314 | &self, | ||
315 | result: &mut Vec<SourceFile>, | ||
316 | root: &Path, | ||
317 | parent: &Path, | ||
318 | level: usize, | ||
319 | ) -> Result<()> { | ||
320 | let new_parent; | ||
321 | let children_; | ||
322 | let path; | ||
323 | let source: Box<dyn FnOnce(_) -> _>; | ||
324 | |||
325 | match self { | ||
326 | Node::Provided { file, children } => { | ||
327 | trace!("Parsing file: '{}'", file); | ||
328 | |||
329 | // TODO: some filters here? not all filters, since we may want to filter generated | ||
330 | // files too | ||
331 | let mut pandoc_command = pandoc::new(); | ||
332 | pandoc_command | ||
333 | .add_input(&root.join(file)) | ||
334 | .set_output(pandoc::OutputKind::Pipe) | ||
335 | .set_output_format(pandoc::OutputFormat::Json, vec![]); | ||
336 | |||
337 | if log_enabled!(log::Level::Trace) { | ||
338 | pandoc_command.set_show_cmdline(true); | ||
339 | } | ||
340 | |||
341 | let raw_source = pandoc_command | ||
342 | .execute() | ||
343 | .wrap_err_with(|| format!("Failed to parse '{}'", file))? | ||
344 | .buffer(); | ||
345 | source = Box::new(move |_| Ok(pandoc_ast::Pandoc::from_json(&raw_source))); | ||
346 | |||
347 | let file = Path::new(&file); | ||
348 | let stem = file.file_stem().expect("No file name"); | ||
349 | let id = | ||
350 | AutoIdentifier::from(stem.to_str().wrap_err("Invalid unicode in file name")?); | ||
351 | |||
352 | path = file.into(); | ||
353 | new_parent = file.parent().expect("Source file has no parent").join(&*id); | ||
354 | children_ = children; | ||
355 | } | ||
356 | |||
357 | Self::Generated { | ||
358 | file, | ||
359 | title, | ||
360 | children, | ||
361 | } => { | ||
362 | trace!("Found file to generate: '{}'", file); | ||
363 | |||
364 | path = file.into(); | ||
365 | |||
366 | source = Box::new(move |direct_children| { | ||
367 | generate_source(title.clone(), direct_children, level) | ||
368 | }); | ||
369 | new_parent = Path::new(file).with_extension(""); | ||
370 | children_ = children; | ||
371 | } | ||
372 | }; | ||
373 | |||
374 | let mut direct_children = Vec::with_capacity(children_.len()); | ||
375 | |||
376 | for child in children_ { | ||
377 | child.collect_source_files(result, root, &new_parent, level + 1)?; | ||
378 | let direct_child = result.last().unwrap(); | ||
379 | direct_children.push((direct_child.source.meta.clone(), direct_child.path.clone())); | ||
380 | } | ||
381 | |||
382 | result.push(SourceFile { | ||
383 | path, | ||
384 | source: source(direct_children)?, | ||
385 | }); | ||
386 | |||
387 | Ok(()) | ||
388 | } | ||
389 | |||
390 | // Wil also modify the block to linkify generated pages | ||
391 | fn try_from_vec_block(value: &mut Vec<pandoc_ast::Block>) -> Result<Self> { | ||
392 | if value.len() != 1 && value.len() != 2 { | ||
393 | // TODO: better error message? | ||
394 | return Err(eyre!("Summary does not contain a single list")); | ||
395 | } | ||
396 | |||
397 | let mut value = value.iter_mut(); | ||
398 | |||
399 | let item = match value.next().unwrap() { | ||
400 | pandoc_ast::Block::Plain(inlines) => inlines, | ||
401 | pandoc_ast::Block::Para(inlines) => inlines, | ||
402 | _ => return Err(eyre!("List item is not a link or plain text")), | ||
403 | }; | ||
404 | |||
405 | if item.is_empty() { | ||
406 | return Err(eyre!("Summary list items cannot be empty")); | ||
407 | } | ||
408 | |||
409 | let children = if let Some(children) = value.next() { | ||
410 | try_into_node_vec(list_content(children)?)? | ||
411 | } else { | ||
412 | vec![] | ||
413 | }; | ||
414 | |||
415 | match &item[0] { | ||
416 | pandoc_ast::Inline::Link(_, _, target) => { | ||
417 | if item.len() != 1 { | ||
418 | return Err(eyre!("Summary list item not a single link or plain text")); | ||
419 | } | ||
420 | |||
421 | let file = target.0.clone(); | ||
422 | |||
423 | Ok(Node::Provided { file, children }) | ||
424 | } | ||
425 | _ => { | ||
426 | let title = item.clone(); | ||
427 | |||
428 | let id = AutoIdentifier::from(title.as_slice()); | ||
429 | |||
430 | // TODO: missing parent | ||
431 | |||
432 | // Move generate page into this pass | ||
433 | //let mut file = parent.join(&*id); | ||
434 | //file.set_extension("md"); | ||
435 | |||
436 | // TODO: Attribute to style them differently | ||
437 | *item = vec![pandoc_ast::Inline::Link( | ||
438 | (String::new(), vec!["generated".to_string()], vec![]), | ||
439 | item.clone(), | ||
440 | (id.0.clone(), String::new()), | ||
441 | )]; | ||
442 | |||
443 | Ok(Node::Generated { | ||
444 | file: id.0, | ||
445 | title, | ||
446 | children, | ||
447 | }) | ||
448 | } | ||
449 | } | ||
450 | } | ||
451 | } | ||