[go: up one dir, main page]

roff/
lib.rs

1//! A document in the ROFF format.
2//!
3//! [ROFF] is a family of Unix text-formatting languages, implemented
4//! by the `nroff`, `troff`, and `groff` programs, among others. See
5//! [groff(7)] for a description of the language. This structure is an
6//! abstract representation of a document in ROFF format. It is meant
7//! for writing code to generate ROFF documents, such as manual pages.
8//!
9//! # Example
10//!
11//! ```
12//! # use roff::*;
13//! let doc = Roff::new().text(vec![roman("hello, world")]).render();
14//! assert!(doc.ends_with("hello, world\n"));
15//! ```
16//!
17//! [ROFF]: https://en.wikipedia.org/wiki/Roff_(software)
18//! [groff(7)]: https://manpages.debian.org/bullseye/groff/groff.7.en.html
19
20#![cfg_attr(docsrs, feature(doc_auto_cfg))]
21#![warn(missing_docs)]
22#![warn(clippy::print_stderr)]
23#![warn(clippy::print_stdout)]
24
25use std::io::Write;
26use std::write;
27
28/// A ROFF document, consisting of lines.
29///
30/// Lines are either control lines (requests that are built in, or
31/// invocations of macros), or text lines.
32///
33/// # Example
34///
35/// ```
36/// # use roff::*;
37/// let doc = Roff::new()
38///     .control("TH", ["FOO", "1"])
39///     .control("SH", ["NAME"])
40///     .text([roman("foo - do a foo thing")])
41///     .render();
42/// assert!(doc.ends_with(".TH FOO 1\n.SH NAME\nfoo \\- do a foo thing\n"));
43/// ```
44#[derive(Debug, PartialEq, Eq, Default)]
45pub struct Roff {
46    lines: Vec<Line>,
47}
48
49impl Roff {
50    /// Instantiate a `Roff`
51    pub fn new() -> Self {
52        Default::default()
53    }
54
55    /// Append a control line.
56    ///
57    /// The line consist of the name of a built-in command or macro,
58    /// and some number of arguments. Arguments that contain spaces
59    /// will be enclosed with double quotation marks.
60    pub fn control<'a>(
61        &mut self,
62        name: impl Into<String>,
63        args: impl IntoIterator<Item = &'a str>,
64    ) -> &mut Self {
65        self.lines.push(Line::control(
66            name.into(),
67            args.into_iter().map(|s| s.to_owned()).collect(),
68        ));
69        self
70    }
71
72    /// Append a text line.
73    ///
74    /// The line will be rendered in a way that ensures it can't be
75    /// interpreted as a control line. The caller does not need to
76    /// ensure, for example, that the line doesn't start with a
77    /// period ("`.`") or an apostrophe ("`'`").
78    pub fn text(&mut self, inlines: impl Into<Vec<Inline>>) -> &mut Self {
79        self.lines.push(Line::text(inlines.into()));
80        self
81    }
82
83    /// Render as ROFF source text that can be fed to a ROFF implementation.
84    pub fn render(&self) -> String {
85        let mut buf = vec![];
86        self.to_writer(&mut buf).unwrap(); // writing to a Vec always works
87        String::from_utf8(buf)
88            .expect("output is utf8 if all input is utf8 and our API guarantees that")
89    }
90
91    /// Write to a writer.
92    pub fn to_writer(&self, w: &mut dyn Write) -> Result<(), std::io::Error> {
93        w.write_all(APOSTROPHE_PREABMLE.as_bytes())?;
94        for line in self.lines.iter() {
95            line.render(w, Apostrophes::Handle)?;
96        }
97        Ok(())
98    }
99
100    /// Render without handling apostrophes specially.
101    ///
102    /// You probably want [`render`](Roff::render) or
103    /// [`to_writer`](Roff::to_writer) instead of this method.
104    ///
105    /// Without special handling, apostrophes get typeset as right
106    /// single quotes, including in words like "don't". In most
107    /// situations, such as in manual pages, that's unwanted. The
108    /// other methods handle apostrophes specially to prevent it, but
109    /// for completeness, and for testing, this method is provided to
110    /// avoid it.
111    pub fn to_roff(&self) -> String {
112        let mut buf = vec![];
113        for line in self.lines.iter() {
114            // Writing to a Vec always works, so we discard any error.
115            line.render(&mut buf, Apostrophes::DontHandle).unwrap();
116        }
117        String::from_utf8(buf)
118            .expect("output is utf8 if all input is utf8 and our API guarantees that")
119    }
120}
121
122impl<I: Into<Inline>> From<I> for Roff {
123    fn from(other: I) -> Self {
124        let mut r = Roff::new();
125        r.text([other.into()]);
126        r
127    }
128}
129
130impl<R: Into<Roff>> FromIterator<R> for Roff {
131    fn from_iter<I: IntoIterator<Item = R>>(iter: I) -> Self {
132        let mut r = Roff::new();
133        for i in iter {
134            r.lines.extend(i.into().lines);
135        }
136        r
137    }
138}
139
140impl<R: Into<Roff>> Extend<R> for Roff {
141    fn extend<T: IntoIterator<Item = R>>(&mut self, iter: T) {
142        for i in iter {
143            self.lines.extend(i.into().lines);
144        }
145    }
146}
147
/// One piece of a text line.
///
/// The text is escaped for ROFF when it is rendered, so no inline
/// escape sequences are passed through to ROFF. Text may contain
/// newlines; a period or apostrophe right after a newline is escaped
/// so that ROFF doesn't read the rest as a control line.
///
/// The variants store their strings exactly as the API user supplied
/// them. All escaping is done later by `Line::render`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Inline {
    /// Text in the "roman" font, the normal font when nothing else is
    /// specified.
    Roman(String),

    /// Text in the italic (slanted) font.
    Italic(String),

    /// Text in a bold face font.
    Bold(String),

    /// A hard line break. Having it as an inline element makes it
    /// easy to break a line in the middle of a paragraph.
    LineBreak,
}
174
175/// Turn a string slice into inline text in the roman font.
176///
177/// This is equivalent to the [roman] function, but may be more
178/// convenient to use.
179impl<S: Into<String>> From<S> for Inline {
180    fn from(s: S) -> Self {
181        roman(s)
182    }
183}
184
185/// Return some inline text in the "roman" font.
186///
187/// The roman font is the normal font, if no other font is chosen.
188pub fn roman(input: impl Into<String>) -> Inline {
189    Inline::Roman(input.into())
190}
191
192/// Return some inline text in the bold font.
193pub fn bold(input: impl Into<String>) -> Inline {
194    Inline::Bold(input.into())
195}
196
197/// Return some inline text in the italic font.
198pub fn italic(input: impl Into<String>) -> Inline {
199    Inline::Italic(input.into())
200}
201
202/// Return an inline element for a hard line break.
203pub fn line_break() -> Inline {
204    Inline::LineBreak
205}
206
207/// A line in a ROFF document.
208#[derive(Debug, PartialEq, Eq, Clone)]
209pub(crate) enum Line {
210    /// A control line.
211    Control {
212        /// Name of control request or macro being invoked.
213        name: String,
214
215        /// Arguments on control line.
216        args: Vec<String>,
217    },
218
219    /// A text line.
220    Text(Vec<Inline>),
221}
222
223impl Line {
224    /// Append a control line.
225    pub(crate) fn control(name: String, args: Vec<String>) -> Self {
226        Self::Control { name, args }
227    }
228
229    /// Append a text line, consisting of inline elements.
230    pub(crate) fn text(parts: Vec<Inline>) -> Self {
231        Self::Text(parts)
232    }
233
234    /// Generate a ROFF line.
235    ///
236    /// All the ROFF code generation and special handling happens here.
237    fn render(
238        &self,
239        out: &mut dyn Write,
240        handle_apostrophes: Apostrophes,
241    ) -> Result<(), std::io::Error> {
242        match self {
243            Self::Control { name, args } => {
244                write!(out, ".{}", name)?;
245                for arg in args {
246                    write!(out, " {}", &escape_spaces(arg))?;
247                }
248            }
249            Self::Text(inlines) => {
250                let mut at_line_start = true;
251                for inline in inlines.iter() {
252                    // We need to handle line breaking specially: it
253                    // introduces a control line to the ROFF, and the
254                    // leading period of that mustn't be escaped.
255                    match inline {
256                        Inline::LineBreak => {
257                            if at_line_start {
258                                writeln!(out, ".br")?;
259                            } else {
260                                writeln!(out, "\n.br")?;
261                            }
262                        }
263                        Inline::Roman(text) | Inline::Italic(text) | Inline::Bold(text) => {
264                            let mut text = escape_inline(text);
265                            if handle_apostrophes == Apostrophes::Handle {
266                                text = escape_apostrophes(&text);
267                            };
268                            let text = escape_leading_cc(&text);
269                            if let Inline::Bold(_) = inline {
270                                write!(out, r"\fB{}\fR", text)?;
271                            } else if let Inline::Italic(_) = inline {
272                                write!(out, r"\fI{}\fR", text)?;
273                            } else {
274                                if at_line_start && starts_with_cc(&text) {
275                                    // Line would start with a period, so we
276                                    // insert a non-printable, zero-width glyph to
277                                    // prevent it from being interpreted as such.
278                                    // We only do that when it's needed, though,
279                                    // to avoid making the output ugly.
280                                    //
281                                    // Note that this isn't handled by
282                                    // escape_leading_cc, as it
283                                    // doesn't know when an inline
284                                    // element is at the start of a
285                                    // line.
286                                    write!(out, r"\&").unwrap();
287                                }
288                                write!(out, "{}", text)?;
289                            }
290                        }
291                    }
292                    at_line_start = false;
293                }
294            }
295        };
296        writeln!(out)?;
297        Ok(())
298    }
299}
300
/// Report whether `line` begins with a ROFF control character, that
/// is, a period or an apostrophe.
fn starts_with_cc(line: &str) -> bool {
    matches!(line.as_bytes().first(), Some(b'.') | Some(b'\''))
}
305
/// Format one control-line argument, quoting it when necessary.
///
/// An argument is enclosed in double quotation marks when it contains
/// a space (otherwise it would be split into several arguments), when
/// it is empty (otherwise it would vanish and shift the arguments
/// after it), or when it starts with a double quote (otherwise that
/// quote would be taken as the start of a quoted argument). Inside the
/// quotation marks, each double quote in the argument is doubled,
/// which is how troff represents a literal double quote in a quoted
/// macro argument.
///
/// Any other argument is returned unchanged.
fn escape_spaces(w: &str) -> String {
    let needs_quoting = w.is_empty() || w.contains(' ') || w.starts_with('"');
    if needs_quoting {
        format!("\"{}\"", w.replace('"', "\"\""))
    } else {
        w.to_owned()
    }
}
315
/// Keep a period or apostrophe that directly follows a newline from
/// being read as the start of a control line, by putting `\&` in front
/// of it.
///
/// This has to be done for apostrophes whether or not they get
/// special handling for typesetting: a leading apostrophe on a line
/// indicates a control line. A control character at the very start of
/// `s` is left alone; only those after a newline are touched.
fn escape_leading_cc(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    let mut after_newline = false;
    for ch in s.chars() {
        if after_newline && (ch == '.' || ch == '\'') {
            escaped.push_str("\\&");
        }
        escaped.push(ch);
        after_newline = ch == '\n';
    }
    escaped
}
323
/// Escape the characters of `text` that the roff processor would
/// otherwise interpret in a text line: every backslash and every dash
/// gets a backslash put in front of it.
///
/// Apostrophes are not handled here.
fn escape_inline(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '\\' => escaped.push_str(r"\\"),
            '-' => escaped.push_str(r"\-"),
            other => escaped.push(other),
        }
    }
    escaped
}
330
331/// Handle apostrophes.
332fn escape_apostrophes(text: &str) -> String {
333    text.replace('\'', APOSTROPHE)
334}
335
/// Whether rendering should treat apostrophes in text specially.
#[derive(PartialEq, Eq)]
enum Apostrophes {
    /// Replace apostrophes with a use of the apostrophe string
    /// variable.
    Handle,
    /// Leave apostrophes in the text as they are.
    DontHandle,
}
341
/// ROFF source that interpolates the `Aq` string variable, which
/// stands for an apostrophe.
const APOSTROPHE: &str = "\\*(Aq";
344
/// The preamble put at the start of rendered output.
///
/// It defines the string variable `Aq`, which holds an apostrophe.
/// For historical reasons, there seems to be no other portable way
/// to represent apostrophes across the various implementations of
/// the ROFF language. Implementations that produce output such as
/// `PostScript` or PDF typeset an apostrophe as a right single quote,
/// which looks different from an apostrophe. For terminal output
/// ("ASCII"), such as when using nroff, the two look the same. For
/// manual pages and similar content, an apostrophe is more generally
/// wanted than a right single quote, so every apostrophe in input
/// text is converted into a use of the string variable defined here.
///
/// The [`to_roff`](Roff::to_roff) method avoids this special
/// handling; the [`render`](Roff::render) and
/// [`to_writer`](Roff::to_writer) methods use it.
///
/// See: <https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=507673#65>
const APOSTROPHE_PREABMLE: &str = concat!(
    ".ie \\n(.g .ds Aq \\(aq\n",
    ".el .ds Aq '\n",
);
368
#[cfg(test)]
mod test {
    use super::*;

    /// Render a document with one text line made of `inlines`, without
    /// the apostrophe preamble or apostrophe handling.
    fn plain(inlines: Vec<Inline>) -> String {
        Roff::new().text(inlines).to_roff()
    }

    // Escaping helpers.

    #[test]
    fn escape_dash() {
        assert_eq!(escape_inline("-"), r"\-");
    }

    #[test]
    fn escape_backslash() {
        assert_eq!(escape_inline(r"\x"), r"\\x");
    }

    #[test]
    fn escape_backslash_and_dash() {
        assert_eq!(escape_inline(r"\-"), r"\\\-");
    }

    #[test]
    fn escapes_leading_control_chars() {
        let escaped = escape_leading_cc("foo\n.bar\n'yo");
        assert_eq!(escaped, "foo\n\\&.bar\n\\&'yo");
    }

    #[test]
    fn escape_plain() {
        assert_eq!(escape_inline("abc"), "abc");
    }

    // Rendering of whole lines.

    #[test]
    fn render_roman() {
        assert_eq!(plain(vec![roman("foo")]), "foo\n");
    }

    #[test]
    fn render_dash() {
        assert_eq!(plain(vec![roman("foo-bar")]), "foo\\-bar\n");
    }

    #[test]
    fn render_italic() {
        assert_eq!(plain(vec![italic("foo")]), "\\fIfoo\\fR\n");
    }

    #[test]
    fn render_bold() {
        assert_eq!(plain(vec![bold("foo")]), "\\fBfoo\\fR\n");
    }

    #[test]
    fn render_text() {
        assert_eq!(plain(vec![roman("roman")]), "roman\n");
    }

    #[test]
    fn render_text_with_leading_period() {
        assert_eq!(plain(vec![roman(".roman")]), "\\&.roman\n");
    }

    #[test]
    fn render_text_with_newline_period() {
        assert_eq!(plain(vec![roman("foo\n.roman")]), "foo\n\\&.roman\n");
    }

    #[test]
    fn render_line_break() {
        let rendered = plain(vec![roman("roman"), Inline::LineBreak, roman("more")]);
        assert_eq!(rendered, "roman\n.br\nmore\n");
    }

    #[test]
    fn render_control() {
        let rendered = Roff::new()
            .control("foo", ["bar", "foo and bar"])
            .to_roff();
        assert_eq!(rendered, ".foo bar \"foo and bar\"\n");
    }
}
452}