roff/lib.rs
1//! A document in the ROFF format.
2//!
3//! [ROFF] is a family of Unix text-formatting languages, implemented
4//! by the `nroff`, `troff`, and `groff` programs, among others. See
5//! [groff(7)] for a description of the language. This structure is an
6//! abstract representation of a document in ROFF format. It is meant
7//! for writing code to generate ROFF documents, such as manual pages.
8//!
9//! # Example
10//!
11//! ```
12//! # use roff::*;
13//! let doc = Roff::new().text(vec![roman("hello, world")]).render();
14//! assert!(doc.ends_with("hello, world\n"));
15//! ```
16//!
17//! [ROFF]: https://en.wikipedia.org/wiki/Roff_(software)
18//! [groff(7)]: https://manpages.debian.org/bullseye/groff/groff.7.en.html
19
20#![cfg_attr(docsrs, feature(doc_auto_cfg))]
21#![warn(missing_docs)]
22#![warn(clippy::print_stderr)]
23#![warn(clippy::print_stdout)]
24
25use std::io::Write;
26use std::write;
27
/// A ROFF document, consisting of lines.
///
/// Lines are either control lines (requests that are built in, or
/// invocations of macros), or text lines. Lines are kept in the order
/// in which they were appended, and are rendered in that order.
///
/// # Example
///
/// ```
/// # use roff::*;
/// let doc = Roff::new()
///     .control("TH", ["FOO", "1"])
///     .control("SH", ["NAME"])
///     .text([roman("foo - do a foo thing")])
///     .render();
/// assert!(doc.ends_with(".TH FOO 1\n.SH NAME\nfoo \\- do a foo thing\n"));
/// ```
#[derive(Debug, PartialEq, Eq, Default)]
pub struct Roff {
    // The control and text lines of the document, in output order.
    lines: Vec<Line>,
}
48
49impl Roff {
50 /// Instantiate a `Roff`
51 pub fn new() -> Self {
52 Default::default()
53 }
54
55 /// Append a control line.
56 ///
57 /// The line consist of the name of a built-in command or macro,
58 /// and some number of arguments. Arguments that contain spaces
59 /// will be enclosed with double quotation marks.
60 pub fn control<'a>(
61 &mut self,
62 name: impl Into<String>,
63 args: impl IntoIterator<Item = &'a str>,
64 ) -> &mut Self {
65 self.lines.push(Line::control(
66 name.into(),
67 args.into_iter().map(|s| s.to_owned()).collect(),
68 ));
69 self
70 }
71
72 /// Append a text line.
73 ///
74 /// The line will be rendered in a way that ensures it can't be
75 /// interpreted as a control line. The caller does not need to
76 /// ensure, for example, that the line doesn't start with a
77 /// period ("`.`") or an apostrophe ("`'`").
78 pub fn text(&mut self, inlines: impl Into<Vec<Inline>>) -> &mut Self {
79 self.lines.push(Line::text(inlines.into()));
80 self
81 }
82
83 /// Render as ROFF source text that can be fed to a ROFF implementation.
84 pub fn render(&self) -> String {
85 let mut buf = vec![];
86 self.to_writer(&mut buf).unwrap(); // writing to a Vec always works
87 String::from_utf8(buf)
88 .expect("output is utf8 if all input is utf8 and our API guarantees that")
89 }
90
91 /// Write to a writer.
92 pub fn to_writer(&self, w: &mut dyn Write) -> Result<(), std::io::Error> {
93 w.write_all(APOSTROPHE_PREABMLE.as_bytes())?;
94 for line in self.lines.iter() {
95 line.render(w, Apostrophes::Handle)?;
96 }
97 Ok(())
98 }
99
100 /// Render without handling apostrophes specially.
101 ///
102 /// You probably want [`render`](Roff::render) or
103 /// [`to_writer`](Roff::to_writer) instead of this method.
104 ///
105 /// Without special handling, apostrophes get typeset as right
106 /// single quotes, including in words like "don't". In most
107 /// situations, such as in manual pages, that's unwanted. The
108 /// other methods handle apostrophes specially to prevent it, but
109 /// for completeness, and for testing, this method is provided to
110 /// avoid it.
111 pub fn to_roff(&self) -> String {
112 let mut buf = vec![];
113 for line in self.lines.iter() {
114 // Writing to a Vec always works, so we discard any error.
115 line.render(&mut buf, Apostrophes::DontHandle).unwrap();
116 }
117 String::from_utf8(buf)
118 .expect("output is utf8 if all input is utf8 and our API guarantees that")
119 }
120}
121
122impl<I: Into<Inline>> From<I> for Roff {
123 fn from(other: I) -> Self {
124 let mut r = Roff::new();
125 r.text([other.into()]);
126 r
127 }
128}
129
130impl<R: Into<Roff>> FromIterator<R> for Roff {
131 fn from_iter<I: IntoIterator<Item = R>>(iter: I) -> Self {
132 let mut r = Roff::new();
133 for i in iter {
134 r.lines.extend(i.into().lines);
135 }
136 r
137 }
138}
139
140impl<R: Into<Roff>> Extend<R> for Roff {
141 fn extend<T: IntoIterator<Item = R>>(&mut self, iter: T) {
142 for i in iter {
143 self.lines.extend(i.into().lines);
144 }
145 }
146}
147
/// A part of a text line.
///
/// Text will be escaped for ROFF. No inline escape sequences will be
/// passed to ROFF. The text may contain newlines, but leading periods
/// and apostrophes will be escaped so that they won't be interpreted
/// by ROFF as control lines.
///
/// Note that the strings stored in the variants are stored as they're
/// received from the API user. The `Line::render` function handles
/// escaping etc.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Inline {
    /// Text in the "roman" font, which is the normal font if nothing
    /// else is specified.
    Roman(String),

    /// Text in the italic (slanted) font.
    Italic(String),

    /// Text in a bold face font.
    Bold(String),

    /// A hard line break. This is an inline element so it's easy to
    /// insert a line break in a paragraph. It is rendered as a `.br`
    /// control line.
    LineBreak,
}
174
175/// Turn a string slice into inline text in the roman font.
176///
177/// This is equivalent to the [roman] function, but may be more
178/// convenient to use.
179impl<S: Into<String>> From<S> for Inline {
180 fn from(s: S) -> Self {
181 roman(s)
182 }
183}
184
185/// Return some inline text in the "roman" font.
186///
187/// The roman font is the normal font, if no other font is chosen.
188pub fn roman(input: impl Into<String>) -> Inline {
189 Inline::Roman(input.into())
190}
191
192/// Return some inline text in the bold font.
193pub fn bold(input: impl Into<String>) -> Inline {
194 Inline::Bold(input.into())
195}
196
197/// Return some inline text in the italic font.
198pub fn italic(input: impl Into<String>) -> Inline {
199 Inline::Italic(input.into())
200}
201
/// Return an inline element for a hard line break.
///
/// This is a convenience for [`Inline::LineBreak`].
pub fn line_break() -> Inline {
    Inline::LineBreak
}
206
/// A line in a ROFF document.
///
/// A line is either a control line or a text line; `Line::render`
/// turns either kind into ROFF source.
#[derive(Debug, PartialEq, Eq, Clone)]
pub(crate) enum Line {
    /// A control line.
    Control {
        /// Name of control request or macro being invoked, without
        /// the leading period (the period is added when rendering).
        name: String,

        /// Arguments on control line, stored unquoted.
        args: Vec<String>,
    },

    /// A text line, made of inline elements stored unescaped.
    Text(Vec<Inline>),
}
222
223impl Line {
224 /// Append a control line.
225 pub(crate) fn control(name: String, args: Vec<String>) -> Self {
226 Self::Control { name, args }
227 }
228
229 /// Append a text line, consisting of inline elements.
230 pub(crate) fn text(parts: Vec<Inline>) -> Self {
231 Self::Text(parts)
232 }
233
234 /// Generate a ROFF line.
235 ///
236 /// All the ROFF code generation and special handling happens here.
237 fn render(
238 &self,
239 out: &mut dyn Write,
240 handle_apostrophes: Apostrophes,
241 ) -> Result<(), std::io::Error> {
242 match self {
243 Self::Control { name, args } => {
244 write!(out, ".{}", name)?;
245 for arg in args {
246 write!(out, " {}", &escape_spaces(arg))?;
247 }
248 }
249 Self::Text(inlines) => {
250 let mut at_line_start = true;
251 for inline in inlines.iter() {
252 // We need to handle line breaking specially: it
253 // introduces a control line to the ROFF, and the
254 // leading period of that mustn't be escaped.
255 match inline {
256 Inline::LineBreak => {
257 if at_line_start {
258 writeln!(out, ".br")?;
259 } else {
260 writeln!(out, "\n.br")?;
261 }
262 }
263 Inline::Roman(text) | Inline::Italic(text) | Inline::Bold(text) => {
264 let mut text = escape_inline(text);
265 if handle_apostrophes == Apostrophes::Handle {
266 text = escape_apostrophes(&text);
267 };
268 let text = escape_leading_cc(&text);
269 if let Inline::Bold(_) = inline {
270 write!(out, r"\fB{}\fR", text)?;
271 } else if let Inline::Italic(_) = inline {
272 write!(out, r"\fI{}\fR", text)?;
273 } else {
274 if at_line_start && starts_with_cc(&text) {
275 // Line would start with a period, so we
276 // insert a non-printable, zero-width glyph to
277 // prevent it from being interpreted as such.
278 // We only do that when it's needed, though,
279 // to avoid making the output ugly.
280 //
281 // Note that this isn't handled by
282 // escape_leading_cc, as it
283 // doesn't know when an inline
284 // element is at the start of a
285 // line.
286 write!(out, r"\&").unwrap();
287 }
288 write!(out, "{}", text)?;
289 }
290 }
291 }
292 at_line_start = false;
293 }
294 }
295 };
296 writeln!(out)?;
297 Ok(())
298 }
299}
300
/// Report whether the first character of `line` is one of the control
/// characters, a period or an apostrophe.
///
/// An empty string yields `false`.
fn starts_with_cc(line: &str) -> bool {
    matches!(line.chars().next(), Some('.') | Some('\''))
}
305
/// Quote a control line argument if it needs quoting.
///
/// Arguments that contain a space are enclosed in double quotation
/// marks so that they stay a single argument. An empty argument is
/// rendered as `""`; written bare it would vanish from the control
/// line and shift every later argument one position to the left.
/// Any other argument is returned unchanged.
///
/// This doesn't handle arguments that themselves contain double
/// quotation marks in any way.
fn escape_spaces(w: &str) -> String {
    if w.is_empty() || w.contains(' ') {
        format!("\"{}\"", w)
    } else {
        w.to_owned()
    }
}
315
/// Guard every period or apostrophe that directly follows a newline,
/// so that it can't be interpreted as the start of a control line.
///
/// A `\&` is inserted between the newline and the control character.
/// This needs to be done for apostrophes whether or not they get
/// special handling for typesetting: a leading apostrophe on a line
/// indicates a control line. A control character at the very start
/// of `s` is left alone; the caller decides whether `s` starts a line.
fn escape_leading_cc(s: &str) -> String {
    let mut guarded = String::with_capacity(s.len());
    let mut after_newline = false;
    for ch in s.chars() {
        if after_newline && (ch == '.' || ch == '\'') {
            guarded.push_str(r"\&");
        }
        guarded.push(ch);
        after_newline = ch == '\n';
    }
    guarded
}
323
/// Escape the characters of a text line that the roff processor
/// would otherwise interpret: every backslash and every dash gets a
/// backslash in front of it.
///
/// Apostrophes are not handled here.
fn escape_inline(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        if ch == '\\' || ch == '-' {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
330
331/// Handle apostrophes.
332fn escape_apostrophes(text: &str) -> String {
333 text.replace('\'', APOSTROPHE)
334}
335
/// Whether `Line::render` gives apostrophes in text special treatment.
#[derive(Eq, PartialEq)]
enum Apostrophes {
    /// Replace each apostrophe with a reference to the apostrophe
    /// string variable. Used by `Roff::to_writer`.
    Handle,
    /// Leave apostrophes as they are. Used by `Roff::to_roff`.
    DontHandle,
}
341
/// Use the apostrophe string variable.
///
/// This is the text that `escape_apostrophes` substitutes for each
/// apostrophe: a reference to the string named `Aq`, which the
/// preamble written by `Roff::to_writer` defines.
const APOSTROPHE: &str = r"\*(Aq";
344
/// A preamble added to the start of rendered output.
///
/// This defines a string variable, `Aq`, that contains an apostrophe.
/// For historical reasons, there seems to be no other portable way to
/// represent apostrophes across various implementations of the ROFF
/// language. In implementations that produce output like `PostScript`
/// or PDF, an apostrophe gets typeset as a right single quote, which
/// looks different from an apostrophe. For terminal output ("ASCII"),
/// such as when using nroff, an apostrophe looks indistinguishable
/// from a right single quote. For manual pages, and similar content,
/// an apostrophe is more generally desired than the right single
/// quote, so we convert all apostrophe characters in input text into
/// a use of the string variable defined in the preamble.
///
/// The preamble is an if/else pair: the first line defines `Aq` as
/// `\(aq` when the `.g` register is non-zero (presumably meaning the
/// processor is groff; see the bug report linked below), and the
/// second line defines it as a plain `'` otherwise.
///
/// The special handling of apostrophes is avoided in the
/// [`to_roff`](Roff::to_roff) method, but it's used in the
/// [`render`](Roff::render) and [`to_writer`](Roff::to_writer)
/// methods.
///
/// See: <https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=507673#65>
// NOTE(review): the constant's name misspells "PREAMBLE". Renaming it
// means changing its use in `Roff::to_writer` at the same time.
const APOSTROPHE_PREABMLE: &str = r#".ie \n(.g .ds Aq \(aq
.el .ds Aq '
"#;
368
/// Unit tests for the escaping helpers and for rendering through
/// `Roff::to_roff` (that is, without apostrophe handling).
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn escape_dash() {
        let escaped = escape_inline("-");
        assert_eq!(escaped, r"\-");
    }

    #[test]
    fn escape_backslash() {
        let escaped = escape_inline(r"\x");
        assert_eq!(escaped, r"\\x");
    }

    #[test]
    fn escape_backslash_and_dash() {
        let escaped = escape_inline(r"\-");
        assert_eq!(escaped, r"\\\-");
    }

    #[test]
    fn escapes_leading_control_chars() {
        let escaped = escape_leading_cc("foo\n.bar\n'yo");
        assert_eq!(escaped, "foo\n\\&.bar\n\\&'yo");
    }

    #[test]
    fn escape_plain() {
        let escaped = escape_inline("abc");
        assert_eq!(escaped, "abc");
    }

    #[test]
    fn render_roman() {
        let mut doc = Roff::new();
        doc.text([roman("foo")]);
        assert_eq!(doc.to_roff(), "foo\n");
    }

    #[test]
    fn render_dash() {
        let mut doc = Roff::new();
        doc.text([roman("foo-bar")]);
        assert_eq!(doc.to_roff(), "foo\\-bar\n");
    }

    #[test]
    fn render_italic() {
        let mut doc = Roff::new();
        doc.text([italic("foo")]);
        assert_eq!(doc.to_roff(), "\\fIfoo\\fR\n");
    }

    #[test]
    fn render_bold() {
        let mut doc = Roff::new();
        doc.text([bold("foo")]);
        assert_eq!(doc.to_roff(), "\\fBfoo\\fR\n");
    }

    #[test]
    fn render_text() {
        let mut doc = Roff::new();
        doc.text([roman("roman")]);
        assert_eq!(doc.to_roff(), "roman\n");
    }

    #[test]
    fn render_text_with_leading_period() {
        let mut doc = Roff::new();
        doc.text([roman(".roman")]);
        assert_eq!(doc.to_roff(), "\\&.roman\n");
    }

    #[test]
    fn render_text_with_newline_period() {
        let mut doc = Roff::new();
        doc.text([roman("foo\n.roman")]);
        assert_eq!(doc.to_roff(), "foo\n\\&.roman\n");
    }

    #[test]
    fn render_line_break() {
        let mut doc = Roff::new();
        doc.text([roman("roman"), Inline::LineBreak, roman("more")]);
        assert_eq!(doc.to_roff(), "roman\n.br\nmore\n");
    }

    #[test]
    fn render_control() {
        let mut doc = Roff::new();
        doc.control("foo", ["bar", "foo and bar"]);
        assert_eq!(doc.to_roff(), ".foo bar \"foo and bar\"\n");
    }
}
452}