[go: up one dir, main page]

uucore/
lib.rs

1// This file is part of the uutils coreutils package.
2//
3// For the full copyright and license information, please view the LICENSE
4// file that was distributed with this source code.
5//! library ~ (core/bundler file)
6// #![deny(missing_docs)] //TODO: enable this
7//
8// spell-checker:ignore sigaction SIGBUS SIGSEGV extendedbigdecimal
9
10// * feature-gated external crates (re-shared as public internal modules)
11#[cfg(feature = "libc")]
12pub extern crate libc;
13#[cfg(all(feature = "windows-sys", target_os = "windows"))]
14pub extern crate windows_sys;
15
16//## internal modules
17
18mod features; // feature-gated code modules
19mod macros; // crate macros (macro_rules-type; exported to `crate::...`)
20mod mods; // core cross-platform modules
21
22pub use uucore_procs::*;
23
24// * cross-platform modules
25pub use crate::mods::display;
26pub use crate::mods::error;
27#[cfg(feature = "fs")]
28pub use crate::mods::io;
29pub use crate::mods::line_ending;
30pub use crate::mods::locale;
31pub use crate::mods::os;
32pub use crate::mods::panic;
33pub use crate::mods::posix;
34
35// * feature-gated modules
36#[cfg(feature = "backup-control")]
37pub use crate::features::backup_control;
38#[cfg(feature = "buf-copy")]
39pub use crate::features::buf_copy;
40#[cfg(feature = "checksum")]
41pub use crate::features::checksum;
42#[cfg(feature = "colors")]
43pub use crate::features::colors;
44#[cfg(feature = "custom-tz-fmt")]
45pub use crate::features::custom_tz_fmt;
46#[cfg(feature = "encoding")]
47pub use crate::features::encoding;
48#[cfg(feature = "extendedbigdecimal")]
49pub use crate::features::extendedbigdecimal;
50#[cfg(feature = "fast-inc")]
51pub use crate::features::fast_inc;
52#[cfg(feature = "format")]
53pub use crate::features::format;
54#[cfg(feature = "fs")]
55pub use crate::features::fs;
56#[cfg(feature = "lines")]
57pub use crate::features::lines;
58#[cfg(feature = "parser")]
59pub use crate::features::parser;
60#[cfg(feature = "quoting-style")]
61pub use crate::features::quoting_style;
62#[cfg(feature = "ranges")]
63pub use crate::features::ranges;
64#[cfg(feature = "ringbuffer")]
65pub use crate::features::ringbuffer;
66#[cfg(feature = "sum")]
67pub use crate::features::sum;
68#[cfg(feature = "update-control")]
69pub use crate::features::update_control;
70#[cfg(feature = "uptime")]
71pub use crate::features::uptime;
72#[cfg(feature = "version-cmp")]
73pub use crate::features::version_cmp;
74
75// * (platform-specific) feature-gated modules
76// ** non-windows (i.e. Unix + Fuchsia)
77#[cfg(all(not(windows), feature = "mode"))]
78pub use crate::features::mode;
79// ** unix-only
80#[cfg(all(unix, feature = "entries"))]
81pub use crate::features::entries;
82#[cfg(all(unix, feature = "perms"))]
83pub use crate::features::perms;
84#[cfg(all(unix, any(feature = "pipes", feature = "buf-copy")))]
85pub use crate::features::pipes;
86#[cfg(all(unix, feature = "process"))]
87pub use crate::features::process;
88#[cfg(all(unix, not(target_os = "fuchsia"), feature = "signals"))]
89pub use crate::features::signals;
90#[cfg(all(
91    unix,
92    not(target_os = "android"),
93    not(target_os = "fuchsia"),
94    not(target_os = "openbsd"),
95    not(target_os = "redox"),
96    feature = "utmpx"
97))]
98pub use crate::features::utmpx;
99// ** windows-only
100#[cfg(all(windows, feature = "wide"))]
101pub use crate::features::wide;
102
103#[cfg(feature = "fsext")]
104pub use crate::features::fsext;
105
106#[cfg(all(unix, feature = "fsxattr"))]
107pub use crate::features::fsxattr;
108
109#[cfg(all(target_os = "linux", feature = "selinux"))]
110pub use crate::features::selinux;
111
112//## core functions
113
114#[cfg(unix)]
115use nix::errno::Errno;
116#[cfg(unix)]
117use nix::sys::signal::{
118    SaFlags, SigAction, SigHandler::SigDfl, SigSet, Signal::SIGBUS, Signal::SIGSEGV, sigaction,
119};
120use std::borrow::Cow;
121use std::ffi::{OsStr, OsString};
122use std::io::{BufRead, BufReader};
123use std::iter;
124#[cfg(unix)]
125use std::os::unix::ffi::{OsStrExt, OsStringExt};
126use std::str;
127use std::sync::{LazyLock, atomic::Ordering};
128
129/// Disables the custom signal handlers installed by Rust for stack-overflow handling. With those custom signal handlers processes ignore the first SIGBUS and SIGSEGV signal they receive.
130/// See <https://github.com/rust-lang/rust/blob/8ac1525e091d3db28e67adcbbd6db1e1deaa37fb/src/libstd/sys/unix/stack_overflow.rs#L71-L92> for details.
131#[cfg(unix)]
132pub fn disable_rust_signal_handlers() -> Result<(), Errno> {
133    unsafe {
134        sigaction(
135            SIGSEGV,
136            &SigAction::new(SigDfl, SaFlags::empty(), SigSet::all()),
137        )
138    }?;
139    unsafe {
140        sigaction(
141            SIGBUS,
142            &SigAction::new(SigDfl, SaFlags::empty(), SigSet::all()),
143        )
144    }?;
145    Ok(())
146}
147
/// Execute utility code for `util`.
///
/// This macro expands to a `main` function that:
///
/// 1. mutes panic output caused by SIGPIPE failures,
/// 2. invokes the `uumain` function in `util` with the process arguments,
/// 3. flushes stdout (reporting a flush failure on stderr), and
/// 4. exits the process with the code returned by `uumain`.
///
/// All library items are referenced through `$crate`, so the expansion does not depend on the
/// name under which the calling crate imports this library.
#[macro_export]
macro_rules! bin {
    ($util:ident) => {
        pub fn main() {
            use std::io::Write;
            // suppress extraneous error output for SIGPIPE failures/panics
            $crate::panic::mute_sigpipe_panic();
            // execute utility code
            let code = $util::uumain($crate::args_os());
            // (defensively) flush stdout for utility prior to exit; see <https://github.com/rust-lang/rust/issues/23818>
            if let Err(e) = std::io::stdout().flush() {
                eprintln!("Error flushing stdout: {e}");
            }

            std::process::exit(code);
        }
    };
}
170
/// Build the version string handed to clap.
///
/// The result is a compile-time string literal of the form `(<project name>) <version>`,
/// e.g. "(uutils coreutils) 0.30.0"; clap then puts the util name in front of it.
///
/// Both parts are read from the build environment with `env!`:
/// `PROJECT_NAME_FOR_VERSION_STRING` and `CARGO_PKG_VERSION`. To use this macro, you have to
/// add `PROJECT_NAME_FOR_VERSION_STRING = "<project name>"` to the `[env]` section in
/// `.cargo/config.toml`; otherwise expansion fails at compile time.
#[macro_export]
macro_rules! crate_version {
    () => {
        concat!("(", env!("PROJECT_NAME_FOR_VERSION_STRING"), ") ", env!("CARGO_PKG_VERSION"))
    };
}
189
190/// Generate the usage string for clap.
191///
192/// This function does two things. It indents all but the first line to align
193/// the lines because clap adds "Usage: " to the first line. And it replaces
194/// all occurrences of `{}` with the execution phrase and returns the resulting
195/// `String`. It does **not** support more advanced formatting features such
196/// as `{0}`.
197pub fn format_usage(s: &str) -> String {
198    let s = s.replace('\n', &format!("\n{}", " ".repeat(7)));
199    s.replace("{}", crate::execution_phrase())
200}
201
202/// Used to check if the utility is the second argument.
203/// Used to check if we were called as a multicall binary (`coreutils <utility>`)
204pub fn get_utility_is_second_arg() -> bool {
205    crate::macros::UTILITY_IS_SECOND_ARG.load(Ordering::SeqCst)
206}
207
208/// Change the value of `UTILITY_IS_SECOND_ARG` to true
209/// Used to specify that the utility is the second argument.
210pub fn set_utility_is_second_arg() {
211    crate::macros::UTILITY_IS_SECOND_ARG.store(true, Ordering::SeqCst);
212}
213
214// args_os() can be expensive to call, it copies all of argv before iterating.
215// So if we want only the first arg or so it's overkill. We cache it.
216static ARGV: LazyLock<Vec<OsString>> = LazyLock::new(|| wild::args_os().collect());
217
218static UTIL_NAME: LazyLock<String> = LazyLock::new(|| {
219    let base_index = usize::from(get_utility_is_second_arg());
220    let is_man = usize::from(ARGV[base_index].eq("manpage"));
221    let argv_index = base_index + is_man;
222
223    ARGV[argv_index].to_string_lossy().into_owned()
224});
225
226/// Derive the utility name.
227pub fn util_name() -> &'static str {
228    &UTIL_NAME
229}
230
231static EXECUTION_PHRASE: LazyLock<String> = LazyLock::new(|| {
232    if get_utility_is_second_arg() {
233        ARGV.iter()
234            .take(2)
235            .map(|os_str| os_str.to_string_lossy().into_owned())
236            .collect::<Vec<_>>()
237            .join(" ")
238    } else {
239        ARGV[0].to_string_lossy().into_owned()
240    }
241});
242
243/// Derive the complete execution phrase for "usage".
244pub fn execution_phrase() -> &'static str {
245    &EXECUTION_PHRASE
246}
247
/// Extension trait for argument iterators.
///
/// `Args` extends any `Iterator<Item = OsString>` with helpers that collect the arguments
/// into a `Vec<String>`, either converting lossily or dropping arguments that are not valid
/// Unicode.
pub trait Args: Iterator<Item = OsString> + Sized {
    /// Collects every argument into a `Vec<String>`, converting lossily.
    ///
    /// No argument is dropped: arguments with invalid encoding are converted with
    /// [`OsStr::to_string_lossy`](std::ffi::OsStr::to_string_lossy).
    fn collect_lossy(self) -> Vec<String> {
        self.map(|arg| {
            // Valid arguments are converted as-is; only invalid ones take the lossy path.
            arg.into_string()
                .unwrap_or_else(|raw| raw.to_string_lossy().into_owned())
        })
        .collect()
    }

    /// Collects the arguments into a `Vec<String>`, skipping every argument that contains
    /// invalid encoding.
    fn collect_ignore(self) -> Vec<String> {
        self.map(OsString::into_string)
            .filter_map(Result::ok)
            .collect()
    }
}

impl<I> Args for I where I: Iterator<Item = OsString> + Sized {}
265
266/// Returns an iterator over the command line arguments as `OsString`s.
267/// args_os() can be expensive to call
268pub fn args_os() -> impl Iterator<Item = OsString> {
269    ARGV.iter().cloned()
270}
271
/// Read a line from stdin and check whether the first character is `'y'` or `'Y'`.
///
/// Returns `false` when reading from stdin fails, when the line is empty, or when it starts
/// with any other character.
pub fn read_yes() -> bool {
    let mut reply = String::new();

    if std::io::stdin().read_line(&mut reply).is_err() {
        return false;
    }

    reply.starts_with(['y', 'Y'])
}
280
281/// Converts an `OsStr` to a UTF-8 `&[u8]`.
282///
283/// This always succeeds on unix platforms,
284/// and fails on other platforms if the string can't be coerced to UTF-8.
285pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
286    #[cfg(unix)]
287    let bytes = os_string.as_bytes();
288
289    #[cfg(not(unix))]
290    let bytes = os_string
291        .to_str()
292        .ok_or_else(|| {
293            mods::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments")
294        })?
295        .as_bytes();
296
297    Ok(bytes)
298}
299
/// Performs a potentially lossy conversion from `OsStr` to bytes.
///
/// On unix platforms the conversion is always lossless: the raw bytes of the string are
/// borrowed unchanged. On non-unix platforms this wraps [`OsStr::to_string_lossy`] and
/// returns the UTF-8 bytes of its result, which are borrowed when no replacement was needed
/// and owned otherwise.
///
/// The returned `Cow` borrows from `os_string`; the lifetime is spelled `'_` so the borrow is
/// visible in the signature.
pub fn os_str_as_bytes_lossy(os_string: &OsStr) -> Cow<'_, [u8]> {
    #[cfg(unix)]
    let bytes = Cow::Borrowed(os_string.as_bytes());

    #[cfg(not(unix))]
    let bytes = match os_string.to_string_lossy() {
        Cow::Borrowed(text) => Cow::Borrowed(text.as_bytes()),
        Cow::Owned(text) => Cow::Owned(text.into_bytes()),
    };

    bytes
}
316
317/// Converts a `&[u8]` to an `&OsStr`,
318/// or parses it as UTF-8 into an [`OsString`] on non-unix platforms.
319///
320/// This always succeeds on unix platforms,
321/// and fails on other platforms if the bytes can't be parsed as UTF-8.
322pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
323    #[cfg(unix)]
324    let os_str = Cow::Borrowed(OsStr::from_bytes(bytes));
325    #[cfg(not(unix))]
326    let os_str = Cow::Owned(OsString::from(str::from_utf8(bytes).map_err(|_| {
327        mods::error::UUsageError::new(1, "Unable to transform bytes into OsStr")
328    })?));
329
330    Ok(os_str)
331}
332
333/// Converts a `Vec<u8>` into an `OsString`, parsing as UTF-8 on non-unix platforms.
334///
335/// This always succeeds on unix platforms,
336/// and fails on other platforms if the bytes can't be parsed as UTF-8.
337pub fn os_string_from_vec(vec: Vec<u8>) -> mods::error::UResult<OsString> {
338    #[cfg(unix)]
339    let s = OsString::from_vec(vec);
340    #[cfg(not(unix))]
341    let s = OsString::from(String::from_utf8(vec).map_err(|_| {
342        mods::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments")
343    })?);
344
345    Ok(s)
346}
347
/// Line iterator in the spirit of `std::io::BufRead::lines`, yielding each line as a
/// `Vec<u8>` so that non UTF-8 input is passed through untouched.
///
/// A trailing `\n` is removed from every line, together with a `\r` directly in front of it.
/// A final line without a terminating `\n` is yielded unchanged.
///
/// The iterator yields `None` at end of input and also when a read fails; read errors are
/// not reported to the caller.
pub fn read_byte_lines<R: std::io::Read>(
    mut buf_reader: BufReader<R>,
) -> impl Iterator<Item = Vec<u8>> {
    iter::from_fn(move || {
        let mut line = Vec::with_capacity(256);

        match buf_reader.read_until(b'\n', &mut line) {
            // Nothing read means end of input; errors end the iteration as well.
            Ok(0) | Err(_) => None,
            Ok(_) => {
                // Trim (\r)\n
                if line.last() == Some(&b'\n') {
                    line.pop();
                    if line.last() == Some(&b'\r') {
                        line.pop();
                    }
                }
                Some(line)
            }
        }
    })
}
372
373/// Equivalent to `std::BufRead::lines` which outputs each line as an `OsString`
374/// This won't panic on non UTF-8 characters on Unix,
375/// but it still will on Windows.
376pub fn read_os_string_lines<R: std::io::Read>(
377    buf_reader: BufReader<R>,
378) -> impl Iterator<Item = OsString> {
379    read_byte_lines(buf_reader).map(|byte_line| os_string_from_vec(byte_line).expect("UTF-8 error"))
380}
381
/// Prompt the user with a formatted string and returns `true` if they reply `'y'` or `'Y'`
///
/// This macro accepts the same syntax as `format!`. The prompt is written to `stderr`,
/// prefixed with the util name and followed by a single space for nice spacing between the
/// prompt and the user input. `stderr` is then flushed; a flush failure is reported through
/// `show_if_err!` and does not abort the prompt. Any input starting with `'y'` or `'Y'` is
/// interpreted as `yes`.
///
/// All library items are referenced through `$crate`, so the expansion does not depend on the
/// name under which the calling crate imports this library.
///
/// # Examples
/// ```
/// use uucore::prompt_yes;
/// let file = "foo.rs";
/// prompt_yes!("Do you want to delete '{file}'?");
/// ```
/// will print something like below to `stderr` (with `util_name` substituted by the actual
/// util name) and will wait for user input.
/// ```txt
/// util_name: Do you want to delete 'foo.rs'?
/// ```
#[macro_export]
macro_rules! prompt_yes(
    ($($args:tt)+) => ({
        use std::io::Write;
        eprint!("{}: ", $crate::util_name());
        eprint!($($args)+);
        eprint!(" ");
        let res = std::io::stderr().flush().map_err(|err| {
            $crate::error::USimpleError::new(1, err.to_string())
        });
        $crate::show_if_err!(res);
        $crate::read_yes()
    })
);
413
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::OsStr;

    /// Builds a three-element argument vector: two valid entries (one ASCII, one non-ASCII)
    /// followed by the caller-supplied `os_str`.
    fn make_os_vec(os_str: &OsStr) -> Vec<OsString> {
        vec![
            OsString::from("test"),
            OsString::from("สวัสดี"), // spell-checker:disable-line
            os_str.to_os_string(),
        ]
    }

    /// Checks `collect_lossy` against a vector whose last entry is the invalid `os_str`.
    #[cfg(any(unix, target_os = "redox"))]
    fn test_invalid_utf8_args_lossy(os_str: &OsStr) {
        // assert our string is invalid utf8
        assert!(os_str.to_os_string().into_string().is_err());
        let test_vec = make_os_vec(os_str);
        let collected_to_str = test_vec.clone().into_iter().collect_lossy();
        // conservation of length - when accepting lossy conversion no arguments may be dropped
        assert_eq!(collected_to_str.len(), test_vec.len());
        // first indices identical
        for index in 0..2 {
            assert_eq!(collected_to_str[index], test_vec[index].to_str().unwrap());
        }
        // lossy conversion for string with illegal encoding is done
        assert_eq!(
            *collected_to_str[2],
            os_str.to_os_string().to_string_lossy()
        );
    }

    /// Checks `collect_ignore` against a vector whose last entry is the invalid `os_str`.
    #[cfg(any(unix, target_os = "redox"))]
    fn test_invalid_utf8_args_ignore(os_str: &OsStr) {
        // assert our string is invalid utf8
        assert!(os_str.to_os_string().into_string().is_err());
        let test_vec = make_os_vec(os_str);
        let collected_to_str = test_vec.clone().into_iter().collect_ignore();
        // assert that the broken entry is filtered out
        assert_eq!(collected_to_str.len(), test_vec.len() - 1);
        // assert that the unbroken indices are converted as expected
        for index in 0..2 {
            assert_eq!(
                collected_to_str.get(index).unwrap(),
                test_vec.get(index).unwrap().to_str().unwrap()
            );
        }
    }

    #[test]
    fn valid_utf8_encoding_args() {
        // create a vector containing only correct encoding
        let test_vec = make_os_vec(&OsString::from("test2"));
        let expected: Vec<String> = test_vec
            .iter()
            .map(|arg| arg.to_str().unwrap().to_owned())
            .collect();
        // expect complete conversion without losses, even when lossy conversion is accepted
        assert_eq!(test_vec.clone().into_iter().collect_lossy(), expected);
        // nothing may be filtered out when every argument is valid
        assert_eq!(test_vec.into_iter().collect_ignore(), expected);
    }

    #[cfg(any(unix, target_os = "redox"))]
    #[test]
    fn invalid_utf8_args_unix() {
        use std::os::unix::ffi::OsStrExt;

        // "fo", an invalid byte (0x80), then "o"
        let source = [0x66, 0x6f, 0x80, 0x6f];
        let os_str = OsStr::from_bytes(&source[..]);
        test_invalid_utf8_args_lossy(os_str);
        test_invalid_utf8_args_ignore(os_str);
    }

    #[test]
    fn test_format_usage() {
        // a single line is returned unchanged
        assert_eq!(format_usage("expr EXPRESSION"), "expr EXPRESSION");
        // continuation lines are indented by seven spaces
        assert_eq!(
            format_usage("expr EXPRESSION\nexpr OPTION"),
            "expr EXPRESSION\n       expr OPTION"
        );
    }
}