[go: up one dir, main page]

wild/
lib.rs

1#![cfg_attr(docsrs, feature(doc_cfg))]
//! Emulates glob (wildcard) argument expansion on Windows. No-op on other platforms.
//!
//! Unix shells expand command-line arguments like `a*`, `file.???` and pass them expanded to applications.
//! On Windows `cmd.exe` doesn't do that, so this crate emulates the expansion there.
//! Instead of `std::env::args()` use `wild::args()`.
//!
//! The glob syntax on Windows is limited to `*`, `?`, and `[a-z]`/`[!a-z]` ranges.
//! Glob characters in quotes (`"*"`) are not expanded.
//!
//! Parsing of quoted arguments precisely follows Windows native syntax (`CommandLineToArgvW`, specifically)
//! with all its weirdness.
//!
//! ## Usage
//!
//! Use `wild::args()` instead of `std::env::args()` (or `wild::args_os()` instead of `std::env::args_os()`).
//!
//! If you use [Clap](https://lib.rs/crates/clap), use `.get_matches_from(wild::args_os())` instead of `.get_matches()`.
19
20/// An optional, experimental low-level interface for parsing command-line strings from other sources. In most cases [`args`] and [`args_os`] are more appropriate.
21#[cfg(any(test, windows))]
22pub mod parser;
23
24#[cfg(any(test, windows))]
25mod globiter;
26
27#[cfg(any(test, windows))]
28mod argsiter;
29#[cfg(windows)]
30pub use crate::argsiter::*;
31
/// Iterator of arguments. Equivalent to `std::env::Args`. See [`args`] for details.
///
/// On unix it's an alias for `std::env::Args`.
/// On Windows it's a custom iterator that implements glob expansion.
#[cfg(not(windows))]
pub type Args = std::env::Args;
38
/// Same as [`Args`], but keeps invalid Unicode intact.
///
/// On non-Windows platforms it's an alias for `std::env::ArgsOs`.
#[cfg(not(windows))]
pub type ArgsOs = std::env::ArgsOs;
42
43/// Returns an iterator of glob-expanded command-line arguments. Equivalent of `std::env::args()`.
44///
45/// On non-Windows platforms it returns `env::args()` as-is,
46/// assuming expansion has already been done by the shell.
47///
48/// On Windows it emulates the glob expansion itself.
49/// The iterator will parse arguments incrementally and access
50/// the file system as it parses. This allows reading potentially huge lists of
51/// filenames, but it's not an atomic snapshot (use `.collect()` if you need that).
52#[cfg(not(windows))]
53#[must_use]
54pub fn args() -> Args {
55    std::env::args()
56}
57
58#[cfg(not(windows))]
59#[must_use]
60pub fn args_os() -> ArgsOs {
61    std::env::args_os()
62}
63
/// Returns an iterator of glob-expanded command-line arguments. Equivalent of `std::env::args()`.
///
/// On Windows it emulates the glob expansion itself.
/// The iterator will parse arguments incrementally and access
/// the file system as it parses. This allows reading potentially huge lists of
/// filenames, but it's not an atomic snapshot (use `.collect()` if you need that).
///
/// On non-Windows platforms it returns `env::args()` as-is,
/// assuming expansion has already been done by the shell.
#[cfg(windows)]
#[must_use]
pub fn args() -> Args {
    // Wraps the `OsString` iterator returned by `args_os()`.
    Args { iter: args_os() }
}
78
/// Same as [`args`], but returns `OsString`.
///
/// The iterator is built from the raw command line of the current process
/// (see `raw_command_line`).
#[cfg(windows)]
#[must_use]
pub fn args_os() -> ArgsOs {
    ArgsOs::from_raw_command_line(raw_command_line())
}
85
#[cfg(windows)]
extern "system" {
    /// Win32 `GetCommandLineW`: yields a pointer to the command line of the current process
    /// as UTF-16 code units. Callers in this file treat it as NUL-terminated.
    fn GetCommandLineW() -> *const u16;
}
90
/// Returns the raw UTF-16 command line of the current process, without the terminating NUL.
///
/// Yields an empty slice when `GetCommandLineW` returns a null pointer.
#[cfg(windows)]
fn raw_command_line() -> &'static [u16] {
    // SAFETY: relies on the Win32 contract of `GetCommandLineW`: the returned pointer, when
    // non-null, addresses a NUL-terminated UTF-16 buffer. The `'static` lifetime additionally
    // assumes that buffer stays valid and unmodified for the rest of the process
    // (NOTE(review): confirm against the Win32 documentation).
    unsafe {
        let line_ptr = GetCommandLineW();
        if line_ptr.is_null() {
            return &[];
        }
        // Measure the string by scanning for the terminating NUL code unit.
        let mut len = 0;
        while *line_ptr.add(len) != 0 {
            len += 1;
        }
        std::slice::from_raw_parts(line_ptr, len)
    }
}
105
/// Test helper: parses `s` as a command line and joins the resulting arguments with `;`.
///
/// Arguments that carry a glob pattern are rendered as `<glob PATTERN>`;
/// all others are rendered as their (lossily decoded) text.
#[cfg(test)]
fn parsed(s: &str) -> String {
    let wide: Vec<_> = s.encode_utf16().collect();
    let rendered: Vec<_> = globiter::GlobArgs::new(&wide)
        .map(|arg| match arg.pattern {
            Some(p) => format!("<glob {p}>"),
            // Only decode the plain text when there is no pattern to show.
            None => arg.text.to_string_lossy().into_owned(),
        })
        .collect();
    rendered.join(";")
}
114
/// Test helper: parses `s` as a command line and joins the plain text of every argument
/// with `;`, ignoring whether the argument carries a glob pattern.
#[cfg(test)]
fn unquoted(s: &str) -> String {
    let wide: Vec<_> = s.encode_utf16().collect();
    let texts: Vec<_> = globiter::GlobArgs::new(&wide)
        .map(|arg| arg.text.to_string_lossy().into_owned())
        .collect();
    texts.join(";")
}
123
// Smoke test: the real process command line yields at least one argument.
#[test]
fn test_actual_args() {
    assert!(args_os().count() >= 1);
}
128
// Single-argument parsing: quoting, backslash escapes and glob detection.
#[test]
fn test_parse_1() {
    assert_eq!(r#"漢字"#, parsed("漢字"));
    assert_eq!(r#"漢字"#, parsed("\"漢字\""));
    assert_eq!(r#"漢\字"#, parsed("\"漢\\字\""));
    assert_eq!(r#"unquoted"#, parsed("unquoted"));
    assert_eq!(r#"<glob *>"#, parsed("*"));
    assert_eq!(r#"<glob ?>"#, parsed("?"));
    assert_eq!(r#"quoted"#, parsed("\"quoted\""));
    assert_eq!(r#"quoted"#, unquoted("\"quoted\""));
    assert_eq!(r#"*"#, unquoted("\"*\""));
    assert_eq!(r#"?"#, unquoted("\"?\""));
    assert_eq!(r#"]"#, unquoted("\"]\""));
    assert_eq!(r#"quo"ted"#, parsed(r#"  "quo\"ted"  "#)); // backslash can escape quotes
    assert_eq!(r#"<glob quo"ted?  >"#, parsed(r#"  "quo""ted?"  "#)); // and quote can escape quotes
    assert_eq!(r#"unquo"ted"#, parsed(r#"  unquo\"ted  "#)); // backslash can escape quotes, even outside quotes
    assert_eq!(r#"<glob unquoted?>"#, parsed(r#"  unquo""ted?  "#)); // quote escaping does not work outside quotes
    assert_eq!(r#"""#, parsed(r#""""""#)); // quote escapes quote in quoted string
    assert_eq!(r#"""#, parsed(r#"""""""#));
    assert_eq!(r#""""#, parsed(r#""""""""#));
    assert_eq!(r#""""#, parsed(r#"""""""""#)); // """ == "X", """""" = "X""X"
    assert_eq!(r#""""#, parsed(r#""""""""""#));
    assert_eq!(r#"""""#, parsed(r#"""""""""""#));
    assert_eq!(r#"\\server\share\path with spaces"#, parsed(r#""\\server\share\path with spaces""#)); // lone double backslash is not special
    assert_eq!("aba", parsed(r#""a"b"a""#)); // quotes can go in and out
    assert_eq!("abac", parsed(r#""a"b"a"c"#)); // quotes can go in and out
    assert_eq!(r#"\\"#, parsed(r#"\\\\""#));
    assert_eq!(r#"<glob ?\\?>"#, parsed(r#"?\\\\"?"#)); // unpaired quote is interpreted like an end quote
    assert_eq!(r#"\""#, parsed(r#"\\\""#));
    assert_eq!(r#"<glob \"[a-z]>"#, parsed(r#"\\\"[a-z]"#));
    assert_eq!("    ", parsed(r#""    "#)); // unterminated quotes are OK
    assert_eq!("", parsed(r#""""#));
    assert_eq!(r#"<glob [a-c][d-z]>"#, parsed(r#"[a-c]""[d-z]"#));
    assert_eq!("", parsed(r#"""#));
    assert_eq!("x", parsed(r#"x""#));
    assert_eq!(r#"\;x;y"#, parsed(r"\ x y"));
    assert_eq!(r#"\\;x;y"#, parsed(r"\\ x y"));
    assert_eq!(r#"a\\\;x;y"#, parsed(r"a\\\ x y"));
    assert_eq!(r#"<glob a\\\*>;x;y"#, parsed(r"a\\\* x y"));
    assert_eq!(r#"a\\\ x;y"#, parsed(r#""a\\\ x" y"#));
    assert_eq!(r#"\"#, parsed(r"\"));
    assert_eq!(r#"\\"#, parsed(r"\\"));
    assert_eq!(r#"\\\"#, parsed(r"\\\"));
    assert_eq!(r#"\\\\"#, parsed(r"\\\\"));
    assert_eq!(r#"\\a"#, parsed(r#"\\\\"a"#));
    assert_eq!(r#"\\a"#, parsed(r#"\\\\"a""#));
    assert_eq!(r#"¥¥"#, parsed(r#"¥¥""#)); // in Unicode this isn't backslash
    assert_eq!(r#".\path\to\folder\;-rf"#, parsed(r#".\path\to\folder\ -rf"#));
}
178
// Default behaviour (feature `glob-quoted-on-windows` off): glob characters inside quotes
// are not treated as glob syntax.
#[test]
#[cfg(not(feature = "glob-quoted-on-windows"))]
fn test_unquoted() {
    assert_eq!(r#"*"#, parsed("\"*\""));
    assert_eq!(r#"?"#, parsed("\"?\""));
    assert_eq!(r#"]"#, parsed("\"]\""));
    assert_eq!("<glob c*a[*]b*a[*]c*>", parsed(r#"c*"a*"b*"a*"c*"#)); // quotes can go in and out
    assert_eq!(r#"<glob [[]a-c[]]"[d-z]>"#, parsed(r#""[a-c]""[d-z]""#));
}
188
// With feature `glob-quoted-on-windows` on: glob characters inside quotes still
// produce glob patterns.
#[test]
#[cfg(feature = "glob-quoted-on-windows")]
fn test_unquoted() {
    assert_eq!(r#"<glob *>"#, parsed("\"*\""));
    assert_eq!(r#"<glob ?>"#, parsed("\"?\""));
    assert_eq!(r#"<glob ]>"#, parsed("\"]\""));
    assert_eq!("<glob c*a*b*a*c*>", parsed(r#"c*"a*"b*"a*"c*"#)); // quotes can go in and out
    assert_eq!(r#"<glob [a-c]"[d-z]>"#, parsed(r#""[a-c]""[d-z]""#));
}
198
// Command lines that split into several arguments (joined with `;` by `parsed`).
#[test]
fn test_parse_multi() {
    assert_eq!(r#"unquoted;quoted"#, parsed("unquoted \"quoted\""));
    assert_eq!(r#"quo"ted;quo"ted    "#, parsed(r#"  "quo\"ted"  "quo""ted"    "#));
    assert_eq!(r#"unquo"ted;""#, parsed(r#" unquo\"ted """"""#));
    assert_eq!(r#"a;a"#, parsed(r#"a"" a"#));
    assert_eq!(r#"a";a"#, parsed(r#"a""" a"#));
    assert_eq!(r#"\\;\""#, parsed(r#"\\\\"       \\\"  "#));
    assert_eq!("x;    ", parsed(r#" x  "    "#));
}