[go: up one dir, main page]

osi/
str.rs

//! # String Utilities
//!
//! This module provides utilities for string operations.
4
/// Map an empty string to `None` and any other string to `Some`.
///
/// Returns `None` when `v` has zero length. Otherwise `v` is handed back
/// unchanged, wrapped in `Some`. Whitespace is not treated specially, so a
/// string like `" "` counts as non-empty.
pub fn some(v: &str) -> Option<&str> {
    match v {
        "" => None,
        filled => Some(filled),
    }
}
16
/// Compare strings with a natural sort order.
///
/// Both strings are split into alternating runs of non-numeric and numeric
/// characters (as classified by `char::is_numeric`). The runs are compared
/// pairwise from left to right and the first difference decides the result:
///
/// - Non-numeric runs are compared as plain strings.
/// - Numeric runs that consist solely of ASCII digits are compared by their
///   numeric value. The comparison works on the digit strings directly, so
///   runs of any length are supported and there is no integer overflow.
/// - If the numeric values are equal (e.g., `"007"` and `"7"`), or if a run
///   is empty or contains non-ASCII numeric characters, the runs are
///   compared as plain strings.
///
/// `Ordering::Equal` is returned only if both strings are identical.
pub fn cmp_natural(
    mut lhs: &str,
    mut rhs: &str,
) -> core::cmp::Ordering {
    // Advance over a string by splitting off a non-numeric prefix, followed
    // by a numeric-only prefix. Both prefixes are yielded to the caller and
    // either of them can be empty.
    fn advance<'a>(
        stream: &mut &'a str,
    ) -> (&'a str, &'a str) {
        let rem = *stream;

        // Split off the non-numeric prefix.
        let (name, rem) = rem.split_at(
            rem.find(|v: char| v.is_numeric())
                .unwrap_or(rem.len()),
        );

        // Split off the numeric-only prefix.
        let (number, rem) = rem.split_at(
            rem.find(|v: char| !v.is_numeric())
                .unwrap_or(rem.len()),
        );

        // Advance the stream and return the name+number tuple.
        *stream = rem;
        (name, number)
    }

    // Compare two runs by numeric value, if both are non-empty and made up
    // of ASCII digits only. Otherwise, `None` is returned and the caller has
    // to fall back to a string comparison.
    fn cmp_digits(l: &str, r: &str) -> Option<core::cmp::Ordering> {
        fn is_decimal(v: &str) -> bool {
            !v.is_empty() && v.bytes().all(|b| b.is_ascii_digit())
        }

        if !is_decimal(l) || !is_decimal(r) {
            return None;
        }

        // Without leading zeros, a longer digit string is always the bigger
        // number, and digit strings of equal length order lexicographically
        // just like the numbers they represent.
        let l = l.trim_start_matches('0');
        let r = r.trim_start_matches('0');
        Some(l.len().cmp(&r.len()).then_with(|| l.cmp(r)))
    }

    // Advance both sides one by one and compare each token individually.
    while !lhs.is_empty() || !rhs.is_empty() {
        let (l_name, l_num) = advance(&mut lhs);
        let (r_name, r_num) = advance(&mut rhs);

        let order = l_name
            // Compare the non-numeric prefix.
            .cmp(r_name)
            // Compare the numeric prefix by value, if possible.
            .then_with(|| {
                cmp_digits(l_num, r_num)
                    .unwrap_or(core::cmp::Ordering::Equal)
            })
            // Different strings can represent the same number, so the
            // original representation is used as tie-breaker. This is also
            // the fallback if a numeric comparison was not possible.
            .then_with(|| l_num.cmp(r_num));

        if order != core::cmp::Ordering::Equal {
            return order;
        }
    }

    core::cmp::Ordering::Equal
}
85
/// Turn a string into a valid symbol identifier.
///
/// The returned string mirrors `input` character by character, except that
/// every character that is not alphanumeric (in the unicode sense of
/// `char::is_alphanumeric`) is replaced by an underscore.
///
/// An additional underscore is prepended if `input` starts with a numeric
/// character, or if `input` is empty. Hence, the result is never empty.
pub fn symbolize(input: &str) -> alloc::string::String {
    // An empty input has no first character and is treated the same way as
    // an input with a numeric first character.
    let needs_prefix = match input.chars().next() {
        Some(first) => first.is_numeric(),
        None => true,
    };

    // Replacements never grow the string, so the input length plus the
    // optional prefix is enough room for the entire symbol.
    let mut symbol = alloc::string::String::with_capacity(
        input.len() + usize::from(needs_prefix),
    );

    if needs_prefix {
        symbol.push('_');
    }

    symbol.extend(
        input
            .chars()
            .map(|c| if c.is_alphanumeric() { c } else { '_' }),
    );

    symbol
}
118
#[cfg(test)]
mod tests {
    use super::*;

    // Check that `some()` maps the empty string to `None`, and hands back
    // any other string (including pure whitespace) wrapped in `Some`.
    #[test]
    fn some_basic() {
        let cases = [
            ("", None),
            (" ", Some(" ")),
            ("foobar", Some("foobar")),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(some(input), expected, "input: {:?}", input);
        }
    }

    // Verify Natural Sort Order
    //
    // Check that `cmp_natural()` compares digit runs by their numeric value
    // rather than character by character.
    #[test]
    fn cmp_natural_basic() {
        let cases = [
            ("foobar", "foobar", core::cmp::Ordering::Equal),
            ("foobar0", "foobar1", core::cmp::Ordering::Less),
            ("foobar1", "foobar0", core::cmp::Ordering::Greater),
            ("foobar2", "foobar10", core::cmp::Ordering::Less),
            ("foo2bar3", "foo10bar10", core::cmp::Ordering::Less),
        ];

        for &(lhs, rhs, expected) in cases.iter() {
            assert_eq!(
                cmp_natural(lhs, rhs),
                expected,
                "lhs: {:?}, rhs: {:?}",
                lhs,
                rhs,
            );
        }
    }

    // Run basic conversions through `symbolize()` and check that prefixes
    // are prepended and unsupported characters are replaced.
    #[test]
    fn symbolize_basic() {
        let cases = [
            ("", "_"),
            ("foobar", "foobar"),
            ("0foobar", "_0foobar"),
            ("foo-bar", "foo_bar"),
            ("0foo-bar", "_0foo_bar"),
            ("foo(bar)", "foo_bar_"),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(symbolize(input), expected, "input: {:?}", input);
        }
    }
}
171}