[go: up one dir, main page]

natord/
lib.rs

1// Natord: Natural ordering for Rust.
2// Copyright (c) 2014-2015, Kang Seonghoon.
3// See README.md and LICENSE.txt for details.
4
5/*!
6
7# Natord 1.0.9
8
9Natural ordering for Rust. (also known as `rust-natord`)
10This allows for the comparison like this:
11
12~~~~ {.rust}
13let mut files = vec!("rfc2086.txt", "rfc822.txt", "rfc1.txt");
14files.sort_by(|&a, &b| natord::compare(a, b));
15assert_eq!(files, ["rfc1.txt", "rfc822.txt", "rfc2086.txt"]);
16~~~~
17
18There are multiple natural ordering algorithms available.
19This version of natural ordering is inspired by
20[Martin Pool's `strnatcmp.c`](http://sourcefrog.net/projects/natsort/).
21
22*/
23
24#![crate_name = "natord"]
25#![crate_type = "lib"]
26
27use std::cmp::Ordering;
28use std::cmp::Ordering::{Less, Equal, Greater};
29
30/// Compares two iterators of "characters" possibly containing "digits".
31/// The natural ordering can be customized with the following parameters:
32///
33/// * `skip` returns true if the "character" does not affect the comparison,
34///   other than splitting two consecutive digits.
35/// * `cmp` compares two "characters", assuming that they are not "digits".
36/// * `to_digit` converts a "character" into a "digit" if possible. The digit of zero is special.
37pub fn compare_iter<T, L, R, Skip, Cmp, ToDigit>(left: L, right: R, mut skip: Skip, mut cmp: Cmp,
38                                                 mut to_digit: ToDigit) -> Ordering
39        where L: Iterator<Item=T>,
40              R: Iterator<Item=T>,
41              Skip: for<'a> FnMut(&'a T) -> bool,
42              Cmp: for<'a> FnMut(&'a T, &'a T) -> Ordering,
43              ToDigit: for<'a> FnMut(&'a T) -> Option<isize> {
44    let mut left = left.fuse();
45    let mut right = right.fuse();
46
47    let mut l;
48    let mut r;
49    let mut ll;
50    let mut rr;
51
52    macro_rules! read_left {
53        () => ({
54            l = left.next();
55            ll = l.as_ref().and_then(|v| to_digit(v));
56        })
57    }
58
59    macro_rules! read_right {
60        () => ({
61            r = right.next();
62            rr = r.as_ref().and_then(|v| to_digit(v));
63        })
64    }
65
66    macro_rules! return_unless_equal {
67        ($ord:expr) => (
68            match $ord {
69                Equal => {}
70                lastcmp => return lastcmp,
71            }
72        )
73    }
74
75    read_left!();
76    read_right!();
77    'nondigits: loop {
78        // skip preceding whitespaces
79        while l.as_ref().map_or(false, |c| skip(c)) { read_left!(); }
80        while r.as_ref().map_or(false, |c| skip(c)) { read_right!(); }
81
82        match (l, r) {
83            (Some(l_), Some(r_)) => match (ll, rr) {
84                (Some(ll_), Some(rr_)) => {
85                    if ll_ == 0 || rr_ == 0 {
86                        // left-aligned matching. (`015` < `12`)
87                        return_unless_equal!(ll_.cmp(&rr_));
88                        'digits_left: loop {
89                            read_left!();
90                            read_right!();
91                            match (ll, rr) {
92                                (Some(ll_), Some(rr_)) => return_unless_equal!(ll_.cmp(&rr_)),
93                                (Some(_), None) => return Greater,
94                                (None, Some(_)) => return Less,
95                                (None, None) => break 'digits_left,
96                            }
97                        }
98                    } else {
99                        // right-aligned matching. (`15` < `123`)
100                        let mut lastcmp = ll_.cmp(&rr_);
101                        'digits_right: loop {
102                            read_left!();
103                            read_right!();
104                            match (ll, rr) {
105                                (Some(ll_), Some(rr_)) => {
106                                    // `lastcmp` is only used when there are the same number of
107                                    // digits, so we only update it.
108                                    if lastcmp == Equal { lastcmp = ll_.cmp(&rr_); }
109                                }
110                                (Some(_), None) => return Greater,
111                                (None, Some(_)) => return Less,
112                                (None, None) => break 'digits_right,
113                            }
114                        }
115                        return_unless_equal!(lastcmp);
116                    }
117                    continue 'nondigits; // do not read from the iterators again
118                },
119                (_, _) => return_unless_equal!(cmp(&l_, &r_)),
120            },
121            (Some(_), None) => return Greater,
122            (None, Some(_)) => return Less,
123            (None, None) => return Equal,
124        }
125
126        read_left!();
127        read_right!();
128    }
129}
130
131/// Compares two strings case-sensitively.
132/// It skips any Unicode whitespaces and handles a series of decimal digits.
133pub fn compare(left: &str, right: &str) -> Ordering {
134    compare_iter(left.chars(), right.chars(),
135                 |&c| c.is_whitespace(),
136                 |&l, &r| l.cmp(&r),
137                 |&c| c.to_digit(10).map(|v| v as isize))
138}
139
140/// Compares two strings case-insensitively.
141/// It skips any Unicode whitespaces and handles a series of decimal digits.
142pub fn compare_ignore_case(left: &str, right: &str) -> Ordering {
143    // XXX what we really want is a case folding!
144    // Unicode case folding can be done iteratively, but currently we don't have them in stdlib.
145
146    let left_iter  =  left.chars().flat_map(|c| c.to_lowercase());
147    let right_iter = right.chars().flat_map(|c| c.to_lowercase());
148
149    compare_iter(left_iter, right_iter,
150                 |&c| c.is_whitespace(),
151                 |&l, &r| l.cmp(&r),
152                 |&c| c.to_digit(10).map(|v| v as isize))
153}
154
155#[cfg(test)]
156mod tests {
157    use super::compare;
158    use std::cmp::Ordering;
159    use std::cmp::Ordering::{Less, Equal, Greater};
160
161    fn check_total_order(strs: &[&str]) {
162        fn ordering_to_op(ord: Ordering) -> &'static str {
163            match ord {
164                Greater => ">",
165                Equal => "=",
166                Less => "<",
167            }
168        }
169
170        for (i, &x) in strs.iter().enumerate() {
171            for (j, &y) in strs.iter().enumerate() {
172                assert!(compare(x, y) == i.cmp(&j),
173                        "expected x {} y, returned x {} y (where x = `{}`, y = `{}`)",
174                        ordering_to_op(i.cmp(&j)), ordering_to_op(compare(x, y)), x, y);
175            }
176        }
177    }
178
179    #[test]
180    fn test_numeric() {
181        check_total_order(&["a", "a0", "a1", "a1a", "a1b", "a2", "a10", "a20"]);
182    }
183
184    #[test]
185    fn test_multiple_parts() {
186        check_total_order(&["x2-g8", "x2-y7", "x2-y8", "x8-y8"]);
187    }
188
189    #[test]
190    fn test_leading_zeroes() {
191        check_total_order(&["1.001", "1.002", "1.010", "1.02", "1.1", "1.3"]);
192    }
193
194    #[test]
195    fn test_longer() {
196        check_total_order(&[
197            "1-02",
198            "1-2",
199            "1-20",
200            "10-20",
201            "fred",
202            "jane",
203            "pic1",
204            "pic2",
205            "pic2a",
206            "pic3",
207            "pic4",
208            "pic4   alpha",
209            "pic 4 else",
210            "pic4  last",
211            "pic5",
212            "pic5.07",
213            "pic5.08",
214            "pic5.13",
215            "pic5.113",
216            "pic 5 something",
217            "pic 6",
218            "pic   7",
219            "pic100",
220            "pic100a",
221            "pic120",
222            "pic121",
223            "pic2000",
224            "tom",
225            "x2-g8",
226            "x2-y7",
227            "x2-y8",
228            "x8-y8",
229        ]);
230    }
231}