[go: up one dir, main page]

slug/
lib.rs

1use deunicode::deunicode_char;
2#[cfg(target_family = "wasm")]
3use wasm_bindgen::prelude::*;
4
5/// Convert any unicode string to an ascii "slug" (useful for file names/url components)
6///
7/// The returned "slug" will consist of a-z, 0-9, and '-'. Furthermore, a slug will
8/// never contain more than one '-' in a row and will never start or end with '-'.
9///
10/// ```rust
11/// use self::slug::slugify;
12///
13/// assert_eq!(slugify("My Test String!!!1!1"), "my-test-string-1-1");
14/// assert_eq!(slugify("test\nit   now!"), "test-it-now");
15/// assert_eq!(slugify("  --test_-_cool"), "test-cool");
16/// assert_eq!(slugify("Æúű--cool?"), "aeuu-cool");
17/// assert_eq!(slugify("You & Me"), "you-me");
18/// assert_eq!(slugify("user@example.com"), "user-example-com");
19/// ```
20pub fn slugify<S: AsRef<str>>(s: S) -> String {
21    _slugify(s.as_ref())
22}
23
// Variant of `slugify` that takes an owned `String`; it is compiled only for
// wasm targets and exported to JavaScript under the name `slugify`.
#[doc(hidden)]
#[cfg(target_family = "wasm")]
#[wasm_bindgen(js_name = slugify)]
pub fn slugify_owned(s: String) -> String {
    let text = s.as_str();
    _slugify(text)
}
30
31// avoid unnecessary monomorphizations
32fn _slugify(s: &str) -> String {
33    let mut slug = String::with_capacity(s.len());
34    // Starts with true to avoid leading -
35    let mut prev_is_dash = true;
36    {
37        let mut push_char = |x: u8| {
38            match x {
39                b'a'..=b'z' | b'0'..=b'9' => {
40                    prev_is_dash = false;
41                    slug.push(x.into());
42                }
43                b'A'..=b'Z' => {
44                    prev_is_dash = false;
45                    // Manual lowercasing as Rust to_lowercase() is unicode
46                    // aware and therefore much slower
47                    slug.push((x - b'A' + b'a').into());
48                }
49                _ => {
50                    if !prev_is_dash {
51                        slug.push('-');
52                        prev_is_dash = true;
53                    }
54                }
55            }
56        };
57
58        for c in s.chars() {
59            if c.is_ascii() {
60                (push_char)(c as u8);
61            } else {
62                for &cx in deunicode_char(c).unwrap_or("-").as_bytes() {
63                    (push_char)(cx);
64                }
65            }
66        }
67    }
68
69    if slug.ends_with('-') {
70        slug.pop();
71    }
72    // We likely reserved more space than needed.
73    slug.shrink_to_fit();
74    slug
75}