slug/lib.rs
1use deunicode::deunicode_char;
2#[cfg(target_family = "wasm")]
3use wasm_bindgen::prelude::*;
4
5/// Convert any unicode string to an ascii "slug" (useful for file names/url components)
6///
7/// The returned "slug" will consist of a-z, 0-9, and '-'. Furthermore, a slug will
8/// never contain more than one '-' in a row and will never start or end with '-'.
9///
10/// ```rust
11/// use self::slug::slugify;
12///
13/// assert_eq!(slugify("My Test String!!!1!1"), "my-test-string-1-1");
14/// assert_eq!(slugify("test\nit now!"), "test-it-now");
15/// assert_eq!(slugify(" --test_-_cool"), "test-cool");
16/// assert_eq!(slugify("Æúű--cool?"), "aeuu-cool");
17/// assert_eq!(slugify("You & Me"), "you-me");
18/// assert_eq!(slugify("user@example.com"), "user-example-com");
19/// ```
20pub fn slugify<S: AsRef<str>>(s: S) -> String {
21 _slugify(s.as_ref())
22}
23
// Entry point exported to JavaScript under the name `slugify` when building
// for a wasm target. It takes an owned `String` and forwards it to the shared
// `_slugify` implementation.
#[doc(hidden)]
#[cfg(target_family = "wasm")]
#[wasm_bindgen(js_name = slugify)]
pub fn slugify_owned(s: String) -> String {
    let text: &str = s.as_str();
    _slugify(text)
}
30
31// avoid unnecessary monomorphizations
32fn _slugify(s: &str) -> String {
33 let mut slug = String::with_capacity(s.len());
34 // Starts with true to avoid leading -
35 let mut prev_is_dash = true;
36 {
37 let mut push_char = |x: u8| {
38 match x {
39 b'a'..=b'z' | b'0'..=b'9' => {
40 prev_is_dash = false;
41 slug.push(x.into());
42 }
43 b'A'..=b'Z' => {
44 prev_is_dash = false;
45 // Manual lowercasing as Rust to_lowercase() is unicode
46 // aware and therefore much slower
47 slug.push((x - b'A' + b'a').into());
48 }
49 _ => {
50 if !prev_is_dash {
51 slug.push('-');
52 prev_is_dash = true;
53 }
54 }
55 }
56 };
57
58 for c in s.chars() {
59 if c.is_ascii() {
60 (push_char)(c as u8);
61 } else {
62 for &cx in deunicode_char(c).unwrap_or("-").as_bytes() {
63 (push_char)(cx);
64 }
65 }
66 }
67 }
68
69 if slug.ends_with('-') {
70 slug.pop();
71 }
72 // We likely reserved more space than needed.
73 slug.shrink_to_fit();
74 slug
75}