use crate::core::{display_width, Word};
/// Strategy for finding the positions inside a word where it may be broken.
///
/// The positions are produced by [`WordSplitter::split_points`] and are byte
/// offsets into the word.
#[derive(Debug, Clone)]
pub enum WordSplitter {
    /// Never split: no split points are reported for any word.
    NoHyphenation,
    /// Split after every hyphen (`-`) that has an alphanumeric character
    /// directly before it and directly after it.
    HyphenSplitter,
    /// Delegate to a caller-supplied function. Whatever the function returns
    /// is used as the split points without further checks.
    Custom(fn(word: &str) -> Vec<usize>),
    /// Use the break positions reported by a `hyphenation` dictionary. Only
    /// compiled in when the `hyphenation` feature is enabled.
    #[cfg(feature = "hyphenation")]
    Hyphenation(hyphenation::Standard),
}

impl PartialEq<WordSplitter> for WordSplitter {
    /// Compares two splitters.
    ///
    /// The two field-less variants are equal to themselves, dictionary based
    /// splitters are equal when their dictionaries report the same language,
    /// and every other combination is unequal.
    fn eq(&self, other: &WordSplitter) -> bool {
        match self {
            Self::NoHyphenation => matches!(other, Self::NoHyphenation),
            Self::HyphenSplitter => matches!(other, Self::HyphenSplitter),
            // The wrapped function pointers are never inspected, so a custom
            // splitter is unequal to everything, including a copy of itself.
            Self::Custom(_) => false,
            #[cfg(feature = "hyphenation")]
            Self::Hyphenation(dictionary) => match other {
                Self::Hyphenation(other_dictionary) => {
                    dictionary.language() == other_dictionary.language()
                }
                _ => false,
            },
        }
    }
}

impl WordSplitter {
    /// Returns the byte offsets at which `word` may be split.
    ///
    /// * `NoHyphenation` yields an empty vector.
    /// * `HyphenSplitter` yields the offset just past each hyphen that sits
    ///   between two alphanumeric characters.
    /// * `Custom` yields whatever the wrapped function returns for `word`.
    /// * `Hyphenation` yields the breaks computed by the dictionary.
    pub fn split_points(&self, word: &str) -> Vec<usize> {
        match self {
            Self::NoHyphenation => Vec::new(),
            Self::HyphenSplitter => word
                .match_indices('-')
                .filter(|&(hyphen_at, _)| {
                    // A hyphen only counts when both neighbours exist and are
                    // alphanumeric; this skips leading, trailing and repeated
                    // hyphens such as the ones in `--foo-bar`.
                    let before = word[..hyphen_at].chars().next_back();
                    let after = word[hyphen_at + 1..].chars().next();
                    before.map_or(false, char::is_alphanumeric)
                        && after.map_or(false, char::is_alphanumeric)
                })
                // '-' is one byte wide, so the split point is right after it.
                .map(|(hyphen_at, _)| hyphen_at + 1)
                .collect(),
            Self::Custom(split_fn) => split_fn(word),
            #[cfg(feature = "hyphenation")]
            Self::Hyphenation(dictionary) => {
                use hyphenation::Hyphenator;
                dictionary.hyphenate(word).breaks
            }
        }
    }
}
/// Breaks every word in `words` into fragments at the split points reported
/// by `word_splitter`.
///
/// Each fragment that ends at a split point carries no trailing whitespace.
/// Its penalty is `"-"`, unless the text up to the split point already ends
/// with a hyphen, in which case the penalty is empty. The final fragment of a
/// word keeps the whitespace and penalty of the word it came from. A word
/// without split points is passed through as a single fragment.
///
/// The split points are used directly as byte offsets when slicing the word.
pub fn split_words<'a, I>(
    words: I,
    word_splitter: &'a WordSplitter,
) -> impl Iterator<Item = Word<'a>>
where
    I: IntoIterator<Item = Word<'a>>,
{
    words.into_iter().flat_map(move |word| {
        let mut breaks = word_splitter.split_points(&word).into_iter();
        // Byte offset where the next fragment begins.
        let mut start = 0;
        std::iter::from_fn(move || match breaks.next() {
            Some(end) => {
                // Only ask for an inserted hyphen when the text before the
                // split point does not already end with one.
                let penalty = if word[..end].ends_with('-') { "" } else { "-" };
                let fragment = Word {
                    word: &word.word[start..end],
                    width: display_width(&word[start..end]),
                    whitespace: "",
                    penalty,
                };
                start = end;
                Some(fragment)
            }
            // Split points are exhausted: emit the remaining tail once. The
            // `start == 0` case makes sure that an empty word still produces
            // exactly one fragment.
            None if start < word.word.len() || start == 0 => {
                let tail = Word {
                    word: &word.word[start..],
                    width: display_width(&word[start..]),
                    whitespace: word.whitespace,
                    penalty: word.penalty,
                };
                // Move past the end so that neither condition above can hold
                // on the next call.
                start = word.word.len() + 1;
                Some(tail)
            }
            None => None,
        })
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    // Drains the iterator on the left into a `Vec` and compares it with the
    // expected vector on the right.
    macro_rules! assert_iter_eq {
        ($actual:expr, $expected:expr) => {
            assert_eq!($actual.collect::<Vec<_>>(), $expected);
        };
    }

    // No input words means no output words.
    #[test]
    fn split_words_no_words() {
        let splitter = WordSplitter::HyphenSplitter;
        assert_iter_eq!(split_words(vec![], &splitter), vec![]);
    }

    // A word without any split points comes out unchanged.
    #[test]
    fn split_words_empty_word() {
        let splitter = WordSplitter::HyphenSplitter;
        let input = vec![Word::from(" ")];
        assert_iter_eq!(split_words(input, &splitter), vec![Word::from(" ")]);
    }

    #[test]
    fn split_words_single_word() {
        let splitter = WordSplitter::HyphenSplitter;
        let input = vec![Word::from("foobar")];
        assert_iter_eq!(split_words(input, &splitter), vec![Word::from("foobar")]);
    }

    // The hyphen stays attached to the first fragment.
    #[test]
    fn split_words_hyphen_splitter() {
        let splitter = WordSplitter::HyphenSplitter;
        let input = vec![Word::from("foo-bar")];
        let expected = vec![Word::from("foo-"), Word::from("bar")];
        assert_iter_eq!(split_words(input, &splitter), expected);
    }

    // With hyphenation disabled even an existing hyphen is not a split point.
    #[test]
    fn split_words_no_hyphenation() {
        let splitter = WordSplitter::NoHyphenation;
        let input = vec![Word::from("foo-bar")];
        assert_iter_eq!(split_words(input, &splitter), vec![Word::from("foo-bar")]);
    }

    #[test]
    fn split_words_adds_penalty() {
        // Always splits after the third byte, whatever the word is.
        fn split_at_three(_: &str) -> Vec<usize> {
            vec![3]
        }
        let splitter = WordSplitter::Custom(split_at_three);

        // "foo" does not end with a hyphen, so the fragment gets a "-" penalty.
        let expected_with_penalty = vec![
            Word {
                word: "foo",
                width: 3,
                whitespace: "",
                penalty: "-",
            },
            Word {
                word: "bar",
                width: 3,
                whitespace: "",
                penalty: "",
            },
        ];
        assert_iter_eq!(
            split_words(vec![Word::from("foobar")], &splitter),
            expected_with_penalty
        );

        // "fo-" already ends with a hyphen, so no penalty is added.
        let expected_without_penalty = vec![
            Word {
                word: "fo-",
                width: 3,
                whitespace: "",
                penalty: "",
            },
            Word {
                word: "bar",
                width: 3,
                whitespace: "",
                penalty: "",
            },
        ];
        assert_iter_eq!(
            split_words(vec![Word::from("fo-bar")], &splitter),
            expected_without_penalty
        );
    }
}