use crate::{Options, WordSplitter};
#[cfg(feature = "smawk")]
mod optimal_fit;
#[cfg(feature = "smawk")]
pub use optimal_fit::wrap_optimal_fit;
/// The two characters that open an ANSI escape sequence recognised by
/// this module: ESC (`\x1b`) immediately followed by `[`.
const CSI: (char, char) = ('\x1b', '[');

/// Characters that terminate an escape sequence opened by [`CSI`].
const ANSI_FINAL_BYTE: std::ops::RangeInclusive<char> = '\x40'..='\x7e';

/// Skip the remainder of an ANSI escape sequence, if `ch` begins one.
///
/// `ch` is the character most recently taken from `chars`. When `ch` is
/// ESC and the next character is `[`, characters are consumed from
/// `chars` up to and including the first one in [`ANSI_FINAL_BYTE`], and
/// `true` is returned.
///
/// `false` is returned in every other case. Note that the iterator may
/// still have been advanced: an ESC that is not followed by `[` consumes
/// that following character, and an unterminated sequence consumes the
/// rest of the iterator.
#[inline]
fn skip_ansi_escape_sequence<I: Iterator<Item = char>>(ch: char, chars: &mut I) -> bool {
    if ch != CSI.0 {
        // Not an escape character: leave the iterator untouched.
        return false;
    }
    if chars.next() != Some(CSI.1) {
        return false;
    }
    // `any` stops right after the first final byte, which leaves the
    // iterator positioned on the first character after the sequence.
    chars.any(|c| ANSI_FINAL_BYTE.contains(&c))
}
/// Width of `ch` in columns, as reported by the `unicode-width` crate.
///
/// Characters for which the crate reports no width (`None`) are treated
/// as occupying zero columns.
#[cfg(feature = "unicode-width")]
#[inline]
fn ch_width(ch: char) -> usize {
    match unicode_width::UnicodeWidthChar::width(ch) {
        Some(columns) => columns,
        None => 0,
    }
}
/// First code point that the fallback [`ch_width`] treats as occupying
/// two columns. Only used when the `unicode-width` feature is disabled.
#[cfg(not(feature = "unicode-width"))]
const DOUBLE_WIDTH_CUTOFF: char = '\u{1100}';

/// Fallback width estimate used when the `unicode-width` feature is
/// disabled.
///
/// Every character at or above [`DOUBLE_WIDTH_CUTOFF`] is counted as two
/// columns wide; every character below it is counted as one column.
#[cfg(not(feature = "unicode-width"))]
#[inline]
fn ch_width(ch: char) -> usize {
    if ch >= DOUBLE_WIDTH_CUTOFF {
        2
    } else {
        1
    }
}
/// Compute the number of columns `text` occupies when displayed.
///
/// The width of each character is obtained from `ch_width` and summed.
/// Escape sequences recognised by `skip_ansi_escape_sequence` contribute
/// nothing to the result.
#[inline]
pub fn display_width(text: &str) -> usize {
    let mut chars = text.chars();
    // Yield one width per visible character; a skipped escape sequence
    // is represented by a single zero so that it does not end the stream.
    std::iter::from_fn(|| {
        let ch = chars.next()?;
        let columns = if skip_ansi_escape_sequence(ch, &mut chars) {
            0
        } else {
            ch_width(ch)
        };
        Some(columns)
    })
    .sum()
}
/// A piece of content that the wrapping functions can place on a line.
///
/// `wrap_first_fit` uses the three widths as follows: a fragment fits on
/// the current line if the width already used plus `width()` plus
/// `penalty_width()` does not exceed the line width, and once placed it
/// advances the used width by `width()` plus `whitespace_width()`.
pub trait Fragment: std::fmt::Debug {
/// Width of the fragment's own content.
fn width(&self) -> usize;
/// Width of the whitespace that follows the fragment when another
/// fragment is placed after it on the same line.
fn whitespace_width(&self) -> usize;
/// Extra width that must be available for the fragment to be the last
/// one on a line.
fn penalty_width(&self) -> usize;
}
/// A word together with its trailing whitespace and an optional penalty.
///
/// All string fields borrow from the text the word was created from.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Word<'a> {
// The text of the word itself, without the trailing whitespace.
word: &'a str,
// Cached display width of `word`, in columns.
width: usize,
// Whitespace following the word; `Word::from` fills this with the
// trailing run of ' ' characters.
pub(crate) whitespace: &'a str,
// Text associated with breaking a line after this word; `split_words`
// sets it to "-" or "".
pub(crate) penalty: &'a str,
}
/// Lets a `Word` be used wherever a `&str` is expected; the string seen
/// is the word's text only, without its whitespace or penalty.
impl std::ops::Deref for Word<'_> {
type Target = str;
/// Returns the word's text.
fn deref(&self) -> &Self::Target {
self.word
}
}
impl<'a> Word<'a> {
    /// Construct a `Word` from a string slice.
    ///
    /// Trailing `' '` characters are split off and stored as the word's
    /// whitespace. The remaining text becomes the word itself, with its
    /// width computed by [`display_width`]. The penalty is empty.
    pub fn from(word: &str) -> Word<'_> {
        let content_len = word.trim_end_matches(' ').len();
        let content = &word[..content_len];
        Word {
            word: content,
            width: display_width(content),
            whitespace: &word[content_len..],
            penalty: "",
        }
    }

    /// Break this word into pieces that are each at most `line_width`
    /// columns wide.
    ///
    /// A piece always contains at least one visible character, so a
    /// single character wider than `line_width` still forms its own
    /// piece. Escape sequences recognised by `skip_ansi_escape_sequence`
    /// stay inside the pieces but do not count towards their width.
    ///
    /// Only the final piece carries this word's whitespace and penalty;
    /// every earlier piece has both set to `""`. An empty word yields no
    /// pieces at all.
    pub fn break_apart<'b>(&'b self, line_width: usize) -> impl Iterator<Item = Word<'a>> + 'b {
        let text = self.word;
        let mut indices = text.char_indices();
        // Byte offset where the piece under construction begins, and the
        // number of columns accumulated for it so far.
        let mut piece_start = 0;
        let mut piece_width = 0;

        std::iter::from_fn(move || {
            while let Some((idx, ch)) = indices.next() {
                if skip_ansi_escape_sequence(ch, &mut indices.by_ref().map(|(_, ch)| ch)) {
                    continue;
                }

                let ch_columns = ch_width(ch);
                // Keep growing the piece while it is empty or the
                // character still fits.
                if piece_width == 0 || piece_width + ch_columns <= line_width {
                    piece_width += ch_columns;
                    continue;
                }

                // The character does not fit: emit what we have and let
                // the character open the next piece.
                let piece = Word {
                    word: &text[piece_start..idx],
                    width: piece_width,
                    whitespace: "",
                    penalty: "",
                };
                piece_start = idx;
                piece_width = ch_columns;
                return Some(piece);
            }

            if piece_start >= text.len() {
                return None;
            }

            let last = Word {
                word: &text[piece_start..],
                width: piece_width,
                whitespace: self.whitespace,
                penalty: self.penalty,
            };
            // Mark the text as fully consumed so later calls yield `None`.
            piece_start = text.len();
            Some(last)
        })
    }
}
/// `Fragment` implementation for `Word`; every method is a constant-time
/// field read.
impl Fragment for Word<'_> {
/// Returns the cached display width of the word's text.
#[inline]
fn width(&self) -> usize {
self.width
}
/// Returns the byte length of the trailing whitespace.
// NOTE(review): this equals the column count only while the whitespace
// consists of ' ' characters, which is what `Word::from` produces.
#[inline]
fn whitespace_width(&self) -> usize {
self.whitespace.len()
}
/// Returns the byte length of the penalty string.
// NOTE(review): this equals the column count for the "" and "-" values
// that `split_words` assigns; other values are assumed not to occur.
#[inline]
fn penalty_width(&self) -> usize {
self.penalty.len()
}
}
/// Split `line` into words, keeping trailing spaces with each word.
///
/// A new word starts at every non-space character that directly follows
/// a `' '`. Each slice is turned into a [`Word`] with `Word::from`, so
/// the run of spaces after a word ends up as that word's whitespace.
/// Leading spaces therefore form a word with empty text, and an empty
/// `line` yields no words.
pub fn find_words(line: &str) -> impl Iterator<Item = Word> {
    let mut word_start = 0;
    let mut prev_was_space = false;
    let mut indices = line.char_indices();

    std::iter::from_fn(move || {
        // A `for` loop would have to move `indices` out of this `FnMut`
        // closure, so the iterator is advanced by hand instead.
        #[allow(clippy::while_let_on_iterator)]
        while let Some((idx, ch)) = indices.next() {
            let is_space = ch == ' ';
            let at_boundary = prev_was_space && !is_space;
            prev_was_space = is_space;

            if at_boundary {
                let word = Word::from(&line[word_start..idx]);
                word_start = idx;
                return Some(word);
            }
        }

        if word_start >= line.len() {
            return None;
        }

        // Whatever remains after the last boundary is the final word.
        let rest = &line[word_start..];
        word_start = line.len();
        Some(Word::from(rest))
    })
}
/// Split each word at the points chosen by the splitter in `options`.
///
/// For every split point returned by `options.splitter.split_points`, a
/// piece ending at that byte index is produced with empty whitespace.
/// Its penalty is `"-"` unless the text up to the split point already
/// ends with `'-'`, in which case it is `""`.
///
/// The text after the last split point becomes a final piece that
/// inherits the original word's whitespace and penalty. A word with no
/// split points is passed through as a single piece, even when empty.
pub fn split_words<'a, I, S, Opt>(words: I, options: Opt) -> impl Iterator<Item = Word<'a>>
where
    I: IntoIterator<Item = Word<'a>>,
    S: WordSplitter,
    Opt: Into<Options<'a, S>>,
{
    let options = options.into();

    words.into_iter().flat_map(move |word| {
        // Byte offset just past the most recently emitted piece.
        let mut prev = 0;
        let mut split_points = options.splitter.split_points(&word).into_iter();

        std::iter::from_fn(move || match split_points.next() {
            Some(idx) => {
                let penalty = if word[..idx].ends_with('-') { "" } else { "-" };
                let piece = &word.word[prev..idx];
                prev = idx;
                Some(Word {
                    word: piece,
                    width: display_width(piece),
                    whitespace: "",
                    penalty,
                })
            }
            // No split points left: emit the tail if there is any text
            // remaining, or if nothing was emitted for this word yet.
            None if prev < word.word.len() || prev == 0 => {
                let piece = &word.word[prev..];
                // Move past the end so neither guard condition can hold
                // again for this word.
                prev = word.word.len() + 1;
                Some(Word {
                    word: piece,
                    width: display_width(piece),
                    whitespace: word.whitespace,
                    penalty: word.penalty,
                })
            }
            None => None,
        })
    })
}
/// Forcibly break every word that is wider than `line_width`.
///
/// Words whose width is at most `line_width` are kept unchanged; wider
/// words are replaced, in place, by the pieces produced by
/// [`Word::break_apart`]. The relative order of the words is preserved.
pub fn break_words<'a, I>(words: I, line_width: usize) -> Vec<Word<'a>>
where
    I: IntoIterator<Item = Word<'a>>,
{
    let mut pieces = Vec::new();
    for word in words {
        if word.width() <= line_width {
            pieces.push(word);
            continue;
        }
        pieces.extend(word.break_apart(line_width));
    }
    pieces
}
/// Selects which wrapping algorithm to use.
// NOTE(review): the code that dispatches on this enum is not in this
// file; the variants presumably map to `wrap_optimal_fit` and
// `wrap_first_fit` respectively -- confirm against the caller.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum WrapAlgorithm {
/// Only available when the `smawk` feature is enabled.
#[cfg(feature = "smawk")]
OptimalFit,
/// Always available.
FirstFit,
}
/// Wrap `fragments` into lines greedily, filling each line as far as
/// possible before starting the next.
///
/// `line_widths` maps a line number (starting at 0) to the width
/// available on that line. A fragment is moved to a new line when the
/// width used so far plus the fragment's width and penalty width exceeds
/// the current line's width, unless it would be the first fragment on
/// its line: every line holds at least one fragment, however wide.
///
/// The result always contains at least one slice. For empty input that
/// is a single empty slice.
pub fn wrap_first_fit<T: Fragment, F: Fn(usize) -> usize>(
    fragments: &[T],
    line_widths: F,
) -> Vec<&[T]> {
    let mut lines = Vec::new();
    // Index of the first fragment on the line being filled, and the
    // width consumed on that line so far (including inter-fragment
    // whitespace).
    let mut line_start = 0;
    let mut used = 0;

    for (idx, fragment) in fragments.iter().enumerate() {
        let limit = line_widths(lines.len());
        let needed = used + fragment.width() + fragment.penalty_width();
        if idx > line_start && needed > limit {
            lines.push(&fragments[line_start..idx]);
            line_start = idx;
            used = 0;
        }
        used += fragment.width() + fragment.whitespace_width();
    }

    lines.push(&fragments[line_start..]);
    lines
}
// Unit tests for the width helpers, `find_words` and `split_words`.
#[cfg(test)]
mod tests {
use super::*;
#[cfg(feature = "unicode-width")]
use unicode_width::UnicodeWidthChar;
// Collects an iterator into a `Vec` and compares it with the expected
// vector, so lazily produced words can be checked with `assert_eq!`.
macro_rules! assert_iter_eq {
($left:expr, $right:expr) => {
assert_eq!($left.collect::<Vec<_>>(), $right);
};
}
// After skipping the leading colour sequence, the iterator must be
// positioned on the first visible character.
#[test]
fn skip_ansi_escape_sequence_works() {
let blue_text = "\u{1b}[34mHello\u{1b}[0m";
let mut chars = blue_text.chars();
let ch = chars.next().unwrap();
assert!(skip_ansi_escape_sequence(ch, &mut chars));
assert_eq!(chars.next(), Some('H'));
}
// Checks the width of every code point that `unic_emoji_char` flags
// as an emoji, under both width implementations.
#[test]
fn emojis_have_correct_width() {
use unic_emoji_char::is_emoji;
// Emojis from U+0001 up to (excluding) U+00FF must be one column wide.
for ch in '\u{1}'..'\u{FF}' {
if is_emoji(ch) {
let desc = format!("{:?} U+{:04X}", ch, ch as u32);
#[cfg(feature = "unicode-width")]
assert_eq!(ch.width().unwrap(), 1, "char: {}", desc);
#[cfg(not(feature = "unicode-width"))]
assert_eq!(ch_width(ch), 1, "char: {}", desc);
}
}
// Emojis from U+00FF up to (excluding) U+2FFFF are at most two columns
// wide with unicode-width, and exactly two columns with the fallback.
for ch in '\u{FF}'..'\u{2FFFF}' {
if is_emoji(ch) {
let desc = format!("{:?} U+{:04X}", ch, ch as u32);
#[cfg(feature = "unicode-width")]
assert!(ch.width().unwrap() <= 2, "char: {}", desc);
#[cfg(not(feature = "unicode-width"))]
assert_eq!(ch_width(ch), 2, "char: {}", desc);
}
}
}
// The display width differs from the byte length (11 bytes vs. 10
// columns here), and escape sequences add no width.
#[test]
fn display_width_works() {
assert_eq!("Café Plain".len(), 11); assert_eq!(display_width("Café Plain"), 10);
assert_eq!(display_width("\u{1b}[31mCafé Rouge\u{1b}[0m"), 10);
}
// U+2049 is one column wide with unicode-width; the fallback counts it
// as two because it lies above DOUBLE_WIDTH_CUTOFF.
#[test]
fn display_width_narrow_emojis() {
#[cfg(feature = "unicode-width")]
assert_eq!(display_width("⁉"), 1);
#[cfg(not(feature = "unicode-width"))]
assert_eq!(display_width("⁉"), 2);
}
// With unicode-width the U+FE0F selector adds no width; the fallback
// counts both code points as two columns each.
#[test]
fn display_width_narrow_emojis_variant_selector() {
#[cfg(feature = "unicode-width")]
assert_eq!(display_width("⁉\u{fe0f}"), 1);
#[cfg(not(feature = "unicode-width"))]
assert_eq!(display_width("⁉\u{fe0f}"), 4);
}
// Ten emojis, two columns each, under both width implementations.
#[test]
fn display_width_emojis() {
assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20);
}
// An empty line yields no words.
#[test]
fn find_words_empty() {
assert_iter_eq!(find_words(""), vec![]);
}
#[test]
fn find_words_single_word() {
assert_iter_eq!(find_words("foo"), vec![Word::from("foo")]);
}
// The space after a word is kept as that word's trailing whitespace.
#[test]
fn find_words_two_words() {
assert_iter_eq!(
find_words("foo bar"),
vec![Word::from("foo "), Word::from("bar")]
);
}
#[test]
fn find_words_multiple_words() {
assert_iter_eq!(
find_words("foo bar baz"),
vec![Word::from("foo "), Word::from("bar "), Word::from("baz")]
);
}
// A line consisting only of whitespace is a single word with empty text.
#[test]
fn find_words_whitespace() {
assert_iter_eq!(find_words(" "), vec![Word::from(" ")]);
}
#[test]
fn find_words_inter_word_whitespace() {
assert_iter_eq!(
find_words("foo bar"),
vec![Word::from("foo "), Word::from("bar")]
)
}
#[test]
fn find_words_trailing_whitespace() {
assert_iter_eq!(find_words("foo "), vec![Word::from("foo ")]);
}
// Leading whitespace becomes its own word, separate from the text.
#[test]
fn find_words_leading_whitespace() {
assert_iter_eq!(
find_words(" foo"),
vec![Word::from(" "), Word::from("foo")]
);
}
// A single multi-byte character is returned as one word.
#[test]
fn find_words_multi_column_char() {
assert_iter_eq!(
find_words("\u{1f920}"), vec![Word::from("\u{1f920}")]
);
}
// Hyphens are ordinary characters for `find_words`; only spaces
// separate words.
#[test]
fn find_words_hyphens() {
assert_iter_eq!(find_words("foo-bar"), vec![Word::from("foo-bar")]);
assert_iter_eq!(
find_words("foo- bar"),
vec![Word::from("foo- "), Word::from("bar")]
);
assert_iter_eq!(
find_words("foo - bar"),
vec![Word::from("foo "), Word::from("- "), Word::from("bar")]
);
assert_iter_eq!(
find_words("foo -bar"),
vec![Word::from("foo "), Word::from("-bar")]
);
}
#[test]
fn split_words_no_words() {
assert_iter_eq!(split_words(vec![], 80), vec![]);
}
// A word with empty text and only whitespace passes through unchanged.
#[test]
fn split_words_empty_word() {
assert_iter_eq!(
split_words(vec![Word::from(" ")], 80),
vec![Word::from(" ")]
);
}
// With the options built from a plain width, "foo-bar" is split after
// the existing hyphen and no penalty is added.
#[test]
fn split_words_hyphen_splitter() {
assert_iter_eq!(
split_words(vec![Word::from("foo-bar")], 80),
vec![Word::from("foo-"), Word::from("bar")]
);
}
// Splitting does not depend on the line width: a word without split
// points stays whole even when it is wider than the line.
#[test]
fn split_words_short_line() {
assert_iter_eq!(
split_words(vec![Word::from("foobar")], 3),
vec![Word::from("foobar")]
);
}
// A split that does not land after a hyphen gets a "-" penalty; a
// split right after an existing hyphen gets none.
#[test]
fn split_words_adds_penalty() {
// Splitter that always proposes a single split at byte offset 3.
#[derive(Debug)]
struct FixedSplitPoint;
impl WordSplitter for FixedSplitPoint {
fn split_points(&self, _: &str) -> Vec<usize> {
vec![3]
}
}
let options = Options::new(80).splitter(FixedSplitPoint);
assert_iter_eq!(
split_words(vec![Word::from("foobar")].into_iter(), &options),
vec![
Word {
word: "foo",
width: 3,
whitespace: "",
penalty: "-"
},
Word {
word: "bar",
width: 3,
whitespace: "",
penalty: ""
}
]
);
assert_iter_eq!(
split_words(vec![Word::from("fo-bar")].into_iter(), &options),
vec![
Word {
word: "fo-",
width: 3,
whitespace: "",
penalty: ""
},
Word {
word: "bar",
width: 3,
whitespace: "",
penalty: ""
}
]
);
}
}