#![cfg(feature = "text")]
use std::borrow::Cow;
use std::collections::BinaryHeap;
use std::fmt;
use std::io;
use std::ops::Range;
use crate::algorithms::{capture_diff_slices, group_diff_ops, Algorithm, DiffOp, DiffTag};
/// Builder-style configuration used to create a [`TextDiff`].
///
/// Stores the diffing algorithm and an optional override that decides
/// whether the diffed slices are treated as already newline terminated.
#[derive(Clone, Debug)]
pub struct TextDiffConfig {
// Algorithm handed to `capture_diff_slices` when the diff is computed.
algorithm: Algorithm,
// Explicit newline-termination override; `None` means each `diff_*`
// method falls back to its own default.
newline_terminated: Option<bool>,
}
impl Default for TextDiffConfig {
    /// Creates a configuration that uses the default [`Algorithm`] and
    /// leaves the newline-termination override unset.
    fn default() -> Self {
        let algorithm = Algorithm::default();
        Self {
            algorithm,
            newline_terminated: None,
        }
    }
}
impl TextDiffConfig {
    /// Sets the algorithm used to compute the diff.
    ///
    /// Returns `self` so calls can be chained.
    pub fn algorithm(&mut self, alg: Algorithm) -> &mut Self {
        self.algorithm = alg;
        self
    }

    /// Overrides whether the diffed slices are considered newline terminated.
    ///
    /// Without this override, `diff_lines` uses `true` and every other
    /// `diff_*` method uses `false`. Returns `self` so calls can be chained.
    pub fn newline_terminated(&mut self, yes: bool) -> &mut Self {
        self.newline_terminated = Some(yes);
        self
    }

    /// Diffs two strings line by line (lines keep their line endings).
    pub fn diff_lines<'old, 'new, 'bufs>(
        &self,
        old: &'old str,
        new: &'new str,
    ) -> TextDiff<'old, 'new, 'bufs> {
        let old_lines: Vec<&'old str> = split_lines(old).collect();
        let new_lines: Vec<&'new str> = split_lines(new).collect();
        self.diff(Cow::Owned(old_lines), Cow::Owned(new_lines), true)
    }

    /// Diffs two strings word by word (each word keeps its trailing whitespace).
    pub fn diff_words<'old, 'new, 'bufs>(
        &self,
        old: &'old str,
        new: &'new str,
    ) -> TextDiff<'old, 'new, 'bufs> {
        let old_words: Vec<&'old str> = split_words(old).collect();
        let new_words: Vec<&'new str> = split_words(new).collect();
        self.diff(Cow::Owned(old_words), Cow::Owned(new_words), false)
    }

    /// Diffs two strings character by character (one `char` per slice).
    pub fn diff_chars<'old, 'new, 'bufs>(
        &self,
        old: &'old str,
        new: &'new str,
    ) -> TextDiff<'old, 'new, 'bufs> {
        let old_chars: Vec<&'old str> = split_chars(old).collect();
        let new_chars: Vec<&'new str> = split_chars(new).collect();
        self.diff(Cow::Owned(old_chars), Cow::Owned(new_chars), false)
    }

    /// Diffs two strings grapheme cluster by grapheme cluster.
    ///
    /// Only available with the `unicode` feature.
    #[cfg(feature = "unicode")]
    pub fn diff_graphemes<'old, 'new, 'bufs>(
        &self,
        old: &'old str,
        new: &'new str,
    ) -> TextDiff<'old, 'new, 'bufs> {
        let old_graphemes: Vec<&'old str> = split_graphemes(old).collect();
        let new_graphemes: Vec<&'new str> = split_graphemes(new).collect();
        self.diff(
            Cow::Owned(old_graphemes),
            Cow::Owned(new_graphemes),
            false,
        )
    }

    /// Diffs two caller-provided slices of strings without copying them.
    pub fn diff_slices<'old, 'new, 'bufs>(
        &self,
        old: &'bufs [&'old str],
        new: &'bufs [&'new str],
    ) -> TextDiff<'old, 'new, 'bufs> {
        let borrowed_old = Cow::Borrowed(old);
        let borrowed_new = Cow::Borrowed(new);
        self.diff(borrowed_old, borrowed_new, false)
    }

    /// Shared implementation behind all `diff_*` methods.
    ///
    /// `newline_terminated` is the caller's default; an explicit override
    /// stored in the configuration takes precedence over it.
    fn diff<'old, 'new, 'bufs>(
        &self,
        old: Cow<'bufs, [&'old str]>,
        new: Cow<'bufs, [&'new str]>,
        newline_terminated: bool,
    ) -> TextDiff<'old, 'new, 'bufs> {
        let algorithm = self.algorithm;
        let newline_terminated = self.newline_terminated.unwrap_or(newline_terminated);
        let ops = capture_diff_slices(algorithm, &old, &new);
        TextDiff {
            old,
            new,
            ops,
            newline_terminated,
            algorithm,
        }
    }
}
/// The result of diffing two sequences of string slices.
///
/// Holds both compared sequences together with the computed diff
/// operations and the settings that were in effect when diffing.
pub struct TextDiff<'old, 'new, 'bufs> {
// Old-side slices: owned when produced by one of the splitting `diff_*`
// methods, borrowed when supplied through `diff_slices`.
old: Cow<'bufs, [&'old str]>,
// New-side slices, with the same ownership rules as `old`.
new: Cow<'bufs, [&'new str]>,
// Operations returned by `capture_diff_slices` for `old` and `new`.
ops: Vec<DiffOp>,
// Whether the slices are treated as already ending in a newline; consulted
// when rendering unified diffs.
newline_terminated: bool,
// Algorithm that was used to compute `ops`.
algorithm: Algorithm,
}
/// The kind of a single [`Change`] within a diff.
///
/// The derived ordering follows the declaration order of the variants.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)]
pub enum ChangeTag {
/// The value is present on both sides.
Equal,
/// The value is only present on the old side.
Delete,
/// The value is only present on the new side.
Insert,
}
/// A single changed (or unchanged) value yielded by `TextDiff::iter_changes`.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)]
pub struct Change<'s> {
// Whether the value is equal, deleted or inserted.
tag: ChangeTag,
// Index of the value in the old slices; `None` for inserts.
old_index: Option<usize>,
// Index of the value in the new slices; `None` for deletes.
new_index: Option<usize>,
// The slice this change refers to.
value: &'s str,
}
impl<'s> Change<'s> {
    /// Returns the string slice this change refers to.
    pub fn value(&self) -> &'s str {
        self.value
    }

    /// Returns whether the value is equal, deleted or inserted.
    pub fn tag(&self) -> ChangeTag {
        self.tag
    }

    /// Returns the index of the value in the old slices, if it has one.
    pub fn old_index(&self) -> Option<usize> {
        self.old_index
    }

    /// Returns the index of the value in the new slices, if it has one.
    pub fn new_index(&self) -> Option<usize> {
        self.new_index
    }
}
impl ChangeTag {
pub fn unified_sign(self) -> char {
match self {
ChangeTag::Equal => ' ',
ChangeTag::Delete => '-',
ChangeTag::Insert => '+',
}
}
}
impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
    /// Returns a default [`TextDiffConfig`] that can be customized before diffing.
    pub fn configure() -> TextDiffConfig {
        TextDiffConfig::default()
    }

    /// Diffs two strings line by line using the default configuration.
    pub fn from_lines(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
        Self::configure().diff_lines(old, new)
    }

    /// Diffs two strings word by word using the default configuration.
    pub fn from_words(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
        Self::configure().diff_words(old, new)
    }

    /// Diffs two strings character by character using the default configuration.
    pub fn from_chars(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
        Self::configure().diff_chars(old, new)
    }

    /// Diffs two strings grapheme by grapheme using the default configuration.
    ///
    /// Only available with the `unicode` feature.
    #[cfg(feature = "unicode")]
    pub fn from_graphemes(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
        Self::configure().diff_graphemes(old, new)
    }

    /// Diffs two caller-provided slices of strings using the default configuration.
    pub fn from_slices(
        old: &'bufs [&'old str],
        new: &'bufs [&'new str],
    ) -> TextDiff<'old, 'new, 'bufs> {
        Self::configure().diff_slices(old, new)
    }

    /// Returns the algorithm that was used to compute this diff.
    pub fn algorithm(&self) -> Algorithm {
        self.algorithm
    }

    /// Returns `true` if the slices are treated as already newline terminated.
    ///
    /// In that case the unified diff writer does not append a newline after
    /// values that already end in one.
    pub fn newline_terminated(&self) -> bool {
        self.newline_terminated
    }

    /// Returns all slices of the old side.
    pub fn old_slices(&self) -> &[&'old str] {
        &self.old
    }

    /// Returns all slices of the new side.
    pub fn new_slices(&self) -> &[&'new str] {
        &self.new
    }

    /// Returns a similarity measure between the two sides.
    ///
    /// The value is `2 * matches / (old_len + new_len)` where `matches` is
    /// the total length of all equal operations. Two empty sides yield `1.0`.
    pub fn ratio(&self) -> f32 {
        let matches = self
            .ops()
            .iter()
            .map(|op| {
                if let DiffOp::Equal { len, .. } = *op {
                    len
                } else {
                    0
                }
            })
            .sum::<usize>();
        let len = self.old.len() + self.new.len();
        if len == 0 {
            1.0
        } else {
            2.0 * matches as f32 / len as f32
        }
    }

    /// Iterates over the individual changes described by `op`.
    ///
    /// Equal operations yield the old-side values with both indexes set.
    /// Deletes and inserts yield values of their respective side. A replace
    /// yields all deletions first, followed by all insertions.
    pub fn iter_changes(&self, op: &DiffOp) -> impl Iterator<Item = Change<'_>> {
        let (tag, old_range, new_range) = op.as_tag_tuple();
        let mut old_index = old_range.start;
        let mut new_index = new_range.start;
        let mut old_slices = &self.old_slices()[op.old_range()];
        let mut new_slices = &self.new_slices()[op.new_range()];

        std::iter::from_fn(move || match tag {
            DiffTag::Equal => {
                let (&value, rest) = old_slices.split_first()?;
                old_slices = rest;
                let change = Change {
                    tag: ChangeTag::Equal,
                    old_index: Some(old_index),
                    new_index: Some(new_index),
                    value,
                };
                old_index += 1;
                new_index += 1;
                Some(change)
            }
            DiffTag::Delete => {
                let (&value, rest) = old_slices.split_first()?;
                old_slices = rest;
                let change = Change {
                    tag: ChangeTag::Delete,
                    old_index: Some(old_index),
                    new_index: None,
                    value,
                };
                old_index += 1;
                Some(change)
            }
            DiffTag::Insert => {
                let (&value, rest) = new_slices.split_first()?;
                new_slices = rest;
                let change = Change {
                    tag: ChangeTag::Insert,
                    old_index: None,
                    new_index: Some(new_index),
                    value,
                };
                new_index += 1;
                Some(change)
            }
            DiffTag::Replace => {
                // Drain the old side first so deletions precede insertions.
                if let Some((&value, rest)) = old_slices.split_first() {
                    old_slices = rest;
                    let change = Change {
                        tag: ChangeTag::Delete,
                        old_index: Some(old_index),
                        new_index: None,
                        value,
                    };
                    old_index += 1;
                    Some(change)
                } else if let Some((&value, rest)) = new_slices.split_first() {
                    new_slices = rest;
                    let change = Change {
                        tag: ChangeTag::Insert,
                        old_index: None,
                        new_index: Some(new_index),
                        value,
                    };
                    new_index += 1;
                    Some(change)
                } else {
                    None
                }
            }
        })
    }

    /// Returns the captured diff operations.
    pub fn ops(&self) -> &[DiffOp] {
        &self.ops
    }

    /// Returns the diff operations grouped by `group_diff_ops`.
    ///
    /// `n` is forwarded unchanged to `group_diff_ops` (presumably the amount
    /// of context around changes; confirm against that function).
    pub fn grouped_ops(&self, n: usize) -> Vec<Vec<DiffOp>> {
        group_diff_ops(self.ops().to_vec(), n)
    }

    /// Renders the diff in unified format and returns it as a string.
    ///
    /// This is [`write_unified_diff`](Self::write_unified_diff) into an
    /// in-memory buffer. For newline-terminated diffs one trailing `'\n'` is
    /// removed from the result.
    pub fn unified_diff(&self, n: usize, header: Option<(&str, &str)>) -> String {
        let mut rv = Vec::<u8>::new();
        self.write_unified_diff(&mut rv, n, header)
            .expect("writing to an in-memory buffer cannot fail");
        if self.newline_terminated && rv.last() == Some(&b'\n') {
            rv.pop();
        }
        // Everything written above comes from `str` values and `fmt`
        // machinery, so the buffer is valid UTF-8; verify instead of assuming.
        String::from_utf8(rv).expect("unified diff output is built from `str` data only")
    }

    /// Writes the diff in unified format to `w`.
    ///
    /// `n` is passed to [`grouped_ops`](Self::grouped_ops) to form the hunks.
    /// If `header` is given, the `---`/`+++` file header is written once
    /// before the first hunk; nothing is written when there are no hunks.
    ///
    /// Every change is written as its sign followed by its value. A newline
    /// is appended unless the diff is newline terminated and the value
    /// already ends in `'\n'` or `'\r'`, so a final line without a line
    /// ending cannot run into the next record.
    ///
    /// # Errors
    ///
    /// Returns any error reported by the underlying writer.
    pub fn write_unified_diff<W: io::Write>(
        &self,
        mut w: W,
        n: usize,
        mut header: Option<(&str, &str)>,
    ) -> Result<(), io::Error> {
        // Renders a zero-based, half-open slice range as a unified diff
        // hunk range: line numbers are one-based, a single line prints only
        // its number, and an empty range is anchored at the line before it.
        struct UnifiedRange(Range<usize>);

        impl fmt::Display for UnifiedRange {
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                let start = self.0.start;
                let len = self.0.end.saturating_sub(start);
                match len {
                    // The zero-based start equals the one-based number of
                    // the preceding line (0 at the very beginning).
                    0 => write!(f, "{},0", start),
                    1 => write!(f, "{}", start + 1),
                    _ => write!(f, "{},{}", start + 1, len),
                }
            }
        }

        for group in self.grouped_ops(n) {
            let (first, last) = match (group.first(), group.last()) {
                (Some(first), Some(last)) => (first, last),
                _ => continue,
            };

            if let Some((old_file, new_file)) = header.take() {
                writeln!(w, "--- {}", old_file)?;
                writeln!(w, "+++ {}", new_file)?;
            }

            // A hunk covers everything from the first to the last operation
            // of the group, on both sides.
            let old_span = first.old_range().start..last.old_range().end;
            let new_span = first.new_range().start..last.new_range().end;
            writeln!(
                w,
                "@@ -{} +{} @@",
                UnifiedRange(old_span),
                UnifiedRange(new_span),
            )?;

            for op in &group {
                for change in self.iter_changes(op) {
                    let value = change.value();
                    write!(w, "{}{}", change.tag().unified_sign(), value)?;
                    let already_terminated = self.newline_terminated
                        && (value.ends_with('\n') || value.ends_with('\r'));
                    if !already_terminated {
                        writeln!(w)?;
                    }
                }
            }
        }
        Ok(())
    }
}
/// Splits `s` into lines, keeping the line ending on each line.
///
/// `"\r\n"`, a lone `"\r"` and `"\n"` all end a line. Trailing text that
/// has no line ending is yielded as a final item; an empty string yields
/// nothing.
fn split_lines(s: &str) -> impl Iterator<Item = &str> {
    let mut rest = s;
    std::iter::from_fn(move || {
        if rest.is_empty() {
            return None;
        }
        let cut = match rest.find(|c: char| c == '\r' || c == '\n') {
            // A carriage return directly followed by a line feed is one ending.
            Some(pos) if rest[pos..].starts_with("\r\n") => pos + 2,
            Some(pos) => pos + 1,
            None => rest.len(),
        };
        let (line, tail) = rest.split_at(cut);
        rest = tail;
        Some(line)
    })
}
/// Splits `s` into words, attaching each run of whitespace to the word
/// that precedes it.
///
/// Leading whitespace forms an item of its own, and trailing text without
/// whitespace after it is yielded as the final item.
fn split_words(s: &str) -> impl Iterator<Item = &str> {
    let mut rest = s;
    std::iter::from_fn(move || {
        if rest.is_empty() {
            return None;
        }
        // Length of the leading run of non-whitespace characters.
        let word_len = rest.find(char::is_whitespace).unwrap_or(rest.len());
        let after_word = &rest[word_len..];
        // Length of the whitespace run that follows the word.
        let gap_len = after_word
            .find(|c: char| !c.is_whitespace())
            .unwrap_or(after_word.len());
        let (token, tail) = rest.split_at(word_len + gap_len);
        rest = tail;
        Some(token)
    })
}
/// Splits `s` into sub-slices that each contain exactly one `char`.
fn split_chars(s: &str) -> impl Iterator<Item = &str> {
    let mut rest = s;
    std::iter::from_fn(move || {
        let width = rest.chars().next()?.len_utf8();
        let (head, tail) = rest.split_at(width);
        rest = tail;
        Some(head)
    })
}
/// Splits `s` into extended grapheme clusters.
///
/// Only available with the `unicode` feature.
#[cfg(feature = "unicode")]
fn split_graphemes(s: &str) -> impl Iterator<Item = &str> {
    use unicode_segmentation::UnicodeSegmentation;

    // `true` selects extended grapheme clusters.
    let extended = true;
    s.graphemes(extended)
}
/// Computes a cheap upper bound for the similarity ratio of two sequences.
///
/// Only the lengths are inspected: the result is
/// `2 * min(len1, len2) / (len1 + len2)`, or `1.0` when both are empty.
fn upper_seq_ratio<T: PartialEq>(seq1: &[T], seq2: &[T]) -> f32 {
    let (len1, len2) = (seq1.len(), seq2.len());
    let total = len1 + len2;
    if total == 0 {
        return 1.0;
    }
    let shorter = len1.min(len2);
    2.0 * shorter as f32 / total as f32
}
/// Shortcut that line-diffs `old` against `new` with the given algorithm
/// and returns the result rendered as a unified diff.
///
/// `n` and `header` are passed on to `TextDiff::unified_diff`.
pub fn unified_diff<'old, 'new>(
    alg: Algorithm,
    old: &'old str,
    new: &'new str,
    n: usize,
    header: Option<(&str, &str)>,
) -> String {
    let mut config = TextDiff::configure();
    config.algorithm(alg);
    let diff = config.diff_lines(old, new);
    diff.unified_diff(n, header)
}
/// Returns up to `n` entries of `possibilities` that are similar to `word`.
///
/// Words are compared character by character. A candidate is kept when its
/// similarity ratio is at least `cutoff`. Results are ordered from the
/// highest to the lowest ratio; candidates with the same scaled ratio are
/// ordered by descending string comparison.
pub fn get_close_matches<'a>(
    word: &str,
    possibilities: &[&'a str],
    n: usize,
    cutoff: f32,
) -> Vec<&'a str> {
    let word_chars: Vec<&str> = split_chars(word).collect();
    let mut ranked = BinaryHeap::new();

    for &candidate in possibilities {
        let candidate_chars: Vec<&str> = split_chars(candidate).collect();
        // The length-based bound is cheap; skip the real diff when even the
        // bound cannot reach the cutoff.
        if upper_seq_ratio(&word_chars, &candidate_chars) < cutoff {
            continue;
        }
        let ratio = TextDiff::from_slices(&word_chars, &candidate_chars).ratio();
        if ratio >= cutoff {
            // Scale the ratio to an integer so the heap entry is `Ord`.
            let score = (ratio * u32::MAX as f32) as u32;
            ranked.push((score, candidate));
        }
    }

    let best: Vec<&'a str> = std::iter::from_fn(|| ranked.pop())
        .take(n)
        .map(|(_, candidate)| candidate)
        .collect();
    best
}
/// `split_lines` keeps every kind of line ending and yields nothing for
/// empty input.
#[test]
fn test_split_lines() {
    fn lines(s: &str) -> Vec<&str> {
        split_lines(s).collect()
    }

    assert_eq!(
        lines("first\nsecond\rthird\r\nfourth\nlast"),
        ["first\n", "second\r", "third\r\n", "fourth\n", "last"]
    );
    assert_eq!(lines("\n\n"), ["\n", "\n"]);
    assert_eq!(lines("\n"), ["\n"]);
    assert!(lines("").is_empty());
}
/// `split_words` attaches whitespace runs to the preceding word.
#[test]
fn test_split_words() {
    let words: Vec<&str> = split_words("foo bar baz\n\n aha").collect();
    assert_eq!(words, ["foo ", "bar ", "baz\n\n ", "aha"]);
}
/// `split_chars` splits per `char`, so a variation selector becomes its
/// own item.
#[test]
fn test_split_chars() {
    let chars: Vec<&str> = split_chars("abcfö❄️").collect();
    assert_eq!(chars, ["a", "b", "c", "f", "ö", "❄", "\u{fe0f}"]);
}
/// `split_graphemes` keeps a base character and its variation selector
/// together.
#[test]
#[cfg(feature = "unicode")]
fn test_split_graphemes() {
    let graphemes: Vec<&str> = split_graphemes("abcfö❄️").collect();
    assert_eq!(graphemes, ["a", "b", "c", "f", "ö", "❄️"]);
}
/// Snapshot of the operations captured for a small line diff.
#[test]
fn test_captured_ops() {
    let old = "Hello World\nsome stuff here\nsome more stuff here\n";
    let new = "Hello World\nsome amazing stuff here\nsome more stuff here\n";
    let diff = TextDiff::from_lines(old, new);
    insta::assert_debug_snapshot!(&diff.ops());
}
/// Snapshot of the unified diff rendered for a small line diff.
#[test]
fn test_unified_diff() {
    let old = "Hello World\nsome stuff here\nsome more stuff here\n";
    let new = "Hello World\nsome amazing stuff here\nsome more stuff here\n";
    let diff = TextDiff::from_lines(old, new);
    assert!(diff.newline_terminated());
    insta::assert_snapshot!(&diff.unified_diff(3, Some(("old", "new"))));
}
/// Snapshot of every change yielded for a small line diff.
#[test]
fn test_line_ops() {
    let old = "Hello World\nsome stuff here\nsome more stuff here\n";
    let new = "Hello World\nsome amazing stuff here\nsome more stuff here\n";
    let diff = TextDiff::from_lines(old, new);
    assert!(diff.newline_terminated());

    let mut changes = Vec::new();
    for op in diff.ops() {
        changes.extend(diff.iter_changes(op));
    }
    insta::assert_debug_snapshot!(&changes);
}
/// Snapshot of the operations captured for a character diff.
#[test]
fn test_char_diff() {
    let (old, new) = ("Hello World", "Hallo Welt");
    let diff = TextDiff::from_chars(old, new);
    insta::assert_debug_snapshot!(diff.ops());
}
/// `ratio` is 0.75 for "abcd" against "bcde" and 1.0 for two empty inputs.
#[test]
fn test_ratio() {
    let cases = [("abcd", "bcde", 0.75_f32), ("", "", 1.0_f32)];
    for &(old, new, expected) in cases.iter() {
        let diff = TextDiff::from_chars(old, new);
        assert_eq!(diff.ratio(), expected);
    }
}
/// Close matches are returned best first and limited by the cutoff.
#[test]
fn test_get_close_matches() {
    let candidates = ["ape", "apple", "peach", "puppy"];
    let matches = get_close_matches("appel", &candidates[..], 3, 0.6);
    assert_eq!(matches, ["apple", "ape"]);
}