use std::borrow::Cow;
use std::cmp::Reverse;
use std::collections::BinaryHeap;
use std::time::{Duration, Instant};
mod abstraction;
#[cfg(feature = "inline")]
mod inline;
mod utils;
pub use self::abstraction::{DiffableStr, DiffableStrRef};
#[cfg(feature = "inline")]
pub use self::inline::InlineChange;
use self::utils::{upper_seq_ratio, QuickSeqRatio};
use crate::algorithms::IdentifyDistinct;
use crate::iter::{AllChangesIter, ChangesIter};
use crate::udiff::UnifiedDiff;
use crate::{capture_diff_deadline, get_diff_ratio, group_diff_ops, Algorithm, DiffOp};
#[derive(Debug, Clone, Copy)]
enum Deadline {
Absolute(Instant),
Relative(Duration),
}
impl Deadline {
fn into_instant(self) -> Instant {
match self {
Deadline::Absolute(instant) => instant,
Deadline::Relative(duration) => Instant::now() + duration,
}
}
}
#[derive(Clone, Debug)]
pub struct TextDiffConfig {
algorithm: Algorithm,
newline_terminated: Option<bool>,
deadline: Option<Deadline>,
}
impl Default for TextDiffConfig {
fn default() -> TextDiffConfig {
TextDiffConfig {
algorithm: Algorithm::default(),
newline_terminated: None,
deadline: None,
}
}
}
impl TextDiffConfig {
pub fn algorithm(&mut self, alg: Algorithm) -> &mut Self {
self.algorithm = alg;
self
}
pub fn deadline(&mut self, deadline: Instant) -> &mut Self {
self.deadline = Some(Deadline::Absolute(deadline));
self
}
pub fn timeout(&mut self, timeout: Duration) -> &mut Self {
self.deadline = Some(Deadline::Relative(timeout));
self
}
pub fn newline_terminated(&mut self, yes: bool) -> &mut Self {
self.newline_terminated = Some(yes);
self
}
pub fn diff_lines<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
&self,
old: &'old T,
new: &'new T,
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
self.diff(
Cow::Owned(old.as_diffable_str().tokenize_lines()),
Cow::Owned(new.as_diffable_str().tokenize_lines()),
true,
)
}
pub fn diff_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
&self,
old: &'old T,
new: &'new T,
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
self.diff(
Cow::Owned(old.as_diffable_str().tokenize_words()),
Cow::Owned(new.as_diffable_str().tokenize_words()),
false,
)
}
pub fn diff_chars<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
&self,
old: &'old T,
new: &'new T,
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
self.diff(
Cow::Owned(old.as_diffable_str().tokenize_chars()),
Cow::Owned(new.as_diffable_str().tokenize_chars()),
false,
)
}
#[cfg(feature = "unicode")]
pub fn diff_unicode_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
&self,
old: &'old T,
new: &'new T,
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
self.diff(
Cow::Owned(old.as_diffable_str().tokenize_unicode_words()),
Cow::Owned(new.as_diffable_str().tokenize_unicode_words()),
false,
)
}
#[cfg(feature = "unicode")]
pub fn diff_graphemes<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
&self,
old: &'old T,
new: &'new T,
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
self.diff(
Cow::Owned(old.as_diffable_str().tokenize_graphemes()),
Cow::Owned(new.as_diffable_str().tokenize_graphemes()),
false,
)
}
pub fn diff_slices<'old, 'new, 'bufs, T: DiffableStr + ?Sized>(
&self,
old: &'bufs [&'old T],
new: &'bufs [&'new T],
) -> TextDiff<'old, 'new, 'bufs, T> {
self.diff(Cow::Borrowed(old), Cow::Borrowed(new), false)
}
fn diff<'old, 'new, 'bufs, T: DiffableStr + ?Sized>(
&self,
old: Cow<'bufs, [&'old T]>,
new: Cow<'bufs, [&'new T]>,
newline_terminated: bool,
) -> TextDiff<'old, 'new, 'bufs, T> {
let deadline = self.deadline.map(|x| x.into_instant());
let ops = if old.len() > 100 || new.len() > 100 {
let ih = IdentifyDistinct::<u32>::new(&old[..], 0..old.len(), &new[..], 0..new.len());
capture_diff_deadline(
self.algorithm,
ih.old_lookup(),
ih.old_range(),
ih.new_lookup(),
ih.new_range(),
deadline,
)
} else {
capture_diff_deadline(
self.algorithm,
&old[..],
0..old.len(),
&new[..],
0..new.len(),
deadline,
)
};
TextDiff {
old,
new,
ops,
newline_terminated: self.newline_terminated.unwrap_or(newline_terminated),
algorithm: self.algorithm,
}
}
}
pub struct TextDiff<'old, 'new, 'bufs, T: DiffableStr + ?Sized> {
old: Cow<'bufs, [&'old T]>,
new: Cow<'bufs, [&'new T]>,
ops: Vec<DiffOp>,
newline_terminated: bool,
algorithm: Algorithm,
}
impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> {
pub fn configure() -> TextDiffConfig {
TextDiffConfig::default()
}
pub fn from_lines<T: DiffableStrRef + ?Sized>(
old: &'old T,
new: &'new T,
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
TextDiff::configure().diff_lines(old, new)
}
pub fn from_words<T: DiffableStrRef + ?Sized>(
old: &'old T,
new: &'new T,
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
TextDiff::configure().diff_words(old, new)
}
pub fn from_chars<T: DiffableStrRef + ?Sized>(
old: &'old T,
new: &'new T,
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
TextDiff::configure().diff_chars(old, new)
}
#[cfg(feature = "unicode")]
pub fn from_unicode_words<T: DiffableStrRef + ?Sized>(
old: &'old T,
new: &'new T,
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
TextDiff::configure().diff_unicode_words(old, new)
}
#[cfg(feature = "unicode")]
pub fn from_graphemes<T: DiffableStrRef + ?Sized>(
old: &'old T,
new: &'new T,
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
TextDiff::configure().diff_graphemes(old, new)
}
}
impl<'old, 'new, 'bufs, T: DiffableStr + ?Sized + 'old + 'new> TextDiff<'old, 'new, 'bufs, T> {
pub fn from_slices(
old: &'bufs [&'old T],
new: &'bufs [&'new T],
) -> TextDiff<'old, 'new, 'bufs, T> {
TextDiff::configure().diff_slices(old, new)
}
pub fn algorithm(&self) -> Algorithm {
self.algorithm
}
pub fn newline_terminated(&self) -> bool {
self.newline_terminated
}
pub fn old_slices(&self) -> &[&'old T] {
&self.old
}
pub fn new_slices(&self) -> &[&'new T] {
&self.new
}
pub fn ratio(&self) -> f32 {
get_diff_ratio(self.ops(), self.old.len(), self.new.len())
}
pub fn iter_changes<'x, 'slf>(
&'slf self,
op: &DiffOp,
) -> ChangesIter<'slf, 'x, [&'x T], [&'x T], T>
where
'x: 'slf,
'old: 'x,
'new: 'x,
{
op.iter_changes(self.old_slices(), self.new_slices())
}
pub fn ops(&self) -> &[DiffOp] {
&self.ops
}
pub fn grouped_ops(&self, n: usize) -> Vec<Vec<DiffOp>> {
group_diff_ops(self.ops().to_vec(), n)
}
pub fn iter_all_changes<'x, 'slf>(&'slf self) -> AllChangesIter<'slf, 'x, T>
where
'x: 'slf + 'old + 'new,
'old: 'x,
'new: 'x,
{
AllChangesIter::new(&self.old[..], &self.new[..], self.ops())
}
pub fn unified_diff<'diff>(&'diff self) -> UnifiedDiff<'diff, 'old, 'new, 'bufs, T> {
UnifiedDiff::from_text_diff(self)
}
#[cfg(feature = "inline")]
pub fn iter_inline_changes<'slf>(
&'slf self,
op: &DiffOp,
) -> impl Iterator<Item = InlineChange<'slf, T>> + '_
where
'slf: 'old + 'new,
{
inline::iter_inline_changes(self, op)
}
}
pub fn get_close_matches<'a, T: DiffableStr + ?Sized>(
word: &T,
possibilities: &[&'a T],
n: usize,
cutoff: f32,
) -> Vec<&'a T> {
let mut matches = BinaryHeap::new();
let seq1 = word.tokenize_chars();
let quick_ratio = QuickSeqRatio::new(&seq1);
for &possibility in possibilities {
let seq2 = possibility.tokenize_chars();
if upper_seq_ratio(&seq1, &seq2) < cutoff || quick_ratio.calc(&seq2) < cutoff {
continue;
}
let diff = TextDiff::from_slices(&seq1, &seq2);
let ratio = diff.ratio();
if ratio >= cutoff {
matches.push(((ratio * std::u32::MAX as f32) as u32, Reverse(possibility)));
}
}
let mut rv = vec![];
for _ in 0..n {
if let Some((_, elt)) = matches.pop() {
rv.push(elt.0);
} else {
break;
}
}
rv
}
#[test]
fn test_captured_ops() {
let diff = TextDiff::from_lines(
"Hello World\nsome stuff here\nsome more stuff here\n",
"Hello World\nsome amazing stuff here\nsome more stuff here\n",
);
insta::assert_debug_snapshot!(&diff.ops());
}
#[test]
fn test_captured_word_ops() {
let diff = TextDiff::from_words(
"Hello World\nsome stuff here\nsome more stuff here\n",
"Hello World\nsome amazing stuff here\nsome more stuff here\n",
);
let changes = diff
.ops()
.iter()
.flat_map(|op| diff.iter_changes(op))
.collect::<Vec<_>>();
insta::assert_debug_snapshot!(&changes);
}
#[test]
fn test_unified_diff() {
let diff = TextDiff::from_lines(
"Hello World\nsome stuff here\nsome more stuff here\n",
"Hello World\nsome amazing stuff here\nsome more stuff here\n",
);
assert_eq!(diff.newline_terminated(), true);
insta::assert_snapshot!(&diff
.unified_diff()
.context_radius(3)
.header("old", "new")
.to_string());
}
#[test]
fn test_line_ops() {
let a = "Hello World\nsome stuff here\nsome more stuff here\n";
let b = "Hello World\nsome amazing stuff here\nsome more stuff here\n";
let diff = TextDiff::from_lines(a, b);
assert_eq!(diff.newline_terminated(), true);
let changes = diff
.ops()
.iter()
.flat_map(|op| diff.iter_changes(op))
.collect::<Vec<_>>();
insta::assert_debug_snapshot!(&changes);
#[cfg(feature = "bytes")]
{
let byte_diff = TextDiff::from_lines(a.as_bytes(), b.as_bytes());
let byte_changes = byte_diff
.ops()
.iter()
.flat_map(|op| byte_diff.iter_changes(op))
.collect::<Vec<_>>();
for (change, byte_change) in changes.iter().zip(byte_changes.iter()) {
assert_eq!(change.to_string_lossy(), byte_change.to_string_lossy());
}
}
}
#[test]
fn test_virtual_newlines() {
let diff = TextDiff::from_lines("a\nb", "a\nc\n");
assert_eq!(diff.newline_terminated(), true);
let changes = diff
.ops()
.iter()
.flat_map(|op| diff.iter_changes(op))
.collect::<Vec<_>>();
insta::assert_debug_snapshot!(&changes);
}
#[test]
fn test_char_diff() {
let diff = TextDiff::from_chars("Hello World", "Hallo Welt");
insta::assert_debug_snapshot!(diff.ops());
#[cfg(feature = "bytes")]
{
let byte_diff = TextDiff::from_chars("Hello World".as_bytes(), "Hallo Welt".as_bytes());
assert_eq!(diff.ops(), byte_diff.ops());
}
}
#[test]
fn test_ratio() {
let diff = TextDiff::from_chars("abcd", "bcde");
assert_eq!(diff.ratio(), 0.75);
let diff = TextDiff::from_chars("", "");
assert_eq!(diff.ratio(), 1.0);
}
#[test]
fn test_get_close_matches() {
let matches = get_close_matches("appel", &["ape", "apple", "peach", "puppy"][..], 3, 0.6);
assert_eq!(matches, vec!["apple", "ape"]);
let matches = get_close_matches(
"hulo",
&[
"hi", "hulu", "hali", "hoho", "amaz", "zulo", "blah", "hopp", "uulo", "aulo",
][..],
5,
0.7,
);
assert_eq!(matches, vec!["aulo", "hulu", "uulo", "zulo"]);
}
#[test]
fn test_lifetimes_on_iter() {
use crate::Change;
fn diff_lines<'x, T>(old: &'x T, new: &'x T) -> Vec<Change<'x, T::Output>>
where
T: DiffableStrRef + ?Sized,
{
TextDiff::from_lines(old, new).iter_all_changes().collect()
}
let a = "1\n2\n3\n".to_string();
let b = "1\n99\n3\n".to_string();
let changes = diff_lines(&a, &b);
insta::assert_debug_snapshot!(&changes);
}