fix: Check that the merged text is syntactically valid and consistent with the merged tree #368
14 changed files with 323 additions and 74 deletions
|
@ -0,0 +1,4 @@
|
|||
from ..._utils import (
|
||||
foo,
|
||||
async_foo,
|
||||
)
|
|
@ -0,0 +1,14 @@
|
|||
<<<<<<< LEFT
|
||||
from ..._utils import (
|
||||
foo,
|
||||
bar,
|
||||
async_foo,
|
||||
)
|
||||
||||||| BASE
|
||||
from ..._utils import (
|
||||
foo,
|
||||
async_foo,
|
||||
)
|
||||
=======
|
||||
from ..._utils import foo, async_foo
|
||||
>>>>>>> RIGHT
|
|
@ -0,0 +1 @@
|
|||
from ..._utils import foo, bar, async_foo
|
|
@ -0,0 +1,5 @@
|
|||
from ..._utils import (
|
||||
foo,
|
||||
bar,
|
||||
async_foo,
|
||||
)
|
|
@ -0,0 +1,9 @@
|
|||
The structured resolution of this merge gives:
|
||||
|
||||
```
|
||||
from ..._utils import foo,
|
||||
bar, async_foo
|
||||
```
|
||||
which is syntactically invalid but still accepted by the Python grammar.
|
||||
|
||||
This test checks that this structured resolution is rejected.
|
|
@ -0,0 +1 @@
|
|||
from ..._utils import foo, async_foo
|
|
@ -343,7 +343,7 @@ impl<'a> AstNode<'a> {
|
|||
|
||||
/// Whether this node is isomorphic to another.
|
||||
/// This doesn't take commutativity into account.
|
||||
pub fn isomorphic_to(&'a self, other: &'a Self) -> bool {
|
||||
pub fn isomorphic_to<'b>(&'a self, other: &'b AstNode<'b>) -> bool {
|
||||
let mut zipped = self.dfs().zip(other.dfs());
|
||||
self.hash == other.hash
|
||||
&& zipped.all(|(n1, n2)| {
|
||||
|
|
|
@ -37,7 +37,7 @@ pub fn line_based_merge(
|
|||
let parsed_merge =
|
||||
line_based_merge_parsed(contents_base, contents_left, contents_right, settings);
|
||||
|
||||
MergeResult::from_parsed_merge(&parsed_merge, settings)
|
||||
parsed_merge.into_merge_result(settings)
|
||||
}
|
||||
|
||||
/// Do a line-based merge. If it is conflict-free, also check if it introduced any duplicate signatures,
|
||||
|
@ -52,7 +52,7 @@ pub(crate) fn line_based_merge_with_duplicate_signature_detection(
|
|||
let parsed_merge =
|
||||
line_based_merge_parsed(contents_base, contents_left, contents_right, settings);
|
||||
|
||||
let mut merge_result = MergeResult::from_parsed_merge(&parsed_merge, settings);
|
||||
let mut merge_result = parsed_merge.into_merge_result(settings);
|
||||
|
||||
let mut parser = TSParser::new();
|
||||
parser
|
||||
|
|
|
@ -1,7 +1,4 @@
|
|||
use crate::{
|
||||
attempts::Attempt, line_based::LINE_BASED_METHOD, parsed_merge::ParsedMerge,
|
||||
settings::DisplaySettings,
|
||||
};
|
||||
use crate::attempts::Attempt;
|
||||
use log::info;
|
||||
|
||||
/// A merged output (represented as a string) together with statistics
|
||||
|
@ -46,19 +43,4 @@ impl MergeResult {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn from_parsed_merge(
|
||||
parsed_merge: &ParsedMerge,
|
||||
settings: &DisplaySettings,
|
||||
) -> Self {
|
||||
Self {
|
||||
contents: parsed_merge.render(settings),
|
||||
conflict_count: parsed_merge.conflict_count(),
|
||||
conflict_mass: parsed_merge.conflict_mass(),
|
||||
method: LINE_BASED_METHOD,
|
||||
// the line-based merge might have come from a non-syntax-aware tool,
|
||||
// and we cautiously assume that it does have issues
|
||||
has_additional_issues: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,7 +3,9 @@ use std::borrow::Cow;
|
|||
use itertools::Itertools;
|
||||
use regex::Regex;
|
||||
|
||||
use crate::{parsed_merge::ParsedMerge, settings::DisplaySettings};
|
||||
use crate::{
|
||||
merge_result::MergeResult, parsed_merge::ParsedMerge, pcs::Revision, settings::DisplaySettings,
|
||||
};
|
||||
|
||||
/// A merged file represented as a sequence of sections,
|
||||
/// some being successfully merged and others being conflicts.
|
||||
|
@ -36,6 +38,27 @@ impl<'a> MergedText<'a> {
|
|||
Self::default()
|
||||
}
|
||||
|
||||
/// Number of conflict sections in the text
|
||||
pub(crate) fn count_conflicts(&self) -> usize {
|
||||
self.sections
|
||||
.iter()
|
||||
.filter(|section| matches!(section, MergeSection::Conflict { .. }))
|
||||
.count()
|
||||
}
|
||||
|
||||
/// Sum of the size of conflicts
|
||||
pub(crate) fn conflict_mass(&self) -> usize {
|
||||
self.sections
|
||||
.iter()
|
||||
.map(|section| match section {
|
||||
MergeSection::Merged(_) => 0,
|
||||
MergeSection::Conflict { base, left, right } => {
|
||||
base.len() + left.len() + right.len()
|
||||
}
|
||||
})
|
||||
.sum()
|
||||
}
|
||||
|
||||
/// Appends merged text at the end
|
||||
pub(crate) fn push_merged(&mut self, contents: Cow<'a, str>) {
|
||||
self.sections.push(MergeSection::Merged(contents));
|
||||
|
@ -135,6 +158,25 @@ impl<'a> MergedText<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Reconstruct the source of a revision based on the merged output.
|
||||
///
|
||||
/// Because some changes from both revisions have likely already been
|
||||
/// merged in the non-conflicting sections, this is not the original revision,
|
||||
/// but rather a half-merged version of it.
|
||||
pub(crate) fn reconstruct_revision(&self, revision: Revision) -> String {
|
||||
self.sections
|
||||
.iter()
|
||||
.map(|section| match section {
|
||||
ada4a marked this conversation as resolved
|
||||
MergeSection::Merged(contents) => contents.as_ref(),
|
||||
MergeSection::Conflict { left, base, right } => match revision {
|
||||
Revision::Base => base.as_ref(),
|
||||
Revision::Left => left.as_ref(),
|
||||
Revision::Right => right.as_ref(),
|
||||
},
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Renders the full file according to the supplied [`DisplaySettings`]
|
||||
pub(crate) fn render(&self, settings: &DisplaySettings) -> String {
|
||||
// if all the chunks are `Merged`, just concatenate them all
|
||||
|
@ -344,6 +386,23 @@ impl<'a> MergedText<'a> {
|
|||
output.push('\n');
|
||||
}
|
||||
}
|
||||
|
||||
/// Render to a merge result
|
||||
#[allow(clippy::wrong_self_convention)]
|
||||
pub(crate) fn into_merge_result(
|
||||
&self,
|
||||
settings: &DisplaySettings,
|
||||
method: &'static str,
|
||||
) -> MergeResult {
|
||||
let rendered = self.render(settings);
|
||||
MergeResult {
|
||||
contents: rendered,
|
||||
conflict_count: self.count_conflicts(),
|
||||
conflict_mass: self.conflict_mass(),
|
||||
method,
|
||||
has_additional_issues: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -556,4 +615,44 @@ there we go
|
|||
|
||||
assert_eq!(merged_text.render(&DisplaySettings::default()), expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reconstruct_revision() {
|
||||
let merged_text = MergedText {
|
||||
sections: vec![
|
||||
merged("let's say "),
|
||||
conflict("ho", "hi", "ha"),
|
||||
merged(" to "),
|
||||
conflict("you", "everyone", "me"),
|
||||
merged("!"),
|
||||
],
|
||||
};
|
||||
assert_eq!(
|
||||
merged_text.reconstruct_revision(Revision::Base),
|
||||
"let's say ho to you!"
|
||||
);
|
||||
assert_eq!(
|
||||
merged_text.reconstruct_revision(Revision::Left),
|
||||
"let's say hi to everyone!"
|
||||
);
|
||||
assert_eq!(
|
||||
merged_text.reconstruct_revision(Revision::Right),
|
||||
"let's say ha to me!"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn conflict_count_and_mass() {
|
||||
let merged_text = MergedText {
|
||||
sections: vec![
|
||||
merged("let's say "),
|
||||
conflict("ho", "hi", "ha"),
|
||||
merged(" to "),
|
||||
conflict("you", "everyone", "me"),
|
||||
merged("!"),
|
||||
],
|
||||
};
|
||||
assert_eq!(merged_text.conflict_mass(), 19);
|
||||
assert_eq!(merged_text.count_conflicts(), 2);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,7 +6,7 @@ use std::{
|
|||
hash::{Hash, Hasher},
|
||||
};
|
||||
|
||||
use itertools::Itertools;
|
||||
use itertools::{EitherOrBoth, Itertools};
|
||||
|
||||
use crate::{
|
||||
ast::AstNode,
|
||||
|
@ -275,15 +275,124 @@ impl<'a> MergedTree<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Checks if the merged tree is isomorphic to a parsed source,
|
||||
/// when considered at a particular revision.
|
||||
/// This is used as a safety check to make sure that the rendered
|
||||
/// version of this merge (which is then re-parsed) is faithful to
|
||||
/// the intended merge structure, as a means of detecting invalid
|
||||
/// whitespace generation or merges that are syntactically invalid.
|
||||
pub fn isomorphic_to_source<'b>(
|
||||
&'a self,
|
||||
other_node: &'b AstNode<'b>,
|
||||
revision: Revision,
|
||||
class_mapping: &ClassMapping<'a>,
|
||||
) -> bool {
|
||||
match self {
|
||||
MergedTree::ExactTree {
|
||||
node, revisions, ..
|
||||
} => {
|
||||
let ast_node = class_mapping.node_at_rev(*node, revisions.any()).expect(
|
||||
"inconsistency between revision set of ExactTree and the class mapping",
|
||||
);
|
||||
ast_node.isomorphic_to(other_node)
|
||||
}
|
||||
MergedTree::MixedTree { node, children, .. } => {
|
||||
if node.grammar_name() != other_node.grammar_name {
|
||||
return false;
|
||||
}
|
||||
// If one of the children is a line-based merge, we just give up
|
||||
// and assume that the nodes are isomorphic. This is because
|
||||
// the line-based merge might contain any number of actual children,
|
||||
// so we are unable to match the other children together.
|
||||
// It would be better to re-parse the textual merge, but that would assume
|
||||
// the ability to parse a snippet of text for a particular node type, which
|
||||
// is not supported by tree-sitter yet:
|
||||
// https://github.com/tree-sitter/tree-sitter/issues/711
|
||||
let contains_line_based_merge = children
|
||||
.iter()
|
||||
.any(|c| matches!(c, MergedTree::LineBasedMerge { .. }));
|
||||
if contains_line_based_merge {
|
||||
return true;
|
||||
}
|
||||
let children_at_rev = children
|
||||
.iter()
|
||||
.flat_map(|child| match child {
|
||||
MergedTree::LineBasedMerge { .. } => {
|
||||
unreachable!(
|
||||
"line-based merge should have been caught by the earlier filter"
|
||||
)
|
||||
}
|
||||
MergedTree::Conflict { base, left, right } => {
|
||||
let nodes = match revision {
|
||||
Revision::Base => base,
|
||||
Revision::Left => left,
|
||||
Revision::Right => right,
|
||||
};
|
||||
nodes.iter().copied().map(MergedChild::Original).collect()
|
||||
}
|
||||
_ => {
|
||||
vec![MergedChild::Merged(child)]
|
||||
}
|
||||
})
|
||||
.filter(|child| {
|
||||
// filter out nodes which wouldn't be present in a parsed tree,
|
||||
// so as not to create a mismatch in the number of children
|
||||
match child {
|
||||
MergedChild::Merged(MergedTree::CommutativeChildSeparator {
|
||||
separator,
|
||||
}) => !separator.trim().is_empty(),
|
||||
MergedChild::Merged(MergedTree::MixedTree { children, .. }) => {
|
||||
!children.is_empty()
|
||||
}
|
||||
_ => true,
|
||||
}
|
||||
});
|
||||
children_at_rev
|
||||
.zip_longest(&other_node.children)
|
||||
.all(|pair| {
|
||||
if let EitherOrBoth::Both(child, other_child) = pair {
|
||||
match child {
|
||||
MergedChild::Merged(merged_tree) => merged_tree
|
||||
.isomorphic_to_source(other_child, revision, class_mapping),
|
||||
MergedChild::Original(ast_node) => {
|
||||
ast_node.isomorphic_to(other_child)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
})
|
||||
}
|
||||
MergedTree::LineBasedMerge { .. } => {
|
||||
// See above
|
||||
true
|
||||
}
|
||||
MergedTree::Conflict { .. } => {
|
||||
// Conflict is only allowed to appear as a child of another node, in which case
|
||||
// it will be flattened above
|
||||
false
|
||||
}
|
||||
MergedTree::CommutativeChildSeparator { separator } => {
|
||||
separator.trim() == other_node.source.trim()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Renders the tree to a series of strings, with merged and conflicting sections
|
||||
pub fn to_merged_text(&'a self, class_mapping: &ClassMapping<'a>) -> MergedText<'a> {
|
||||
let mut merged_text = MergedText::new();
|
||||
self.pretty_print_recursively(&mut merged_text, class_mapping, None, "");
|
||||
merged_text
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
/// Pretty-prints the result tree into its final output. Exciting!
|
||||
pub fn pretty_print<'u: 'a>(
|
||||
&'u self,
|
||||
class_mapping: &ClassMapping<'a>,
|
||||
settings: &DisplaySettings,
|
||||
) -> String {
|
||||
let mut output = MergedText::new();
|
||||
self.pretty_print_recursively(&mut output, class_mapping, None, "");
|
||||
output.render(settings)
|
||||
self.to_merged_text(class_mapping).render(settings)
|
||||
}
|
||||
|
||||
/// Recursively pretty-prints a sub part of the result tree.
|
||||
|
@ -623,31 +732,6 @@ impl<'a> MergedTree<'a> {
|
|||
.map_or("", |(_, shift)| shift)
|
||||
}
|
||||
|
||||
/// The number of conflicts in this merge
|
||||
pub fn count_conflicts(&self) -> usize {
|
||||
match self {
|
||||
Self::ExactTree { .. } | Self::CommutativeChildSeparator { .. } => 0,
|
||||
Self::MixedTree { children, .. } => children.iter().map(Self::count_conflicts).sum(),
|
||||
Self::Conflict { .. } => 1,
|
||||
Self::LineBasedMerge { parsed, .. } => parsed.conflict_count(),
|
||||
}
|
||||
}
|
||||
|
||||
/// The number of conflicting bytes, as an attempt to quantify the effort
|
||||
/// required to solve them.
|
||||
pub fn conflict_mass(&self) -> usize {
|
||||
match self {
|
||||
Self::ExactTree { .. } | Self::CommutativeChildSeparator { .. } => 0,
|
||||
Self::MixedTree { children, .. } => children.iter().map(Self::conflict_mass).sum(),
|
||||
Self::Conflict { base, left, right } => {
|
||||
Self::pretty_print_astnode_list(Revision::Left, left).len()
|
||||
+ Self::pretty_print_astnode_list(Revision::Base, base).len()
|
||||
+ Self::pretty_print_astnode_list(Revision::Right, right).len()
|
||||
}
|
||||
Self::LineBasedMerge { parsed, .. } => parsed.conflict_mass(),
|
||||
}
|
||||
}
|
||||
|
||||
fn pretty_print_astnode_list(_revision: Revision, list: &[&'a AstNode<'a>]) -> String {
|
||||
let mut output = String::new();
|
||||
let mut first = true;
|
||||
|
@ -694,6 +778,15 @@ impl Display for MergedTree<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Represents a child from a MergedTree::MixedTree
|
||||
/// where any conflicts have been replaced by their version in
|
||||
/// one revision.
|
||||
/// Only used internally in `MergedTree::isomorphic_to_source`.
|
||||
enum MergedChild<'a, 'b> {
|
||||
Merged(&'a MergedTree<'a>),
|
||||
Original(&'b AstNode<'b>),
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
|
|
@ -4,7 +4,9 @@ use regex::Regex;
|
|||
|
||||
use crate::{
|
||||
ast::{Ast, AstNode},
|
||||
line_based::LINE_BASED_METHOD,
|
||||
matching::Matching,
|
||||
merge_result::MergeResult,
|
||||
pcs::Revision,
|
||||
settings::DisplaySettings,
|
||||
};
|
||||
|
@ -414,7 +416,7 @@ impl<'a> ParsedMerge<'a> {
|
|||
None
|
||||
}
|
||||
|
||||
// Number of conflicts in this merge
|
||||
/// Number of conflicts in this merge
|
||||
pub fn conflict_count(&self) -> usize {
|
||||
self.chunks
|
||||
.iter()
|
||||
|
@ -422,8 +424,8 @@ impl<'a> ParsedMerge<'a> {
|
|||
.count()
|
||||
}
|
||||
|
||||
// Number of bytes of conflicting content, which is an attempt
|
||||
// at quantifying the effort it takes to resolve the conflicts.
|
||||
/// Number of bytes of conflicting content, which is an attempt
|
||||
/// at quantifying the effort it takes to resolve the conflicts.
|
||||
pub fn conflict_mass(&self) -> usize {
|
||||
self.chunks
|
||||
.iter()
|
||||
|
@ -438,10 +440,24 @@ impl<'a> ParsedMerge<'a> {
|
|||
.sum()
|
||||
}
|
||||
|
||||
// Whether the merge is empty when rendered
|
||||
/// Whether the merge is empty when rendered
|
||||
pub(crate) fn is_empty(&self) -> bool {
|
||||
self.chunks.is_empty() || self.render_conflictless().is_some_and(|s| s.is_empty())
|
||||
}
|
||||
|
||||
/// Render into a merge result with the provided settings
|
||||
#[allow(clippy::wrong_self_convention)]
|
||||
pub(crate) fn into_merge_result(&self, settings: &DisplaySettings<'_>) -> MergeResult {
|
||||
MergeResult {
|
||||
contents: self.render(settings),
|
||||
conflict_count: self.conflict_count(),
|
||||
conflict_mass: self.conflict_mass(),
|
||||
method: LINE_BASED_METHOD,
|
||||
// the line-based merge might have come from a non-syntax-aware tool,
|
||||
// and we cautiously assume that it does have issues
|
||||
has_additional_issues: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
12
src/solve.rs
12
src/solve.rs
|
@ -51,13 +51,11 @@ pub fn resolve_merge_cascading<'a>(
|
|||
Err(err) => warn!("Error while resolving conflicts: {err}"),
|
||||
}
|
||||
|
||||
let rendered_from_parsed = MergeResult {
|
||||
contents: parsed_merge.render(&settings),
|
||||
conflict_count: parsed_merge.conflict_count(),
|
||||
conflict_mass: parsed_merge.conflict_mass(),
|
||||
method: FROM_PARSED_ORIGINAL,
|
||||
has_additional_issues: false,
|
||||
};
|
||||
let mut rendered_from_parsed = parsed_merge.into_merge_result(&settings);
|
||||
// For now, we assume that the original merge with conflicts is free of syntax errors
|
||||
// and duplicate signatures, so that it has priority over any other merge that we'd produce
|
||||
// and would be syntactically invalid.
|
||||
rendered_from_parsed.has_additional_issues = false;
|
||||
solves.push(rendered_from_parsed);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -98,15 +98,42 @@ pub fn structured_merge(
|
|||
);
|
||||
debug!("{result_tree}");
|
||||
|
||||
Ok(MergeResult {
|
||||
contents: result_tree.pretty_print(&class_mapping, settings),
|
||||
conflict_count: result_tree.count_conflicts(),
|
||||
conflict_mass: result_tree.conflict_mass(),
|
||||
method: if parsed_merge.is_none() {
|
||||
FULLY_STRUCTURED_METHOD
|
||||
} else {
|
||||
STRUCTURED_RESOLUTION_METHOD
|
||||
},
|
||||
has_additional_issues: false,
|
||||
})
|
||||
let merged_text = result_tree.to_merged_text(&class_mapping);
|
||||
|
||||
// Check that the rendered merge is faithful to the tree
|
||||
let revisions_to_check = if merged_text.count_conflicts() == 0 {
|
||||
[Revision::Base].as_slice()
|
||||
} else {
|
||||
[Revision::Base, Revision::Left, Revision::Right].as_slice()
|
||||
};
|
||||
for revision in revisions_to_check {
|
||||
let merged_revision = merged_text.reconstruct_revision(*revision);
|
||||
let arena = Arena::new();
|
||||
let ref_arena = Arena::new();
|
||||
let tree = parse(
|
||||
&mut parser,
|
||||
&merged_revision,
|
||||
lang_profile,
|
||||
&arena,
|
||||
&ref_arena,
|
||||
)
|
||||
.map_err(|err| {
|
||||
format!(
|
||||
"merge discarded because rendered revision {revision} has a parsing error: {err}"
|
||||
)
|
||||
})?;
|
||||
if !result_tree.isomorphic_to_source(tree.root(), *revision, &class_mapping) {
|
||||
debug!(
|
||||
"discarding merge because rendered revision {revision} isn't isomorphic to the merged tree"
|
||||
);
|
||||
return Err("merge discarded after isomorphism check".to_owned());
|
||||
}
|
||||
}
|
||||
|
||||
let method = if parsed_merge.is_none() {
|
||||
FULLY_STRUCTURED_METHOD
|
||||
} else {
|
||||
STRUCTURED_RESOLUTION_METHOD
|
||||
};
|
||||
Ok(merged_text.into_merge_result(settings, method))
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue
misc: I feel like we have this logic of "get a particular revision from a conflict" repeated a lot of times throughout the codebase. I wonder if it would be worth it to add a struct called
Conflict
which would look something like this:and use it inside all the different enums that have
Conflict
as a variant:One argument against doing this would be that sometimes we want to do something will all sides, not just one.
I had a quick look at the rest of the codebase and there are indeed a couple of other places where we do similar things. I wouldn't be opposed to such a refactoring, even though I'm not 100% sure if it will make the code more readable / maintainable (the
Conflict(Conflict)
definition is perhaps a bit confusing). Either way, perhaps it's worth doing it as a separate follow-up PR, no?Of course. I just wanted to note that down for myself so that I don't forget