From 0893b0f33186ed12fe8fff67bdd61b4905d015a3 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Tue, 12 Nov 2024 23:41:14 +0100 Subject: [PATCH 1/3] WIP: Support for restricting commutativity --- .../method_conflict_with_comments/Base.cpp | 8 +++++ .../Expected.cpp | 15 +++++++++ .../method_conflict_with_comments/Left.cpp | 12 +++++++ .../method_conflict_with_comments/Right.cpp | 11 +++++++ .../working/field_visibility_conflict/Base.cc | 4 +++ .../field_visibility_conflict/Expected.cc | 11 +++++++ .../working/field_visibility_conflict/Left.cc | 6 ++++ .../field_visibility_conflict/Right.cc | 5 +++ src/lang_profile.rs | 31 ++++++++++++++++++- src/supported_langs.rs | 17 ++++++++-- src/tree_builder.rs | 10 ++++++ 11 files changed, 126 insertions(+), 4 deletions(-) create mode 100644 examples/cpp/failing/method_conflict_with_comments/Base.cpp create mode 100644 examples/cpp/failing/method_conflict_with_comments/Expected.cpp create mode 100644 examples/cpp/failing/method_conflict_with_comments/Left.cpp create mode 100644 examples/cpp/failing/method_conflict_with_comments/Right.cpp create mode 100644 examples/cpp/working/field_visibility_conflict/Base.cc create mode 100644 examples/cpp/working/field_visibility_conflict/Expected.cc create mode 100644 examples/cpp/working/field_visibility_conflict/Left.cc create mode 100644 examples/cpp/working/field_visibility_conflict/Right.cc diff --git a/examples/cpp/failing/method_conflict_with_comments/Base.cpp b/examples/cpp/failing/method_conflict_with_comments/Base.cpp new file mode 100644 index 0000000..65f6455 --- /dev/null +++ b/examples/cpp/failing/method_conflict_with_comments/Base.cpp @@ -0,0 +1,8 @@ +#include + +class MyClass { + void run(int argc) { + printf("too few arguments\n"); + exit(1); + } +} diff --git a/examples/cpp/failing/method_conflict_with_comments/Expected.cpp b/examples/cpp/failing/method_conflict_with_comments/Expected.cpp new file mode 100644 index 0000000..1b080c2 --- /dev/null +++ b/examples/cpp/failing/method_conflict_with_comments/Expected.cpp @@ -0,0 +1,15 @@ +#include + +class MyClass { + void run(int argc) { + printf("too few arguments\n"); + exit(1); + } + // central backbone of the algorithm + void run() { + printf("hello\n"); + } + void run(bool reallyFast) { + printf("world\n"); + } +} diff --git a/examples/cpp/failing/method_conflict_with_comments/Left.cpp b/examples/cpp/failing/method_conflict_with_comments/Left.cpp new file mode 100644 index 0000000..2570e84 --- /dev/null +++ b/examples/cpp/failing/method_conflict_with_comments/Left.cpp @@ -0,0 +1,12 @@ +#include + +class MyClass { + void run(int argc) { + printf("too few arguments\n"); + exit(1); + } + // central backbone of the algorithm + void run() { + printf("hello\n"); + } +} diff --git a/examples/cpp/failing/method_conflict_with_comments/Right.cpp b/examples/cpp/failing/method_conflict_with_comments/Right.cpp new file mode 100644 index 0000000..825b054 --- /dev/null +++ b/examples/cpp/failing/method_conflict_with_comments/Right.cpp @@ -0,0 +1,11 @@ +#include + +class MyClass { + void run(int argc) { + printf("too few arguments\n"); + exit(1); + } + void run(bool reallyFast) { + printf("world\n"); + } +} diff --git a/examples/cpp/working/field_visibility_conflict/Base.cc b/examples/cpp/working/field_visibility_conflict/Base.cc new file mode 100644 index 0000000..89f86f4 --- /dev/null +++ b/examples/cpp/working/field_visibility_conflict/Base.cc @@ -0,0 +1,4 @@ +class MyClass { + public: + int field_1; +} diff --git a/examples/cpp/working/field_visibility_conflict/Expected.cc b/examples/cpp/working/field_visibility_conflict/Expected.cc new file mode 100644 index 0000000..f576808 --- /dev/null +++ b/examples/cpp/working/field_visibility_conflict/Expected.cc @@ -0,0 +1,11 @@ +class MyClass { + public: + int field_1; +<<<<<<< LEFT + private: + size_t field_2; +||||||| BASE +======= + char field_3; +>>>>>>> RIGHT +} diff --git a/examples/cpp/working/field_visibility_conflict/Left.cc b/examples/cpp/working/field_visibility_conflict/Left.cc new file mode 100644 index 0000000..eeeeacc --- /dev/null +++ b/examples/cpp/working/field_visibility_conflict/Left.cc @@ -0,0 +1,6 @@ +class MyClass { + public: + int field_1; + private: + size_t field_2; +} diff --git a/examples/cpp/working/field_visibility_conflict/Right.cc b/examples/cpp/working/field_visibility_conflict/Right.cc new file mode 100644 index 0000000..f7c98f0 --- /dev/null +++ b/examples/cpp/working/field_visibility_conflict/Right.cc @@ -0,0 +1,5 @@ +class MyClass { + public: + int field_1; + char field_3; +} diff --git a/src/lang_profile.rs b/src/lang_profile.rs index 9a7374f..477a4e4 100644 --- a/src/lang_profile.rs +++ b/src/lang_profile.rs @@ -1,3 +1,5 @@ +use std::collections::HashSet; + use itertools::Itertools; use tree_sitter::Language; @@ -122,7 +124,9 @@ pub struct CommutativeParent { // any left delimiter that can come before all children pub left_delim: Option<&'static str>, // any right delimiter that can come after all children - pub right_delim: Option<&'static str>, + pub right_delim: Option<&'static str>, + // any restrictions on which types of children are allowed to commute together. If empty, all children can commute together. + pub children_groups: Vec, } impl CommutativeParent { @@ -133,6 +137,7 @@ impl CommutativeParent { separator, left_delim: None, right_delim: None, + children_groups: Vec::new(), } } @@ -148,6 +153,7 @@ impl CommutativeParent { separator, left_delim: Some(left_delim), right_delim: Some(right_delim), + children_groups: Vec::new(), } } @@ -162,6 +168,29 @@ impl CommutativeParent { separator, left_delim: Some(left_delim), right_delim: None, + children_groups: Vec::new(), + } + } + + /// Can children with the supplied types commute together? + pub(crate) fn children_can_commute(&self, node_types: &HashSet<&str>) -> bool { + self.children_groups.is_empty() || + self.children_groups.iter() + .any(|group| group.node_types.is_superset(&node_types)) + } +} + +/// A group of children of a commutative node which are allowed to commute together +#[derive(Debug, Clone)] +pub struct ChildrenGroup { + /// The types of nodes, as gramman names + pub node_types: HashSet<&'static str>, +} + +impl ChildrenGroup { + pub(crate) fn new(types: &[&'static str]) -> ChildrenGroup { + ChildrenGroup { + node_types: types.iter().copied().collect(), } } } diff --git a/src/supported_langs.rs b/src/supported_langs.rs index 7aa4fa9..8748e69 100644 --- a/src/supported_langs.rs +++ b/src/supported_langs.rs @@ -1,6 +1,8 @@ +use std::process::Child; + use crate::{ - lang_profile::{CommutativeParent, LangProfile}, - signature::{signature, PathStep::ChildType, PathStep::Field}, + lang_profile::{ChildrenGroup, CommutativeParent, LangProfile}, + signature::{signature, PathStep::{ChildType, Field}}, }; /// Returns the list of supported language profiles, @@ -209,7 +211,16 @@ pub fn supported_languages() -> Vec { atomic_nodes: vec![], commutative_parents: vec![ CommutativeParent::new("initializer_list", "{", ",", "}"), - CommutativeParent::new("field_declaration_list", "{\n", "\n", "\n}\n"), // TODO this isn't quite true for visibility annotations like "public:" + CommutativeParent { + parent_type: "field_declaration_list", + separator: "\n", + left_delim: Some("{\n"), + right_delim: Some("\n}\n"), + children_groups: vec![ + ChildrenGroup::new(&["field_declaration"]), + ChildrenGroup::new(&["function_definition"]) + ], + } ], signatures: vec![ signature("initializer_pair", vec![vec![Field("designator")]]), diff --git a/src/tree_builder.rs b/src/tree_builder.rs index 8af991b..9c8b3b8 100644 --- a/src/tree_builder.rs +++ b/src/tree_builder.rs @@ -647,6 +647,16 @@ impl<'a, 'b> TreeBuilder<'a, 'b> { trimmed_right_delim, ); + // check that all the nodes involved are allowed to commute in this context + let child_types: HashSet<&str> = base_leaders.iter() + .chain(left_leaders.iter()) + .chain(right_leaders.iter()) + .map(|leader| leader.grammar_name()) + .collect(); + if !commutative_parent.children_can_commute(&child_types) { + return Err("The children are not allowed to commute".to_string()) + } + // then, compute the symmetric difference between the base and right lists let right_removed = base_leaders .iter() -- 2.47.3 From baed83ab85fdde40ff25c9dbb43889f4ea3a11a0 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Tue, 19 Nov 2024 14:03:55 +0100 Subject: [PATCH 2/3] Simplify syntax to declare children groups --- src/lang_profile.rs | 11 ++++++++++- src/supported_langs.rs | 15 +++++---------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/lang_profile.rs b/src/lang_profile.rs index 477a4e4..48a62dc 100644 --- a/src/lang_profile.rs +++ b/src/lang_profile.rs @@ -1,4 +1,4 @@ -use std::collections::HashSet; +use std::{collections::HashSet, process::Child}; use itertools::Itertools; use tree_sitter::Language; @@ -172,6 +172,15 @@ impl CommutativeParent { } } + /// Short-hand to restrict a commutative parent to some children groups + pub(crate) fn restricted_to_groups(mut self, groups: &[&[&'static str]]) -> CommutativeParent { + let children_groups = groups + .into_iter().map(|types| ChildrenGroup::new(&types)) + .collect(); + self.children_groups = children_groups; + self + } + /// Can children with the supplied types commute together? pub(crate) fn children_can_commute(&self, node_types: &HashSet<&str>) -> bool { self.children_groups.is_empty() || diff --git a/src/supported_langs.rs b/src/supported_langs.rs index 8748e69..d448c31 100644 --- a/src/supported_langs.rs +++ b/src/supported_langs.rs @@ -211,16 +211,11 @@ pub fn supported_languages() -> Vec { atomic_nodes: vec![], commutative_parents: vec![ CommutativeParent::new("initializer_list", "{", ",", "}"), - CommutativeParent { - parent_type: "field_declaration_list", - separator: "\n", - left_delim: Some("{\n"), - right_delim: Some("\n}\n"), - children_groups: vec![ - ChildrenGroup::new(&["field_declaration"]), - ChildrenGroup::new(&["function_definition"]) - ], - } + CommutativeParent::new("field_declaration_list", "{\n", "\n", "\n}\n") + .restricted_to_groups(&[ + &["field_declaration"], + &["function_definition"] + ]) ], signatures: vec![ signature("initializer_pair", vec![vec![Field("designator")]]), -- 2.47.3 From 3b85eac4cbcb7a7d03ee23295da47dd8df0e94b7 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Tue, 19 Nov 2024 14:39:25 +0100 Subject: [PATCH 3/3] Add docs --- doc/src/adding-a-language.md | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/doc/src/adding-a-language.md b/doc/src/adding-a-language.md index f71134b..b241dde 100644 --- a/doc/src/adding-a-language.md +++ b/doc/src/adding-a-language.md @@ -112,14 +112,11 @@ which gives: This shows us how our source code is parsed into a tree. We see that the `using` statements are parsed as `using_directive` nodes in the tree. -To let Mergiraf reorder `using` statements to fix conflicts, we actually need to specify that they can be reordered with any of their siblings (any other child of their parent in the syntax tree). -In this example, their parent is the root of the tree (with type `compilation_unit`), which means that we'll allow reordering `using` statements with other top-level elements, such as the namespace declaration. +To let Mergiraf reorder `using` statements to fix conflicts, we declare that their parent is a commutative root, which will by default let them commute with any of their siblings (any other child of their parent in the syntax tree). +In this example, their parent is the root of the tree (with type `compilation_unit`), which means that we'll allow reordering `using` statements with other top-level elements, such as the namespace declaration. +We'll see later how to restrict this commutativity by defining children groups. -This might feel a little odd, given that `using` statements will almost -invariably appear at the top of the file, and we definitely don't want to have Mergiraf move them to the end of the file when merging for instance. -However, Mergiraf only relies on commutative parents to solve conflicts, such as inserting two elements at the same place. It keeps the order of other existing elements. C# being an object-oriented language, it does not allow for top-level instructions (everything happens inside classes, whose declaration order does not matter either), so in this case it is appropriate to declare `compilation_unit` as a commutative parent. - -We can do so in the language profile: +The commutative parent can be defined in the language profile: ```rust LangProfile { commutative_parents: vec![ @@ -179,6 +176,31 @@ CommutativeRoot::new("declaration_list", "{", "\n\n", "}") ``` and so on. While it is useful to identify as many commutative parent types as possible, not being exhaustive is not a problem as it will only prevent the automated resolution of certain conflicts, but should not otherwise degrade the quality of merges. +## Adding children groups + +Within a commutative parent, it is possible to restrict which types of children are able to commute together. +In C#, the `compilation_unit` root node can not only contain `using` statements, but also some global statements, namespace declarations and type declarations, for instance. +We can declare *children groups*, which are sets of node types which are allowed to commute together. +By declaring a children group which contains only `using_directive`, we make sure that `using` directive can only be reordered with other `using` directives: +```rust +CommutativeRoot::new("declaration_list", "{", "\n\n", "}") + .restricted_to_groups(&[ + &["using_directive"] + ]) +``` + +As soon as a children group is declared, that restricts the commutativity of all children of the commutative parent. +Conflicts can only be solved if all children involved are part of the same group. So, in this case it's also worth +adding other children groups for other types of nodes which can be reordered together: +```rust +CommutativeRoot::new("declaration_list", "{", "\n\n", "}") + .restricted_to_groups(&[ + &["using_directive"], + &["namespace_declaration"], + &["global_attribute"], + ]) +``` + ## Add signatures One piece of knowledge we have not encoded yet is the fact that `using` statements should be unique: there is no point in importing the same thing twice. This is specified using so-called signatures, which associate keys to the children of commutative parents. Those keys are then required to be unique among the children of a particular commutative parent. This mechanism can be used to define such keys for a lot of other elements. For instance, class fields are keyed by their name only, given that field names should be unique in a given class, regardless of their type. Keys can also be generated for methods, which not only includes their name but also the types of the arguments the function takes, as [C# supports method overloading](https://learn.microsoft.com/en-us/dotnet/standard/design-guidelines/member-overloading). -- 2.47.3