From a475276f12ea9df3146db16f23f33d26cdeeb356 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Tue, 9 Sep 2025 17:45:39 +0200 Subject: [PATCH 1/4] Add failing example --- examples/typescript/working/attributes/Base.ts | 4 ++++ examples/typescript/working/attributes/Expected.ts | 6 ++++++ examples/typescript/working/attributes/Left.ts | 5 +++++ examples/typescript/working/attributes/Right.ts | 5 +++++ 4 files changed, 20 insertions(+) create mode 100644 examples/typescript/working/attributes/Base.ts create mode 100644 examples/typescript/working/attributes/Expected.ts create mode 100644 examples/typescript/working/attributes/Left.ts create mode 100644 examples/typescript/working/attributes/Right.ts diff --git a/examples/typescript/working/attributes/Base.ts b/examples/typescript/working/attributes/Base.ts new file mode 100644 index 0000000..7b6b083 --- /dev/null +++ b/examples/typescript/working/attributes/Base.ts @@ -0,0 +1,4 @@ +export class Grid { + userAgent: string; + apiKey: string; +} diff --git a/examples/typescript/working/attributes/Expected.ts b/examples/typescript/working/attributes/Expected.ts new file mode 100644 index 0000000..9dc729e --- /dev/null +++ b/examples/typescript/working/attributes/Expected.ts @@ -0,0 +1,6 @@ +export class Grid { + userAgent: string; + apiKey: string; + strategy: string | null; + mission = new Mission(this); +} diff --git a/examples/typescript/working/attributes/Left.ts b/examples/typescript/working/attributes/Left.ts new file mode 100644 index 0000000..bd7719e --- /dev/null +++ b/examples/typescript/working/attributes/Left.ts @@ -0,0 +1,5 @@ +export class Grid { + userAgent: string; + apiKey: string; + strategy: string | null; +} diff --git a/examples/typescript/working/attributes/Right.ts b/examples/typescript/working/attributes/Right.ts new file mode 100644 index 0000000..be74342 --- /dev/null +++ b/examples/typescript/working/attributes/Right.ts @@ -0,0 +1,5 @@ +export class Grid { + userAgent: string; + apiKey: string; + mission = new Mission(this); +} -- 2.47.3 From 5640ce4143c9298183e46faa0050c770c5028469 Mon Sep 17 00:00:00 2001 From: Ada Alakbarova Date: Tue, 9 Sep 2025 21:24:29 +0200 Subject: [PATCH 2/4] misc: use `Result::ok_or` --- src/tree_builder.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tree_builder.rs b/src/tree_builder.rs index 3d6c4c0..fa0a6e5 100644 --- a/src/tree_builder.rs +++ b/src/tree_builder.rs @@ -659,9 +659,9 @@ impl<'a, 'b> TreeBuilder<'a, 'b> { } }) .try_collect()?; - let Some(raw_separator) = commutative_parent.child_separator(&child_types) else { - return Err("The children are not allowed to commute".to_string()); - }; + let raw_separator = commutative_parent + .child_separator(&child_types) + .ok_or("The children are not allowed to commute")?; // NOTE: trimmed_sep is still consistent with raw_separator per the assumption that the two // kinds of separators are equal up to leading and trailing whitespace -- 2.47.3 From e52a3bd0a29443783e73902f5b6deb798b252657 Mon Sep 17 00:00:00 2001 From: Ada Alakbarova Date: Tue, 9 Sep 2025 21:05:01 +0200 Subject: [PATCH 3/4] allow child group separators to differ from the default separator by more than whitespace --- doc/src/adding-a-language.md | 2 -- src/lang_profile.rs | 63 +++++++++++++++++++----------------- src/tree_builder.rs | 39 ++++++---------------- 3 files changed, 43 insertions(+), 61 deletions(-) diff --git a/doc/src/adding-a-language.md b/doc/src/adding-a-language.md index e0dbab5..b59ec92 100644 --- a/doc/src/adding-a-language.md +++ b/doc/src/adding-a-language.md @@ -215,8 +215,6 @@ CommutativeParent::new("declaration_list", "{", "\n\n", "}") ]), ``` -Note that the separator for a children group and the separator for the commutative parent can only differ in leading and trailing whitespace. - ## Add signatures One piece of knowledge we have not encoded yet is the fact that `using` statements should be unique: there is no point in importing the same thing twice. This is specified using so-called signatures, which associate keys to the children of commutative parents. Those keys are then required to be unique among the children of a particular commutative parent. This mechanism can be used to define such keys for a lot of other elements. For instance, class fields are keyed by their name only, given that field names should be unique in a given class, regardless of their type. Keys can also be generated for methods, which not only includes their name but also the types of the arguments the function takes, as [C# supports method overloading](https://learn.microsoft.com/en-us/dotnet/standard/design-guidelines/member-overloading). diff --git a/src/lang_profile.rs b/src/lang_profile.rs index 4cd9de0..16ba687 100644 --- a/src/lang_profile.rs +++ b/src/lang_profile.rs @@ -3,7 +3,7 @@ use std::{collections::HashSet, ffi::OsStr, fmt::Display, hash::Hash, path::Path use itertools::Itertools; use tree_sitter::Language; -use crate::{signature::SignatureDefinition, supported_langs::SUPPORTED_LANGUAGES}; +use crate::{ast::AstNode, signature::SignatureDefinition, supported_langs::SUPPORTED_LANGUAGES}; /// Language-dependent settings to influence how merging is done. /// All those settings are declarative (except for the tree-sitter parser, which is @@ -326,20 +326,6 @@ impl CommutativeParent { /// Restrict a commutative parent to some children groups, possibly with their own separators pub(crate) fn restricted_to(self, children_groups: Vec) -> Self { - #[cfg(debug_assertions)] - { - for children_group in &children_groups { - if let Some(specific_separator) = children_group.separator { - assert_eq!( - specific_separator.trim(), - self.separator.trim(), - "Children group separator '{specific_separator:?}' inconsistent with parent separator '{:?}' in commutative parent '{:?}'", - self.separator, - self.parent_type - ); - } - } - } Self { children_groups, ..self @@ -354,22 +340,43 @@ impl CommutativeParent { /// Can children with the supplied types commute together? /// If so, return the separator to use when inserting two nodes /// in the same place. - /// TODO: return None if any of the node_types is extra, - /// see https://codeberg.org/mergiraf/mergiraf/issues/467 - pub(crate) fn child_separator(&self, node_types: &HashSet<&str>) -> Option<&'static str> { - if self.children_groups.is_empty() { + pub(crate) fn child_separator<'a>( + &self, + base_nodes: &[&'a AstNode<'a>], + left_nodes: &[&'a AstNode<'a>], + right_nodes: &[&'a AstNode<'a>], + ) -> Option<&'static str> { + let trimmed_left_delim = self.left_delim.unwrap_or_default().trim(); + let trimmed_right_delim = self.right_delim.unwrap_or_default().trim(); + + if (base_nodes.iter()) + .chain(left_nodes) + .chain(right_nodes) + .any(|node| node.is_extra) + { + // Extra nodes can't commute + None + } else if self.children_groups.is_empty() { // If there are no children groups to restrict commutativity to, // any children can commute and the default separator is used Some(self.separator) } else { - // Otherwise, children can only commute if their types all belong - // to the same group, in which case the separator is either that of - // that specific group, or the default one for the commutative parent - // as a fall-back. - self.children_groups - .iter() - .find(|group| group.node_types.is_superset(node_types)) - .map(|group| group.separator.unwrap_or(self.separator)) + // Otherwise, children belong to a given group if both the grammar kinds of the content nodes + // and the contents of the separator nodes are accepted by the group. + self.children_groups.iter().find_map(|group| { + let group_separator = group.separator.unwrap_or(self.separator); + (base_nodes.iter()) + .chain(left_nodes) + .chain(right_nodes) + .all(|node| { + let trimmed = node.source.trim(); + group.node_types.contains(node.kind) + || trimmed == group_separator.trim() + || trimmed == trimmed_right_delim + || trimmed == trimmed_left_delim + }) + .then_some(group_separator) + }) } } @@ -406,8 +413,6 @@ pub struct ChildrenGroup { pub node_types: HashSet<&'static str>, /// An optional separator specific to this children group, /// better suited than the one from the commutative parent. - /// It must only differ from the separator of the parent up to - /// whitespace (their trimmed versions should be equal). pub separator: Option<&'static str>, } diff --git a/src/tree_builder.rs b/src/tree_builder.rs index fa0a6e5..067faa9 100644 --- a/src/tree_builder.rs +++ b/src/tree_builder.rs @@ -605,12 +605,17 @@ impl<'a, 'b> TreeBuilder<'a, 'b> { ) -> Result>, String> { let pad = visiting_state.indentation(); trace!("{pad}commutatively_merge_lists"); - let trimmed_sep = commutative_parent.trimmed_separator(); - let trimmed_left_delim = commutative_parent.left_delim.unwrap_or_default().trim(); - let trimmed_right_delim = commutative_parent.right_delim.unwrap_or_default().trim(); // TODO improve handling of comments? comments added by the right side should ideally be placed sensibly - // first, map each list via class mapping to make each element comparable + // check that all the nodes involved are allowed to commute in this context + let raw_separator = commutative_parent + .child_separator(base, left, right) + .ok_or("The children are not allowed to commute")?; + let trimmed_sep = raw_separator.trim(); + let trimmed_left_delim = commutative_parent.left_delim.unwrap_or_default().trim(); + let trimmed_right_delim = commutative_parent.right_delim.unwrap_or_default().trim(); + + // map each list via class mapping to make each element comparable let base_leaders: HashSet<_> = self .keep_content_only( base, @@ -639,32 +644,6 @@ impl<'a, 'b> TreeBuilder<'a, 'b> { ) .collect(); - // check that all the nodes involved are allowed to commute in this context - let child_types: HashSet<&str> = (base_leaders.iter()) - .chain(left_leaders.iter()) - .chain(right_leaders.iter()) - .map(|leader| { - if self - .class_mapping - .representatives(leader) - .iter() - .any(|repr| repr.node.is_extra) - { - Err( - "One of the children is an extra node, cannot merge commutatively" - .to_string(), - ) - } else { - Ok(leader.kind()) - } - }) - .try_collect()?; - let raw_separator = commutative_parent - .child_separator(&child_types) - .ok_or("The children are not allowed to commute")?; - // NOTE: trimmed_sep is still consistent with raw_separator per the assumption that the two - // kinds of separators are equal up to leading and trailing whitespace - let left_added: HashSet<_> = left_leaders .iter() .filter(|x| !base_leaders.contains(x)) -- 2.47.3 From 3a2c6f985c171103fe9ea23e2a13d6ff9ef38f30 Mon Sep 17 00:00:00 2001 From: Ada Alakbarova Date: Tue, 9 Sep 2025 18:45:10 +0200 Subject: [PATCH 4/4] add `ChildrenGroup` for `public_field_definition` ..which forces us to also add all the others --- src/supported_langs.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/supported_langs.rs b/src/supported_langs.rs index a723d3e..12a45ae 100644 --- a/src/supported_langs.rs +++ b/src/supported_langs.rs @@ -16,7 +16,15 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { .restricted_to_groups(&[&["import_statement"]]), CommutativeParent::new("named_imports", "{", ", ", "}"), CommutativeParent::new("object", "{", ", ", "}"), - CommutativeParent::new("class_body", " {\n", "\n\n", "\n}\n"), + CommutativeParent::new("class_body", " {\n", "\n\n", "\n}\n").restricted_to(vec![ + ChildrenGroup::with_separator(&["public_field_definition", "index_signature"], ";\n"), + ChildrenGroup::new(&["class_static_block"]), + ChildrenGroup::new(&[ + "abstract_method_signature", + "method_definition", + "method_signature", + ]), + ]), CommutativeParent::new("interface_body", " {\n", ";\n", "\n}\n"), CommutativeParent::new("object_type", " {\n", ";\n", "\n}\n"), CommutativeParent::new("enum_body", " {\n", ",\n", "\n}\n"), -- 2.47.3