From 2553cfb90ef322a63f38643ed278d9ebf2288db0 Mon Sep 17 00:00:00 2001 From: Ben Boeckel Date: Fri, 28 Mar 2025 21:05:18 +0100 Subject: [PATCH 01/16] fix: explicitly declare that `LangProfile` is `'static` Lifetime elision otherwise ties the lifetime to the input `filename` lifetime which is not true. --- src/lang_profile.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lang_profile.rs b/src/lang_profile.rs index 9bb0121..2f7da97 100644 --- a/src/lang_profile.rs +++ b/src/lang_profile.rs @@ -32,7 +32,7 @@ pub struct LangProfile { impl LangProfile { /// Detects the language of a file based on its filename - pub fn detect_from_filename

<P>(filename: &P) -> Option<&Self> + pub fn detect_from_filename<P>(filename: &P) -> Option<&'static Self> where P: AsRef<Path> + ?Sized, { @@ -40,7 +40,7 @@ impl LangProfile { Self::_detect_from_filename(filename) } - fn _detect_from_filename(filename: &Path) -> Option<&Self> { + fn _detect_from_filename(filename: &Path) -> Option<&'static Self> { // TODO make something more advanced like in difftastic // https://github.com/Wilfred/difftastic/blob/master/src/parse/tree_sitter_parser.rs let extension = filename.extension()?; -- 2.47.3 From 609fd13d71c48a638801eb40eb95ce16ed3ce50c Mon Sep 17 00:00:00 2001 From: Ben Boeckel Date: Fri, 28 Mar 2025 21:06:31 +0100 Subject: [PATCH 02/16] feat(lang_profile): support loading a language profile by name If autodetection is not accurate, allow an override. --- src/lang_profile.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/lang_profile.rs b/src/lang_profile.rs index 2f7da97..d492f0d 100644 --- a/src/lang_profile.rs +++ b/src/lang_profile.rs @@ -31,6 +31,13 @@ pub struct LangProfile { } impl LangProfile { + /// Load a profile by language name. + pub fn find_by_name(name: &str) -> Option<&'static Self> { + SUPPORTED_LANGUAGES + .iter() + .find(|lang_profile| lang_profile.name == name) + } + /// Detects the language of a file based on its filename pub fn detect_from_filename<P>

(filename: &P) -> Option<&'static Self> where -- 2.47.3 From 8099c906350bf5bd70a58c3de759415a328edd6a Mon Sep 17 00:00:00 2001 From: Ben Boeckel Date: Fri, 28 Mar 2025 21:07:25 +0100 Subject: [PATCH 03/16] feat(mergiraf merge,solve): support language overrides If the extension is not recognized or incorrect, allow the user to override the language in use. Fixes: #34 --- src/bin/mergiraf.rs | 7 +++++++ src/merge.rs | 26 +++++++++++++++++++------- src/solve.rs | 18 ++++++++++++------ tests/failing.rs | 2 ++ tests/solve_command.rs | 1 + tests/timeout_support.rs | 1 + tests/working.rs | 1 + 7 files changed, 43 insertions(+), 13 deletions(-) diff --git a/src/bin/mergiraf.rs b/src/bin/mergiraf.rs index 2225800..b40ed88 100644 --- a/src/bin/mergiraf.rs +++ b/src/bin/mergiraf.rs @@ -51,6 +51,9 @@ struct MergeOrSolveArgs { #[arg(short = 'l', long)] // the choice of 'l' is inherited from Git's merge driver interface conflict_marker_size: Option, + /// Override automatic language detection. + #[arg(short = 'L', long)] + language: Option, } #[derive(Subcommand, Debug)] @@ -175,6 +178,7 @@ fn real_main(args: CliArgs) -> Result { debug_dir, compact, conflict_marker_size, + language, }, timeout, } => { @@ -256,6 +260,7 @@ fn real_main(args: CliArgs) -> Result { attempts_cache.as_ref(), debug_dir, Duration::from_millis(timeout.unwrap_or(if fast { 5000 } else { 10000 })), + language.as_deref(), ); if let Some(fname_out) = output { write_string_to_file(&fname_out, &merge_result.contents)?; @@ -286,6 +291,7 @@ fn real_main(args: CliArgs) -> Result { debug_dir, compact, conflict_marker_size, + language, }, keep, mut stdout, @@ -335,6 +341,7 @@ fn real_main(args: CliArgs) -> Result { settings, debug_dir.as_deref(), &working_dir, + language.as_deref(), ); match postprocessed { Ok(merged) => { diff --git a/src/merge.rs b/src/merge.rs index 60155c5..8565a9b 100644 --- a/src/merge.rs +++ b/src/merge.rs @@ -33,14 +33,26 @@ pub fn line_merge_and_structured_resolution( attempts_cache: 
Option<&AttemptsCache>, debug_dir: Option<&'static Path>, timeout: Duration, + language: Option<&str>, ) -> MergeResult { - let Some(lang_profile) = LangProfile::detect_from_filename(fname_base) else { - // can't do anything fancier anyway - debug!( - "Could not find a supported language for {}. Falling back to a line-based merge.", - fname_base.display() - ); - return line_based_merge(contents_base, contents_left, contents_right, &settings); + let lang_profile = if let Some(lang_name) = language { + let Some(lang_profile) = LangProfile::find_by_name(lang_name) else { + warn!( + "Specified language '{lang_name}' could not be found. Falling back to a line-based merge." + ); + return line_based_merge(contents_base, contents_left, contents_right, &settings); + }; + lang_profile + } else { + let Some(lang_profile) = LangProfile::detect_from_filename(fname_base) else { + // can't do anything fancier anyway + debug!( + "Could not find a supported language for {}. Falling back to a line-based merge.", + fname_base.display() + ); + return line_based_merge(contents_base, contents_left, contents_right, &settings); + }; + lang_profile }; let merges = cascading_merge( diff --git a/src/solve.rs b/src/solve.rs index f2e1657..fefabe0 100644 --- a/src/solve.rs +++ b/src/solve.rs @@ -19,15 +19,21 @@ pub fn resolve_merge_cascading<'a>( mut settings: DisplaySettings<'a>, debug_dir: Option<&Path>, working_dir: &Path, + language: Option<&str>, ) -> Result { let mut solves = Vec::with_capacity(3); - let lang_profile = LangProfile::detect_from_filename(fname_base).ok_or_else(|| { - format!( - "Could not find a supported language for {}", - fname_base.display() - ) - })?; + let lang_profile = if let Some(lang_name) = language { + LangProfile::find_by_name(lang_name) + .ok_or_else(|| format!("Specified language '{lang_name}' could not be found."))? 
+ } else { + LangProfile::detect_from_filename(fname_base).ok_or_else(|| { + format!( + "Could not find a supported language for {}", + fname_base.display() + ) + })? + }; match ParsedMerge::parse(merge_contents, &settings) { Err(err) => { diff --git a/tests/failing.rs b/tests/failing.rs index 8a728e1..292026e 100644 --- a/tests/failing.rs +++ b/tests/failing.rs @@ -71,6 +71,7 @@ fn integration_failing(#[files("examples/*/failing/*")] test_dir: PathBuf) { None, None, Duration::from_millis(0), + None, ); let actual = merge_result.contents.trim(); @@ -136,6 +137,7 @@ please examine the new output and update ExpectedCurrently.{ext} if it looks oka None, None, Duration::from_millis(0), + None, ); let actual_compact = merge_result.contents.trim(); diff --git a/tests/solve_command.rs b/tests/solve_command.rs index 0c2353c..0f97869 100644 --- a/tests/solve_command.rs +++ b/tests/solve_command.rs @@ -95,6 +95,7 @@ fn solve_command(#[case] conflict_style: &str) { DisplaySettings::default(), None, repo_dir, + None, ) .expect("solving the conflicts returned an error"); diff --git a/tests/timeout_support.rs b/tests/timeout_support.rs index 84b5689..f0c5ff3 100644 --- a/tests/timeout_support.rs +++ b/tests/timeout_support.rs @@ -38,6 +38,7 @@ fn timeout_support() { None, None, Duration::from_millis(1), // very small timeout: structured merging should never be that fast + None, ); let expected = contents_expected.trim(); diff --git a/tests/working.rs b/tests/working.rs index 33a5a46..2bae11f 100644 --- a/tests/working.rs +++ b/tests/working.rs @@ -34,6 +34,7 @@ fn compare_against_merge( None, None, Duration::from_millis(0), + None, ); let expected = contents_expected.trim(); -- 2.47.3 From 8fc5213758a86ae34cc5ee5ceea52037f8366bba Mon Sep 17 00:00:00 2001 From: Ben Boeckel Date: Fri, 28 Mar 2025 21:13:21 +0100 Subject: [PATCH 04/16] feat(lang_profile): support alternate names for languages Some of the language names are a bit clunky, so offer alternative names for manual 
selection to use. --- src/lang_profile.rs | 12 +++++++++--- src/supported_langs.rs | 25 +++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/src/lang_profile.rs b/src/lang_profile.rs index d492f0d..b2492d1 100644 --- a/src/lang_profile.rs +++ b/src/lang_profile.rs @@ -18,6 +18,8 @@ use crate::{ pub struct LangProfile { /// a name that identifies the language pub name: &'static str, + /// alternate names for the language + pub alternate_names: &'static [&'static str], /// the file extensions of files in this language pub extensions: Vec<&'static str>, /// `tree_sitter` parser @@ -33,9 +35,13 @@ pub struct LangProfile { impl LangProfile { /// Load a profile by language name. pub fn find_by_name(name: &str) -> Option<&'static Self> { - SUPPORTED_LANGUAGES - .iter() - .find(|lang_profile| lang_profile.name == name) + SUPPORTED_LANGUAGES.iter().find(|lang_profile| { + lang_profile.name == name + || lang_profile + .alternate_names + .iter() + .any(|aname| *aname == name) + }) } /// Detects the language of a file based on its filename diff --git a/src/supported_langs.rs b/src/supported_langs.rs index a857fad..d3cc69f 100644 --- a/src/supported_langs.rs +++ b/src/supported_langs.rs @@ -50,6 +50,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { vec![ LangProfile { name: "Java", + alternate_names: &[], extensions: vec!["java"], language: tree_sitter_java::LANGUAGE.into(), atomic_nodes: vec!["import_declaration"], @@ -162,6 +163,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "Java properties", + alternate_names: &[], extensions: vec!["properties"], language: tree_sitter_properties::LANGUAGE.into(), atomic_nodes: vec![], @@ -170,6 +172,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "Kotlin", + alternate_names: &[], extensions: vec!["kt"], language: tree_sitter_kotlin_ng::LANGUAGE.into(), atomic_nodes: vec![], @@ -216,6 +219,7 @@ pub 
static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "Rust", + alternate_names: &[], extensions: vec!["rs"], language: tree_sitter_rust::LANGUAGE.into(), atomic_nodes: vec![], @@ -317,6 +321,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "Go", + alternate_names: &[], extensions: vec!["go"], language: tree_sitter_go::LANGUAGE.into(), atomic_nodes: vec!["interpreted_string_literal"], // for https://github.com/tree-sitter/tree-sitter-go/issues/150 @@ -349,6 +354,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "Javascript", + alternate_names: &[], extensions: vec!["js", "jsx", "mjs"], language: tree_sitter_javascript::LANGUAGE.into(), atomic_nodes: vec![], @@ -366,6 +372,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "JSON", + alternate_names: &[], extensions: vec!["json"], language: tree_sitter_json::LANGUAGE.into(), atomic_nodes: vec![], @@ -377,6 +384,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "YAML", + alternate_names: &[], extensions: vec!["yml", "yaml"], language: tree_sitter_yaml::LANGUAGE.into(), atomic_nodes: vec![], @@ -385,6 +393,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "TOML", + alternate_names: &[], extensions: vec!["toml"], language: tree_sitter_toml_ng::LANGUAGE.into(), atomic_nodes: vec!["string"], @@ -400,6 +409,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "HTML", + alternate_names: &[], extensions: vec!["html", "htm"], language: tree_sitter_html::LANGUAGE.into(), atomic_nodes: vec![], @@ -414,6 +424,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "XML", + alternate_names: &[], extensions: vec!["xhtml", "xml"], language: tree_sitter_xml::LANGUAGE_XML.into(), atomic_nodes: vec!["AttValue"], @@ -425,6 +436,7 @@ 
pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "C/C++", + alternate_names: &["C", "C++"], extensions: vec!["c", "h", "cc", "cpp", "hpp", "cxx", "mpp", "cppm", "ixx"], language: tree_sitter_cpp::LANGUAGE.into(), atomic_nodes: vec![], @@ -452,6 +464,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "C#", + alternate_names: &["CSharp"], extensions: vec!["cs"], language: tree_sitter_c_sharp::LANGUAGE.into(), atomic_nodes: vec![], @@ -499,6 +512,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "Dart", + alternate_names: &[], extensions: vec!["dart"], language: tree_sitter_dart::language(), atomic_nodes: vec!["import_or_export"], @@ -515,6 +529,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "Devicetree Source", + alternate_names: &[], extensions: vec!["dts"], language: tree_sitter_devicetree::LANGUAGE.into(), atomic_nodes: vec!["string_literal"], @@ -526,6 +541,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "Scala", + alternate_names: &[], extensions: vec!["scala", "sbt"], language: tree_sitter_scala::LANGUAGE.into(), atomic_nodes: vec![], @@ -534,6 +550,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "Typescript", + alternate_names: &[], extensions: vec!["ts"], language: tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), atomic_nodes: vec![], @@ -542,6 +559,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "Typescript (TSX)", + alternate_names: &["TSX"], extensions: vec!["tsx"], language: tree_sitter_typescript::LANGUAGE_TSX.into(), atomic_nodes: vec![], @@ -550,6 +568,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "Python", + alternate_names: &[], extensions: vec!["py"], language: tree_sitter_python::LANGUAGE.into(), atomic_nodes: 
vec!["string", "dotted_name"], @@ -576,6 +595,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "PHP", + alternate_names: &[], extensions: vec!["php", "phtml"], language: tree_sitter_php::LANGUAGE_PHP.into(), // optional settings, explained below @@ -611,6 +631,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "Solidity", + alternate_names: &[], extensions: vec!["sol"], language: tree_sitter_solidity::LANGUAGE.into(), atomic_nodes: vec![], @@ -622,6 +643,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "Lua", + alternate_names: &[], extensions: vec!["lua"], language: tree_sitter_lua::LANGUAGE.into(), atomic_nodes: vec![], @@ -630,6 +652,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "Ruby", + alternate_names: &[], extensions: vec!["rb"], language: tree_sitter_ruby::LANGUAGE.into(), atomic_nodes: vec![], @@ -638,6 +661,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "Nix", + alternate_names: &[], extensions: vec!["nix"], language: tree_sitter_nix::LANGUAGE.into(), atomic_nodes: vec![], @@ -652,6 +676,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "SystemVerilog", + alternate_names: &[], extensions: vec!["sv", "svh"], language: tree_sitter_verilog::LANGUAGE.into(), atomic_nodes: vec![], -- 2.47.3 From b049246cf0996b72d28dfa3e42750c52c5554dc1 Mon Sep 17 00:00:00 2001 From: Ben Boeckel Date: Fri, 28 Mar 2025 21:15:05 +0100 Subject: [PATCH 05/16] feat(lang_profile): support case-insensitive language matching --- src/lang_profile.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lang_profile.rs b/src/lang_profile.rs index b2492d1..db8dfd0 100644 --- a/src/lang_profile.rs +++ b/src/lang_profile.rs @@ -36,11 +36,11 @@ impl LangProfile { /// Load a profile by language name. 
pub fn find_by_name(name: &str) -> Option<&'static Self> { SUPPORTED_LANGUAGES.iter().find(|lang_profile| { - lang_profile.name == name + lang_profile.name.eq_ignore_ascii_case(name) || lang_profile .alternate_names .iter() - .any(|aname| *aname == name) + .any(|aname| aname.eq_ignore_ascii_case(name)) }) } -- 2.47.3 From 545367355966fe0c583d87c6b3c81c6d10cc364e Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Wed, 16 Apr 2025 13:20:32 +0200 Subject: [PATCH 06/16] Add tests --- src/bin/mergiraf.rs | 100 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/src/bin/mergiraf.rs b/src/bin/mergiraf.rs index b40ed88..67aa2df 100644 --- a/src/bin/mergiraf.rs +++ b/src/bin/mergiraf.rs @@ -546,4 +546,104 @@ mod test { assert!(test_file_orig_file_path.exists()); } + + #[test] + fn manual_language_selection_for_solve() { + let repo_dir = tempfile::tempdir().expect("failed to create the temp dir"); + let repo_path = repo_dir.path(); + + let test_file_name = "test.txt"; + + let test_file_abs_path = repo_path.join(test_file_name); + fs::write(&test_file_abs_path, "<<<<<<< LEFT\n[1, 2, 3, 4]\n||||||| BASE\n[1, 2, 3]\n=======\n[0, 1, 2, 3]\n>>>>>>> RIGHT\n") + .expect("failed to write test file to git repository"); + + // first try without specifying a language + let return_code = real_main(CliArgs::parse_from([ + "mergiraf", + "solve", + test_file_abs_path.to_str().unwrap(), + ])) + .expect("failed to execute `mergiraf solve`"); + assert_eq!( + return_code, 1, + "running `mergiraf solve` should fail because the language can't be detected" + ); + + // then try with a language specified on the CLI + let return_code = real_main(CliArgs::parse_from([ + "mergiraf", + "solve", + "--language=json", + test_file_abs_path.to_str().unwrap(), + ])) + .expect("failed to execute `mergiraf solve`"); + assert_eq!( + return_code, 0, + "`mergiraf solve` should execute successfully with a specified language" + ); + + let merge_result = + 
fs::read_to_string(test_file_abs_path).expect("couldn't read the merge result"); + assert_eq!(merge_result, "[0, 1, 2, 3, 4]\n"); + } + + #[test] + fn manual_language_selection_for_merge() { + let repo_dir = tempfile::tempdir().expect("failed to create the temp dir"); + let repo_path = repo_dir.path(); + + let base_file_name = "base.txt"; + let left_file_name = "left.txt"; + let right_file_name = "right.txt"; + let output_file_name = "output.txt"; + + let base_file_abs_path = repo_path.join(base_file_name); + fs::write(&base_file_abs_path, "[1, 2, 3]\n") + .expect("failed to write test base file to git repository"); + let left_file_abs_path = repo_path.join(left_file_name); + fs::write(&left_file_abs_path, "[1, 2, 3, 4]\n") + .expect("failed to write test left file to git repository"); + let right_file_abs_path = repo_path.join(right_file_name); + fs::write(&right_file_abs_path, "[0, 1, 2, 3]\n") + .expect("failed to write test right file to git repository"); + let output_file_abs_path = repo_path.join(output_file_name); + + // first try without specifying a language + let return_code = real_main(CliArgs::parse_from([ + "mergiraf", + "merge", + base_file_abs_path.to_str().unwrap(), + left_file_abs_path.to_str().unwrap(), + right_file_abs_path.to_str().unwrap(), + "--output", + output_file_abs_path.to_str().unwrap(), + ])) + .expect("failed to execute `mergiraf merge`"); + assert_eq!( + return_code, 1, + "running `mergiraf merge` should fail because the language can't be detected" + ); + + // then try with a language specified on the CLI + let return_code = real_main(CliArgs::parse_from([ + "mergiraf", + "merge", + "--language=json", + base_file_abs_path.to_str().unwrap(), + left_file_abs_path.to_str().unwrap(), + right_file_abs_path.to_str().unwrap(), + "--output", + output_file_abs_path.to_str().unwrap(), + ])) + .expect("failed to execute `mergiraf merge`"); + assert_eq!( + return_code, 0, + "`mergiraf merge` should execute successfully with a specified language" 
+ ); + + let merge_result = + fs::read_to_string(output_file_abs_path).expect("couldn't read the merge result"); + assert_eq!(merge_result, "[0, 1, 2, 3, 4]\n"); + } } -- 2.47.3 From 0e9cde79394f89d95a1acf22360236ea64ad7826 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Wed, 16 Apr 2025 13:29:08 +0200 Subject: [PATCH 07/16] Add documentation --- doc/src/usage.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/src/usage.md b/doc/src/usage.md index 23414f4..f17b23e 100644 --- a/doc/src/usage.md +++ b/doc/src/usage.md @@ -95,6 +95,12 @@ $ mergiraf=0 git rebase origin/master This will fall back on Git's regular merge heuristics, without requiring changes to your configuration. +#### Manually specifying the file's language + +You can use the `--language` option (short: `-L`) to specify the language of the files to merge. +It accepts both file extensions (`--language js`) and language names (`--language javascript`), as specified in the list of [supported languages](./languages.md). +This will override the language detection done by Mergiraf, which is currently based on file extensions only. + #### Reporting a bad merge If the output of a merge looks odd, you are encouraged to report it as a bug. The `mergiraf report` command generates an archive containing all necessary information to reproduce the faulty merge. -- 2.47.3 From 719c61c6ad51c93d0bd00f5e79f04623c037e298 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Wed, 16 Apr 2025 13:36:12 +0200 Subject: [PATCH 08/16] Also update the docs for adding a language --- doc/src/adding-a-language.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/src/adding-a-language.md b/doc/src/adding-a-language.md index 799efa3..8dfc860 100644 --- a/doc/src/adding-a-language.md +++ b/doc/src/adding-a-language.md @@ -18,7 +18,8 @@ The version of the parser must be selected so that it is compatible with the ver Then, go to `src/supported_langs.rs` and add a profile for the language. 
You can start with a minimal one, such as: ```rust LangProfile { - name: "C#", // only used for logging purposes so far + name: "C#", // used for the --language CLI option, and generating the list of supported languages + alternate_names: &[], // other possible values for --language extensions: vec!["cs"], // all file extensions for this language (note the lack of `.`!) language: tree_sitter_c_sharp::LANGUAGE.into(), // the tree-sitter parser // optional settings, explained below -- 2.47.3 From 240ee0fce14d3f51f0c028ad0dfab6d5a4060588 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Tue, 22 Apr 2025 14:19:10 +0200 Subject: [PATCH 09/16] refactor: introduce LangProfile::find_by_filename_or_name --- src/lang_profile.rs | 44 ++++++++++++++++++++++++++++++++++++++++++++ src/merge.rs | 22 +++++----------------- 2 files changed, 49 insertions(+), 17 deletions(-) diff --git a/src/lang_profile.rs b/src/lang_profile.rs index db8dfd0..992183c 100644 --- a/src/lang_profile.rs +++ b/src/lang_profile.rs @@ -53,6 +53,27 @@ impl LangProfile { Self::_detect_from_filename(filename) } + /// Loads a language either by name or by detecting it from a filename + pub fn find_by_filename_or_name

<P>( + filename: &P, + language_name: Option<&str>, + ) -> Result<&'static Self, String> + where + P: AsRef<Path> + ?Sized, + { + if let Some(lang_name) = language_name { + Self::find_by_name(lang_name) + .ok_or_else(|| format!("Specified language '{lang_name}' could not be found.")) + } else { + Self::detect_from_filename(filename).ok_or_else(|| { + format!( + "Could not find a supported language for {}.", + filename.as_ref().display() + ) + }) + } + } + fn _detect_from_filename(filename: &Path) -> Option<&'static Self> { // TODO make something more advanced like in difftastic // https://github.com/Wilfred/difftastic/blob/master/src/parse/tree_sitter_parser.rs @@ -247,4 +268,27 @@ mod tests { assert!(lang_profile.has_signature_conflicts(with_conflicts)); assert!(!lang_profile.has_signature_conflicts(without_conflicts)); } + + #[test] + fn find_by_filename_or_name() { + assert_eq!( + LangProfile::find_by_filename_or_name(Path::new("file.json"), None) + .unwrap() + .name, + "JSON" + ); + assert_eq!( + LangProfile::find_by_filename_or_name(Path::new("file.java"), Some("JSON")) + .unwrap() + .name, + "JSON" + ); + LangProfile::find_by_filename_or_name( + Path::new("file.json"), + Some("non-existent language"), + ) + .expect_err("If a language name is provided, the file name should be ignored"); + LangProfile::find_by_filename_or_name(Path::new("file.unknown_extension"), None) + .expect_err("Looking up language by unknown extension should fail"); + } } diff --git a/src/merge.rs b/src/merge.rs index 8565a9b..a4dd295 100644 --- a/src/merge.rs +++ b/src/merge.rs @@ -35,24 +35,12 @@ pub fn line_merge_and_structured_resolution( timeout: Duration, language: Option<&str>, ) -> MergeResult { - let lang_profile = if let Some(lang_name) = language { - let Some(lang_profile) = LangProfile::find_by_name(lang_name) else { - warn!( - "Specified language '{lang_name}' could not be found. Falling back to a line-based merge."
- ); + let lang_profile = match LangProfile::find_by_filename_or_name(fname_base, language) { + Ok(lang_profile) => lang_profile, + Err(message) => { + warn!("{message}. Falling back to a line-based merge."); return line_based_merge(contents_base, contents_left, contents_right, &settings); - }; - lang_profile - } else { - let Some(lang_profile) = LangProfile::detect_from_filename(fname_base) else { - // can't do anything fancier anyway - debug!( - "Could not find a supported language for {}. Falling back to a line-based merge.", - fname_base.display() - ); - return line_based_merge(contents_base, contents_left, contents_right, &settings); - }; - lang_profile + } }; let merges = cascading_merge( -- 2.47.3 From 7d094b262350d75ae3868d70c09f98087b8f8326 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Tue, 22 Apr 2025 14:33:27 +0200 Subject: [PATCH 10/16] =?UTF-8?q?feat:=C2=A0Add=20--language=20support=20t?= =?UTF-8?q?o=20mgf=5Fdev?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/bin/mgf_dev.rs | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/src/bin/mgf_dev.rs b/src/bin/mgf_dev.rs index a9b7600..1b46449 100644 --- a/src/bin/mgf_dev.rs +++ b/src/bin/mgf_dev.rs @@ -21,6 +21,9 @@ use typed_arena::Arena; struct CliArgs { #[command(subcommand)] command: Command, + /// Override automatic language detection. 
+ #[arg(short = 'L', long, global = true)] + language: Option, } #[derive(Subcommand, Debug)] @@ -65,12 +68,7 @@ fn real_main(args: &CliArgs) -> Result { }; let lang_profile = - LangProfile::detect_from_filename(language_determining_path).ok_or_else(|| { - format!( - "Could not detect a supported language for {}", - language_determining_path.display() - ) - })?; + LangProfile::find_by_filename_or_name(language_determining_path, args.language.as_deref())?; let mut parser = TSParser::new(); parser @@ -212,4 +210,25 @@ mod tests { Ok(0) ); } + + #[test] + fn set_language() { + let repo_dir = tempfile::tempdir().expect("failed to create the temp dir"); + let test_file = repo_dir.path().join(Path::new("file.txt")); + fs::copy( + Path::new("examples/java/working/demo/Base.java"), + &test_file, + ) + .expect("Failed to copy the Java file to the temporary directory"); + assert_eq!( + real_main(&CliArgs::parse_from([ + "mgf_dev", + "parse", + "--language", + "java", + test_file.display().to_string().as_str(), + ])), + Ok(0) + ); + } } -- 2.47.3 From c010642734d72ba7c12b7e2f3d36a7eadea466af Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Tue, 22 Apr 2025 14:48:18 +0200 Subject: [PATCH 11/16] Also consider extensions as language names --- src/lang_profile.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/lang_profile.rs b/src/lang_profile.rs index 992183c..2cc8b0e 100644 --- a/src/lang_profile.rs +++ b/src/lang_profile.rs @@ -34,12 +34,14 @@ pub struct LangProfile { impl LangProfile { /// Load a profile by language name. + /// Alternate names or extensions are also considered. 
pub fn find_by_name(name: &str) -> Option<&'static Self> { SUPPORTED_LANGUAGES.iter().find(|lang_profile| { lang_profile.name.eq_ignore_ascii_case(name) || lang_profile .alternate_names .iter() + .chain(lang_profile.extensions.iter()) .any(|aname| aname.eq_ignore_ascii_case(name)) }) } @@ -269,6 +271,22 @@ mod tests { assert!(!lang_profile.has_signature_conflicts(without_conflicts)); } + #[test] + fn find_by_name() { + assert_eq!(LangProfile::find_by_name("JSON").unwrap().name, "JSON"); + assert_eq!(LangProfile::find_by_name("Json").unwrap().name, "JSON"); + assert_eq!(LangProfile::find_by_name("python").unwrap().name, "Python"); + assert_eq!(LangProfile::find_by_name("py").unwrap().name, "Python"); + assert_eq!( + LangProfile::find_by_name("Java properties").unwrap().name, + "Java properties" + ); + assert!( + LangProfile::find_by_name("unknown language").is_none(), + "Language shouldn't be found" + ); + } + #[test] fn find_by_filename_or_name() { assert_eq!( -- 2.47.3 From 34b9ab1bf963f29ebf2956466a5ff72aaddedc5b Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Sun, 27 Apr 2025 09:26:35 +0200 Subject: [PATCH 12/16] Stylistic improvements --- src/bin/mgf_dev.rs | 9 +++------ src/lang_profile.rs | 23 ++++++++++++----------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/bin/mgf_dev.rs b/src/bin/mgf_dev.rs index 1b46449..89ea80c 100644 --- a/src/bin/mgf_dev.rs +++ b/src/bin/mgf_dev.rs @@ -214,12 +214,9 @@ mod tests { #[test] fn set_language() { let repo_dir = tempfile::tempdir().expect("failed to create the temp dir"); - let test_file = repo_dir.path().join(Path::new("file.txt")); - fs::copy( - Path::new("examples/java/working/demo/Base.java"), - &test_file, - ) - .expect("Failed to copy the Java file to the temporary directory"); + let test_file = repo_dir.path().join("file.txt"); + fs::copy("examples/java/working/demo/Base.java", &test_file) + .expect("Failed to copy the Java file to the temporary directory"); assert_eq!( 
real_main(&CliArgs::parse_from([ "mgf_dev", diff --git a/src/lang_profile.rs b/src/lang_profile.rs index 2cc8b0e..e9605e5 100644 --- a/src/lang_profile.rs +++ b/src/lang_profile.rs @@ -38,10 +38,8 @@ impl LangProfile { pub fn find_by_name(name: &str) -> Option<&'static Self> { SUPPORTED_LANGUAGES.iter().find(|lang_profile| { lang_profile.name.eq_ignore_ascii_case(name) - || lang_profile - .alternate_names - .iter() - .chain(lang_profile.extensions.iter()) + || (lang_profile.alternate_names.iter()) + .chain(&lang_profile.extensions) .any(|aname| aname.eq_ignore_ascii_case(name)) }) } @@ -290,22 +288,25 @@ mod tests { #[test] fn find_by_filename_or_name() { assert_eq!( - LangProfile::find_by_filename_or_name(Path::new("file.json"), None) + LangProfile::find_by_filename_or_name("file.json", None) .unwrap() .name, "JSON" ); assert_eq!( - LangProfile::find_by_filename_or_name(Path::new("file.java"), Some("JSON")) + LangProfile::find_by_filename_or_name("file.java", Some("JSON")) .unwrap() .name, "JSON" ); - LangProfile::find_by_filename_or_name( - Path::new("file.json"), - Some("non-existent language"), - ) - .expect_err("If a language name is provided, the file name should be ignored"); + assert!( + LangProfile::find_by_filename_or_name( + Path::new("file.json"), + Some("non-existent language"), + ) + .is_err(), + "If a language name is provided, the file name should be ignored" + ); LangProfile::find_by_filename_or_name(Path::new("file.unknown_extension"), None) .expect_err("Looking up language by unknown extension should fail"); } -- 2.47.3 From 23604f7f9aaf6d3c45897a48c8ca6024c1cb09da Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Sun, 27 Apr 2025 09:27:46 +0200 Subject: [PATCH 13/16] Remove duplicate dot --- src/lang_profile.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lang_profile.rs b/src/lang_profile.rs index e9605e5..a308a41 100644 --- a/src/lang_profile.rs +++ b/src/lang_profile.rs @@ -63,11 +63,11 @@ impl LangProfile { { 
if let Some(lang_name) = language_name { Self::find_by_name(lang_name) - .ok_or_else(|| format!("Specified language '{lang_name}' could not be found.")) + .ok_or_else(|| format!("Specified language '{lang_name}' could not be found")) } else { Self::detect_from_filename(filename).ok_or_else(|| { format!( - "Could not find a supported language for {}.", + "Could not find a supported language for {}", filename.as_ref().display() ) }) -- 2.47.3 From 538e2439279b7b50258c79cde413a83a13e62994 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Sun, 27 Apr 2025 09:32:48 +0200 Subject: [PATCH 14/16] Also use find_by_filename_or_name in solve.rs --- src/solve.rs | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/solve.rs b/src/solve.rs index fefabe0..0dc217e 100644 --- a/src/solve.rs +++ b/src/solve.rs @@ -23,17 +23,7 @@ pub fn resolve_merge_cascading<'a>( ) -> Result { let mut solves = Vec::with_capacity(3); - let lang_profile = if let Some(lang_name) = language { - LangProfile::find_by_name(lang_name) - .ok_or_else(|| format!("Specified language '{lang_name}' could not be found."))? - } else { - LangProfile::detect_from_filename(fname_base).ok_or_else(|| { - format!( - "Could not find a supported language for {}", - fname_base.display() - ) - })? 
- }; + let lang_profile = LangProfile::find_by_filename_or_name(fname_base, language)?; match ParsedMerge::parse(merge_contents, &settings) { Err(err) => { -- 2.47.3 From d7e2290b5ee956ae0bca8dcf711f09e2c169dd33 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Mon, 28 Apr 2025 17:40:12 +0200 Subject: [PATCH 15/16] Style fixes --- src/bin/mgf_dev.rs | 2 +- src/lang_profile.rs | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/bin/mgf_dev.rs b/src/bin/mgf_dev.rs index 89ea80c..1d511be 100644 --- a/src/bin/mgf_dev.rs +++ b/src/bin/mgf_dev.rs @@ -223,7 +223,7 @@ mod tests { "parse", "--language", "java", - test_file.display().to_string().as_str(), + test_file.to_str().unwrap(), ])), Ok(0) ); diff --git a/src/lang_profile.rs b/src/lang_profile.rs index a308a41..1357e86 100644 --- a/src/lang_profile.rs +++ b/src/lang_profile.rs @@ -300,14 +300,13 @@ mod tests { "JSON" ); assert!( - LangProfile::find_by_filename_or_name( - Path::new("file.json"), - Some("non-existent language"), - ) - .is_err(), + LangProfile::find_by_filename_or_name("file.json", Some("non-existent language"),) + .is_err(), "If a language name is provided, the file name should be ignored" ); - LangProfile::find_by_filename_or_name(Path::new("file.unknown_extension"), None) - .expect_err("Looking up language by unknown extension should fail"); + assert!( + LangProfile::find_by_filename_or_name("file.unknown_extension", None).is_err(), + "Looking up language by unknown extension should fail" + ); } } -- 2.47.3 From 230050c63b826352d8fa0f4ac9bdd4e5a40825b1 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Mon, 28 Apr 2025 20:08:40 +0200 Subject: [PATCH 16/16] Remove explicit TSX alternate name --- src/supported_langs.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/supported_langs.rs b/src/supported_langs.rs index d3cc69f..15658b7 100644 --- a/src/supported_langs.rs +++ b/src/supported_langs.rs @@ -559,7 +559,7 @@ pub static 
SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "Typescript (TSX)", - alternate_names: &["TSX"], + alternate_names: &[], extensions: vec!["tsx"], language: tree_sitter_typescript::LANGUAGE_TSX.into(), atomic_nodes: vec![], -- 2.47.3