From 67b950d38f60bc3a2064543f95a3e7454d73531c Mon Sep 17 00:00:00 2001 From: Ben Boeckel Date: Tue, 12 Aug 2025 23:56:22 -0400 Subject: [PATCH 01/12] feat(Git): support reading language from git attributes When used as a merge driver, `mergiraf` only has the file extension as guidance for language specification. This may not be suitable in all cases (e.g., custom languages that are supported-language-adjacent or `.in` template files that are mostly another language). Allow specification of the language using the `mergiraf.language` attribute. --- doc/src/usage.md | 5 +++ mgf_dev/src/main.rs | 6 ++- src/git.rs | 32 +++++++++++++++ src/lang_profile.rs | 84 +++++++++++++++++++++++++++++++++++++++- src/main.rs | 2 + src/merge.rs | 4 +- src/solve.rs | 3 +- tests/failing.rs | 2 + tests/timeout_support.rs | 1 + tests/working.rs | 1 + 10 files changed, 135 insertions(+), 5 deletions(-) diff --git a/doc/src/usage.md b/doc/src/usage.md index 31b3580..4f0046b 100644 --- a/doc/src/usage.md +++ b/doc/src/usage.md @@ -60,6 +60,11 @@ $ mergiraf languages --gitattributes If you want to enable Mergiraf only in a certain repository, add the lines above in the `.gitattributes` file at the root of that repository instead, or in `.git/info/attributes` if you don't want it to be tracked in the repository. 
+If `mergiraf` does not recognize your file's language by extension, you may set the `mergiraf.language` attribute on the file to specify it manually: +``` +*.myjs mergiraf.language=javascript +``` + #### Trying it out An [example repository](https://codeberg.org/mergiraf/example-repo) is available for you to try out Mergiraf on simple examples: diff --git a/mgf_dev/src/main.rs b/mgf_dev/src/main.rs index 7af595d..02cf522 100644 --- a/mgf_dev/src/main.rs +++ b/mgf_dev/src/main.rs @@ -92,7 +92,11 @@ fn real_main(args: &CliArgs) -> Result { let ref_arena = Arena::new(); let lang_profile = |language_determining_path| { - LangProfile::find_by_filename_or_name(language_determining_path, args.language.as_deref()) + LangProfile::find_by_filename_or_name( + language_determining_path, + args.language.as_deref(), + None, + ) }; let contents = |path: &Path| -> Result, String> { diff --git a/src/git.rs b/src/git.rs index 814734b..768c5ee 100644 --- a/src/git.rs +++ b/src/git.rs @@ -94,3 +94,35 @@ pub(crate) fn read_content_from_commits( read_content_from_commit(repo_dir, oids.2, file_name)?, )) } + +pub(crate) fn read_attribute_for_file( + repo_dir: &Path, + file_name: &Path, + attr: &str, +) -> Option { + Command::new("git") + .args([ + "check-attr", + "-z", + attr, + "--", + &format!("{}", file_name.display()), + ]) + .current_dir(repo_dir) + .output() + .ok() + .filter(|output| output.status.success()) + .and_then(|output| { + output + .stdout + .split(|b| *b == b'\0') + .nth(2) + .map(|value| value.to_vec()) + }) + .and_then(|c| String::from_utf8(c).ok()) +} + +pub(crate) fn read_lang_attribute(repo_dir: &Path, file_name: &Path) -> Option { + read_attribute_for_file(repo_dir, file_name, "mergiraf.language") + .filter(|value| value != "unspecified" && value != "set" && value != "unset") +} diff --git a/src/lang_profile.rs b/src/lang_profile.rs index 7c78afd..defbb92 100644 --- a/src/lang_profile.rs +++ b/src/lang_profile.rs @@ -3,7 +3,7 @@ use std::{collections::HashSet, 
ffi::OsStr, fmt::Display, hash::Hash, path::Path use itertools::Itertools; use tree_sitter::Language; -use crate::{ast::AstNode, signature::SignatureDefinition, supported_langs::SUPPORTED_LANGUAGES}; +use crate::{ast::AstNode, git, signature::SignatureDefinition, supported_langs::SUPPORTED_LANGUAGES}; /// Language-dependent settings to influence how merging is done. /// All those settings are declarative (except for the tree-sitter parser, which is @@ -98,10 +98,19 @@ impl LangProfile { inner(filename.as_ref()) } + /// Detects the language of a file based on VCS attributes + pub fn detect_language_from_vcs_attr<P>(repo_dir: &Path, filename: P) -> Option<String> + where + P: AsRef<Path>, + { + git::read_lang_attribute(repo_dir, filename.as_ref()) + } + /// Loads a language either by name or by detecting it from a filename pub fn find_by_filename_or_name<P>
( filename: P, language_name: Option<&str>, + repo_dir: Option<&Path>, ) -> Result<&'static Self, String> where P: AsRef, @@ -110,6 +119,12 @@ impl LangProfile { if let Some(lang_name) = language_name { Self::find_by_name(lang_name) .ok_or_else(|| format!("Specified language '{lang_name}' could not be found")) + } else if let Some(lang_name) = + repo_dir.and_then(|repo_dir| Self::detect_language_from_vcs_attr(repo_dir, filename)) + { + Self::find_by_name(&lang_name).ok_or_else(|| { + format!("Attribute-specified language '{lang_name}' could not be found") + }) } else { Self::detect_from_filename(filename).ok_or_else(|| { format!( @@ -481,6 +496,8 @@ impl ChildrenGroup { #[cfg(test)] mod tests { + use std::{env, fs::File, io::Write, process::Command}; + use super::*; use crate::{signature::PathStep, test_utils::ctx}; @@ -515,7 +532,7 @@ mod tests { #[test] fn find_by_filename_or_name() { fn find(filename: &str, name: Option<&str>) -> Result<&'static LangProfile, String> { - LangProfile::find_by_filename_or_name(filename, name) + LangProfile::find_by_filename_or_name(filename, name, None) } assert_eq!(find("file.json", None).unwrap().name, "JSON"); assert_eq!(find("file.java", Some("JSON")).unwrap().name, "JSON"); @@ -609,4 +626,67 @@ mod tests { Err("invalid flattened node type: \"foo_bar\"".to_string()) ); } + + #[test] + fn find_by_filename_or_name_vcs() { + let mut working_dir = env::current_exe().unwrap(); + working_dir.pop(); + let tempdir = tempfile::tempdir_in(working_dir).unwrap(); + + Command::new("git") + .arg("init") + .current_dir(&tempdir) + .output() + .expect("failed to init git repository"); + { + let attrpath = tempdir.path().join(".gitattributes"); + let mut attrfile = File::create(attrpath).unwrap(); + write!( + &mut attrfile, + concat!( + "*.bogus mergiraf.language=bogus\n", + "*.js mergiraf.language=javascript\n", + "*.myjs mergiraf.language=javascript\n", + ), + ) + .unwrap(); + } + Command::new("git") + .args([ + "-c", + 
"user.email=mergiraf@example.com", + "-c", + "user.name=Mergiraf Testing", + "commit", + "-a", + "-m", + "add gitattributes", + ]) + .current_dir(&tempdir) + .output() + .expect("failed to commit attribute file"); + + fn find_impl( + filename: &str, + name: Option<&str>, + repo_dir: &Path, + ) -> Result<&'static LangProfile, String> { + LangProfile::find_by_filename_or_name(filename, name, Some(repo_dir)) + } + let find = |filename, name| find_impl(filename, name, tempdir.path()); + assert_eq!( + find("file.bogus", None).unwrap_err(), + "Attribute-specified language 'bogus' could not be found", + ); + assert_eq!( + find("file.noattr", None).unwrap_err(), + "Could not find a supported language for file.noattr", + ); + assert_eq!(find("file.js", None).unwrap().name, "Javascript"); + assert_eq!(find("file.myjs", None).unwrap().name, "Javascript"); + assert_eq!(find("file.bogus", Some("python")).unwrap().name, "Python"); + assert_eq!(find("file.noattr", Some("python")).unwrap().name, "Python"); + assert_eq!(find("file.js", Some("python")).unwrap().name, "Python"); + assert_eq!(find("file.myjs", Some("python")).unwrap().name, "Python"); + } } diff --git a/src/main.rs b/src/main.rs index 814f443..76061c2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -241,6 +241,7 @@ fn real_main(args: CliArgs) -> Result { let fname_base = path_name.unwrap_or(fname_base); + let working_dir = env::current_dir().expect("Invalid current directory"); let merge_result = line_merge_and_structured_resolution( contents_base, contents_left, @@ -252,6 +253,7 @@ fn real_main(args: CliArgs) -> Result { debug_dir, Duration::from_millis(timeout.unwrap_or(if fast { 5000 } else { 10000 })), language.as_deref(), + Some(&working_dir), ); if let Some(fname_out) = output { write_string_to_file(&fname_out, &merge_result.contents)?; diff --git a/src/merge.rs b/src/merge.rs index 9ade9d0..f7a8c46 100644 --- a/src/merge.rs +++ b/src/merge.rs @@ -36,8 +36,10 @@ pub fn line_merge_and_structured_resolution( 
debug_dir: Option<&'static Path>, timeout: Duration, language: Option<&str>, + repo_dir: Option<&Path>, ) -> MergeResult { - let Ok(lang_profile) = LangProfile::find_by_filename_or_name(fname_base, language) else { + let Ok(lang_profile) = LangProfile::find_by_filename_or_name(fname_base, language, repo_dir) + else { return line_based_merge(&contents_base, contents_left, &contents_right, &settings); }; diff --git a/src/solve.rs b/src/solve.rs index ad9bbc0..a17540c 100644 --- a/src/solve.rs +++ b/src/solve.rs @@ -24,7 +24,8 @@ pub fn resolve_merge_cascading<'a>( ) -> Result { let mut solves = Vec::with_capacity(4); - let lang_profile = LangProfile::find_by_filename_or_name(fname_base, language)?; + let lang_profile = + LangProfile::find_by_filename_or_name(fname_base, language, Some(working_dir))?; let parsed = match ParsedMerge::parse(merge_contents, &settings) { Err(err) => { diff --git a/tests/failing.rs b/tests/failing.rs index 0370f94..9505bea 100644 --- a/tests/failing.rs +++ b/tests/failing.rs @@ -77,6 +77,7 @@ fn integration_failing( None, Duration::from_millis(0), language_override_for_test(&test_dir), + None, ); let actual = &merge_result.contents; @@ -143,6 +144,7 @@ please examine the new output and update ExpectedCurrently{suffix} if it looks o None, Duration::from_millis(0), None, + None, ); let actual_compact = &merge_result.contents; diff --git a/tests/timeout_support.rs b/tests/timeout_support.rs index 1772e2c..23da464 100644 --- a/tests/timeout_support.rs +++ b/tests/timeout_support.rs @@ -38,6 +38,7 @@ fn timeout_support() { None, Duration::from_millis(1), // very small timeout: structured merging should never be that fast None, + None, ); let expected = contents_expected.trim(); diff --git a/tests/working.rs b/tests/working.rs index 3988b27..e634a56 100644 --- a/tests/working.rs +++ b/tests/working.rs @@ -39,6 +39,7 @@ fn compare_against_merge( None, Duration::from_millis(0), language_override_for_test(test_dir), + None, ); let expected = 
contents_expected; -- 2.47.3 From ff3d0b80a1128ab52ea7f043648a56244ecf717d Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Wed, 17 Sep 2025 14:04:40 +0200 Subject: [PATCH 02/12] Move documentation to section about manual language specification --- doc/src/usage.md | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/doc/src/usage.md b/doc/src/usage.md index 4f0046b..997b4d2 100644 --- a/doc/src/usage.md +++ b/doc/src/usage.md @@ -60,11 +60,6 @@ $ mergiraf languages --gitattributes If you want to enable Mergiraf only in a certain repository, add the lines above in the `.gitattributes` file at the root of that repository instead, or in `.git/info/attributes` if you don't want it to be tracked in the repository. -If `mergiraf` does not recognize your file's language by extension, you may set the `mergiraf.language` attribute on the file to specify it manually: -``` -*.myjs mergiraf.language=javascript -``` - #### Trying it out An [example repository](https://codeberg.org/mergiraf/example-repo) is available for you to try out Mergiraf on simple examples: @@ -111,9 +106,13 @@ This will fall back on Git's regular merge heuristics, without requiring changes #### Manually specifying the file's language -You can use the `--language` option (short: `-L`) to specify the language of the files to merge. +If `mergiraf` does not recognize your file's language by extension, you can use the `--language` option (short: `-L`) to specify the language of the files to merge. It accepts both file extensions (`--language js`) and language names (`--language javascript`), as specified in the list of [supported languages](./languages.md). -This will override the language detection done by Mergiraf, which is currently based on file extensions only. 
+ +Another option is to set the `mergiraf.language` attribute in a `gitattributes` file, making it possible to associate a specific language to all file paths matching a pattern: +``` +*.myjs mergiraf.language=javascript +``` #### Reporting a bad merge -- 2.47.3 From ad6ea4f0b6310ba20609c728d7968a5037a8e38f Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Wed, 17 Sep 2025 14:05:18 +0200 Subject: [PATCH 03/12] Simplify `read_attribute_for_file` --- src/git.rs | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/src/git.rs b/src/git.rs index 768c5ee..d6fcb69 100644 --- a/src/git.rs +++ b/src/git.rs @@ -100,29 +100,25 @@ pub(crate) fn read_attribute_for_file( file_name: &Path, attr: &str, ) -> Option<String> { - Command::new("git") - .args([ - "check-attr", - "-z", - attr, - "--", - &format!("{}", file_name.display()), - ]) + // We use null bytes as separators to avoid having to deal + // with the encoding of spaces in filenames. + let output = Command::new("git") + .args(["check-attr", "-z", attr, "--"]) + .arg(file_name) .current_dir(repo_dir) .output() .ok() - .filter(|output| output.status.success()) - .and_then(|output| { - output - .stdout - .split(|b| *b == b'\0') - .nth(2) - .map(|value| value.to_vec()) - }) - .and_then(|c| String::from_utf8(c).ok()) + .filter(|output| output.status.success())?; + // Parse the output of git-check-attr, which with the `-z` flag looks like: + // <path> NUL <attribute> NUL <info> NUL + let bytes_value = output.stdout.split(|b| *b == b'\0').nth(2)?; + String::from_utf8(bytes_value.to_vec()).ok() } pub(crate) fn read_lang_attribute(repo_dir: &Path, file_name: &Path) -> Option<String> { + // TODO: potentially the `read_attribute_for_file` could expose attribute values + // in a more structured way, for instance with an enum which picks out those specific variants + // to be excluded.
read_attribute_for_file(repo_dir, file_name, "mergiraf.language") .filter(|value| value != "unspecified" && value != "set" && value != "unset") } -- 2.47.3 From 854af3ab0d4f44ebe7aa237d28b1da4f564d2f2f Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Wed, 17 Sep 2025 14:06:30 +0200 Subject: [PATCH 04/12] Rename `LangProfile::find_by_filename_or_name` to `LangProfile::find` --- mgf_dev/src/main.rs | 6 +----- src/lang_profile.rs | 6 +++--- src/merge.rs | 3 +-- src/solve.rs | 3 +-- 4 files changed, 6 insertions(+), 12 deletions(-) diff --git a/mgf_dev/src/main.rs b/mgf_dev/src/main.rs index 02cf522..e4cda69 100644 --- a/mgf_dev/src/main.rs +++ b/mgf_dev/src/main.rs @@ -92,11 +92,7 @@ fn real_main(args: &CliArgs) -> Result { let ref_arena = Arena::new(); let lang_profile = |language_determining_path| { - LangProfile::find_by_filename_or_name( - language_determining_path, - args.language.as_deref(), - None, - ) + LangProfile::find(language_determining_path, args.language.as_deref(), None) }; let contents = |path: &Path| -> Result, String> { diff --git a/src/lang_profile.rs b/src/lang_profile.rs index defbb92..28f14db 100644 --- a/src/lang_profile.rs +++ b/src/lang_profile.rs @@ -107,7 +107,7 @@ impl LangProfile { } /// Loads a language either by name or by detecting it from a filename - pub fn find_by_filename_or_name

<P>( + pub fn find<P>
( filename: P, language_name: Option<&str>, repo_dir: Option<&Path>, @@ -532,7 +532,7 @@ mod tests { #[test] fn find_by_filename_or_name() { fn find(filename: &str, name: Option<&str>) -> Result<&'static LangProfile, String> { - LangProfile::find_by_filename_or_name(filename, name, None) + LangProfile::find(filename, name, None) } assert_eq!(find("file.json", None).unwrap().name, "JSON"); assert_eq!(find("file.java", Some("JSON")).unwrap().name, "JSON"); @@ -671,7 +671,7 @@ mod tests { name: Option<&str>, repo_dir: &Path, ) -> Result<&'static LangProfile, String> { - LangProfile::find_by_filename_or_name(filename, name, Some(repo_dir)) + LangProfile::find(filename, name, Some(repo_dir)) } let find = |filename, name| find_impl(filename, name, tempdir.path()); assert_eq!( diff --git a/src/merge.rs b/src/merge.rs index f7a8c46..805dab0 100644 --- a/src/merge.rs +++ b/src/merge.rs @@ -38,8 +38,7 @@ pub fn line_merge_and_structured_resolution( language: Option<&str>, repo_dir: Option<&Path>, ) -> MergeResult { - let Ok(lang_profile) = LangProfile::find_by_filename_or_name(fname_base, language, repo_dir) - else { + let Ok(lang_profile) = LangProfile::find(fname_base, language, repo_dir) else { return line_based_merge(&contents_base, contents_left, &contents_right, &settings); }; diff --git a/src/solve.rs b/src/solve.rs index a17540c..adc35e7 100644 --- a/src/solve.rs +++ b/src/solve.rs @@ -24,8 +24,7 @@ pub fn resolve_merge_cascading<'a>( ) -> Result { let mut solves = Vec::with_capacity(4); - let lang_profile = - LangProfile::find_by_filename_or_name(fname_base, language, Some(working_dir))?; + let lang_profile = LangProfile::find(fname_base, language, Some(working_dir))?; let parsed = match ParsedMerge::parse(merge_contents, &settings) { Err(err) => { -- 2.47.3 From 8c58da9a0a0084886e8029eba2650e1bccd5d922 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Wed, 17 Sep 2025 14:08:41 +0200 Subject: [PATCH 05/12] Update docs of `LangProfile::find` --- 
src/lang_profile.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/lang_profile.rs b/src/lang_profile.rs index 28f14db..587006c 100644 --- a/src/lang_profile.rs +++ b/src/lang_profile.rs @@ -106,7 +106,10 @@ impl LangProfile { git::read_lang_attribute(repo_dir, filename.as_ref()) } - /// Loads a language either by name or by detecting it from a filename + /// Loads a language, by: + /// - first, looking up the language using its name if provided + /// - failing that, by detecting it via configuration from the gitattributes file + /// - failing that, by detecting it from a filename pub fn find<P>
( filename: P, language_name: Option<&str>, -- 2.47.3 From ba67973702977c63a64f75d4dce1e70746e84dce Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Wed, 17 Sep 2025 14:11:10 +0200 Subject: [PATCH 06/12] Add comment about language lookup logic --- src/lang_profile.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lang_profile.rs b/src/lang_profile.rs index 587006c..0683688 100644 --- a/src/lang_profile.rs +++ b/src/lang_profile.rs @@ -122,6 +122,8 @@ impl LangProfile { if let Some(lang_name) = language_name { Self::find_by_name(lang_name) .ok_or_else(|| format!("Specified language '{lang_name}' could not be found")) + // If lookup by name failed, we don't fall back on the other detection methods, + // because we don't want to silently ignore an invalid language name. } else if let Some(lang_name) = repo_dir.and_then(|repo_dir| Self::detect_language_from_vcs_attr(repo_dir, filename)) { -- 2.47.3 From 0e1caa87502d0f7ce0464fa12a6c0e4483579161 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Wed, 17 Sep 2025 14:12:20 +0200 Subject: [PATCH 07/12] Replace by if-let chain for readability --- src/lang_profile.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lang_profile.rs b/src/lang_profile.rs index 0683688..6cc9a9f 100644 --- a/src/lang_profile.rs +++ b/src/lang_profile.rs @@ -124,8 +124,8 @@ impl LangProfile { .ok_or_else(|| format!("Specified language '{lang_name}' could not be found")) // If lookup by name failed, we don't fall back on the other detection methods, // because we don't want to silently ignore an invalid language name.
- } else if let Some(lang_name) = - repo_dir.and_then(|repo_dir| Self::detect_language_from_vcs_attr(repo_dir, filename)) + } else if let Some(repo_dir) = repo_dir + && let Some(lang_name) = Self::detect_language_from_vcs_attr(repo_dir, filename) { Self::find_by_name(&lang_name).ok_or_else(|| { format!("Attribute-specified language '{lang_name}' could not be found") -- 2.47.3 From 2a759228162d967befec907e230579e618b6ca91 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Wed, 17 Sep 2025 14:16:18 +0200 Subject: [PATCH 08/12] Inline `find_impl` --- src/lang_profile.rs | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/lang_profile.rs b/src/lang_profile.rs index 6cc9a9f..3275ed4 100644 --- a/src/lang_profile.rs +++ b/src/lang_profile.rs @@ -671,14 +671,7 @@ mod tests { .output() .expect("failed to commit attribute file"); - fn find_impl( - filename: &str, - name: Option<&str>, - repo_dir: &Path, - ) -> Result<&'static LangProfile, String> { - LangProfile::find(filename, name, Some(repo_dir)) - } - let find = |filename, name| find_impl(filename, name, tempdir.path()); + let find = |filename, name| LangProfile::find(filename, name, Some(tempdir.path())); assert_eq!( find("file.bogus", None).unwrap_err(), "Attribute-specified language 'bogus' could not be found", -- 2.47.3 From ac3d50140385c7200b9945d1bbd5dee7ec138fd8 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Wed, 17 Sep 2025 14:52:45 +0200 Subject: [PATCH 09/12] cargo fmt --- src/lang_profile.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/lang_profile.rs b/src/lang_profile.rs index 3275ed4..b6b7e46 100644 --- a/src/lang_profile.rs +++ b/src/lang_profile.rs @@ -3,7 +3,9 @@ use std::{collections::HashSet, ffi::OsStr, fmt::Display, hash::Hash, path::Path use itertools::Itertools; use tree_sitter::Language; -use crate::{ast::AstNode, git, signature::SignatureDefinition, supported_langs::SUPPORTED_LANGUAGES}; +use crate::{ + ast::AstNode, git, 
signature::SignatureDefinition, supported_langs::SUPPORTED_LANGUAGES, +}; /// Language-dependent settings to influence how merging is done. /// All those settings are declarative (except for the tree-sitter parser, which is -- 2.47.3 From bc4c1cf4c5b2fab5facb6070c7f36e6c29b74fa3 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Wed, 17 Sep 2025 21:25:06 +0200 Subject: [PATCH 10/12] Make sure all Linguist language names are recognized by mergiraf --- src/supported_langs.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/supported_langs.rs b/src/supported_langs.rs index 28fd541..ff2ee98 100644 --- a/src/supported_langs.rs +++ b/src/supported_langs.rs @@ -438,7 +438,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "go.mod", - alternate_names: &[], + alternate_names: &["Go module", "go mod"], extensions: vec![], file_names: vec!["go.mod"], language: tree_sitter_gomod_orchard::LANGUAGE.into(), @@ -511,9 +511,9 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "go.sum", - alternate_names: &[], + alternate_names: &["Go checksums"], extensions: vec![], - file_names: vec!["go.sum"], + file_names: vec!["go.sum", "go.work.sum"], language: tree_sitter_gosum_orchard::LANGUAGE.into(), atomic_nodes: vec![], commutative_parents: vec![ @@ -765,7 +765,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "C#", - alternate_names: &["CSharp"], + alternate_names: &["CSharp", "cake", "cakescript"], extensions: vec!["cs"], file_names: vec![], language: tree_sitter_c_sharp::LANGUAGE.into(), @@ -916,7 +916,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = LazyLock::new(|| { }, LangProfile { name: "Python", - alternate_names: &[], + alternate_names: &["Python3"], extensions: vec!["py"], file_names: vec![], language: tree_sitter_python_orchard::LANGUAGE.into(), @@ -970,7 +970,7 @@ pub static SUPPORTED_LANGUAGES: LazyLock> = 
LazyLock::new(|| { LangProfile { name: "PHP", alternate_names: &[], - extensions: vec!["php", "phtml"], + extensions: vec!["php", "phtml", "php3", "php4", "php5", "phps", "phpt"], file_names: vec![], language: tree_sitter_php::LANGUAGE_PHP.into(), // optional settings, explained below -- 2.47.3 From 60e74cc0b6401349b3fa97cac94f543a1f01475e Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Wed, 17 Sep 2025 21:30:26 +0200 Subject: [PATCH 11/12] Add fallback to `linguist-language` attribute --- doc/src/usage.md | 4 ++-- src/git.rs | 8 ++++++-- src/lang_profile.rs | 27 ++++++++++++++++++++++++--- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/doc/src/usage.md b/doc/src/usage.md index 997b4d2..f71742d 100644 --- a/doc/src/usage.md +++ b/doc/src/usage.md @@ -109,9 +109,9 @@ This will fall back on Git's regular merge heuristics, without requiring changes If `mergiraf` does not recognize your file's language by extension, you can use the `--language` option (short: `-L`) to specify the language of the files to merge. It accepts both file extensions (`--language js`) and language names (`--language javascript`), as specified in the list of [supported languages](./languages.md). 
-Another option is to set the `mergiraf.language` attribute in a `gitattributes` file, making it possible to associate a specific language to all file paths matching a pattern: +Another option is to set the `linguist-language` attribute in a `gitattributes` file, making it possible to associate a specific language to all file paths matching a pattern: ``` -*.myjs mergiraf.language=javascript +*.myjs linguist-language=javascript ``` #### Reporting a bad merge diff --git a/src/git.rs b/src/git.rs index d6fcb69..ce44970 100644 --- a/src/git.rs +++ b/src/git.rs @@ -119,6 +119,10 @@ pub(crate) fn read_lang_attribute(repo_dir: &Path, file_name: &Path) -> Option Date: Tue, 30 Sep 2025 14:28:04 +0200 Subject: [PATCH 12/12] cargo fmt --- src/lang_profile.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/lang_profile.rs b/src/lang_profile.rs index acdb84d..4678ea6 100644 --- a/src/lang_profile.rs +++ b/src/lang_profile.rs @@ -654,11 +654,9 @@ mod tests { "*.bogus.mgf mergiraf.language=bogus\n", "*.js.mgf mergiraf.language=javascript\n", "*.myjs.mgf mergiraf.language=javascript\n", - // Test that fallback to `linguist-language` works. 
"unspecified.bogus.mgf !mergiraf.language\n", "unset.bogus.mgf -mergiraf.language\n", - "*.bogus linguist-language=bogus\n", "*.js linguist-language=javascript\n", "*.myjs linguist-language=javascript\n", @@ -701,10 +699,19 @@ mod tests { ); assert_eq!(find("file.js", None).unwrap().name, "Javascript"); assert_eq!(find("file.myjs", None).unwrap().name, "Javascript"); - assert_eq!(find("file.bogus.mgf", Some("python")).unwrap().name, "Python"); - assert_eq!(find("file.noattr.mgf", Some("python")).unwrap().name, "Python"); + assert_eq!( + find("file.bogus.mgf", Some("python")).unwrap().name, + "Python" + ); + assert_eq!( + find("file.noattr.mgf", Some("python")).unwrap().name, + "Python" + ); assert_eq!(find("file.js.mgf", Some("python")).unwrap().name, "Python"); - assert_eq!(find("file.myjs.mgf", Some("python")).unwrap().name, "Python"); + assert_eq!( + find("file.myjs.mgf", Some("python")).unwrap().name, + "Python" + ); assert_eq!(find("file.bogus", Some("python")).unwrap().name, "Python"); assert_eq!(find("file.noattr", Some("python")).unwrap().name, "Python"); assert_eq!(find("file.js", Some("python")).unwrap().name, "Python"); -- 2.47.3