fix(AstNode::parse): respect char boundaries of source when informing about a parse error #558

Merged
wetneb merged 3 commits from ada4a/mergiraf:ast-parse-fail-char-boundary into main 2025-08-02 09:35:02 +02:00

View file

@ -3,7 +3,7 @@ use std::iter::zip;
use std::{
borrow::Cow,
cell::UnsafeCell,
cmp::{max, min},
cmp::max,
fmt::Display,
hash::{Hash, Hasher},
ops::Range,
@ -21,6 +21,7 @@ use tree_sitter::{
use typed_arena::Arena;
use crate::{
StrExt,
lang_profile::{CommutativeParent, LangProfile, ParentType},
signature::{Signature, SignatureDefinition},
};
@ -311,13 +312,19 @@ impl<'a> AstNode<'a> {
let local_source = &global_source[range.start..range.end];
if node.is_error() {
let full_range = node.range();
// it can be that byte 32 doesn't lie on char boundary,
// so increase the index until it does
#[expect(unstable_name_collisions)]
let idx = local_source.ceil_char_boundary(32);
return Err(format!(
"parse error at {}:{}..{}:{}, starting with: {}",
full_range.start_point.row,
full_range.start_point.column,
full_range.end_point.row,
full_range.end_point.column,
&local_source[..min(32, local_source.len())]
&local_source[..idx]
));
}
@ -1153,12 +1160,25 @@ mod tests {
let ctx = ctx();
let lang_profile = LangProfile::detect_from_filename("test.json")
.expect("could not load language profile");
let parse = AstNode::parse("[\n {,\n]", lang_profile, &ctx.arena, &ctx.ref_arena);
assert_eq!(
parse,
Err("parse error at 1:1..1:3, starting with: {,".to_string())
);
let parse = AstNode::parse(
"属于个人的非赢利性开源项目",
lang_profile,
&ctx.arena,
&ctx.ref_arena,
);
assert_eq!(
parse,
Err("parse error at 0:0..0:39, starting with: 属于个人的非赢利性开源".to_string())
);
}
#[test]

View file

@ -32,11 +32,11 @@ pub(crate) mod merged_tree;
pub(crate) mod multimap;
pub mod newline;
pub(crate) mod parsed_merge;
mod path_buf_ext;
pub(crate) mod pcs;
pub mod settings;
pub(crate) mod signature;
mod solve;
mod std_ext;
pub(crate) mod structured;
pub mod supported_langs;
#[cfg(test)]
@ -59,7 +59,7 @@ use settings::DisplaySettings;
use structured::structured_merge;
use supported_langs::SUPPORTED_LANGUAGES;
pub use path_buf_ext::PathBufExt;
pub use std_ext::{PathBufExt, StrExt};
/// Current way to disable Mergiraf
/// ## Usage

View file

@ -1,9 +1,10 @@
//! currently unstable things from stdlib, vendored in
use std::{
ffi::OsStr,
path::{Path, PathBuf},
};
/// a temporary trait to implement currently unstable methods
pub trait PathBufExt {
/// <https://doc.rust-lang.org/std/path/struct.PathBuf.html#method.leak>
fn leak<'a>(self) -> &'a mut Path;
@ -21,3 +22,21 @@ impl PathBufExt for PathBuf {
Self::from(new_path)
}
}
/// A temporary trait vendoring currently unstable `str` methods.
pub trait StrExt {
    /// Returns the smallest byte index that is not below `index` and lies on a
    /// char boundary of the string. If `index` is past the end of the string,
    /// the length of the string is returned.
    ///
    /// This never panics: the returned index is always in `0..=self.len()`,
    /// so it can be used directly to slice the string.
    ///
    /// <https://doc.rust-lang.org/std/primitive.str.html#method.ceil_char_boundary>
    fn ceil_char_boundary(self, index: usize) -> usize;
}

impl StrExt for &'_ str {
    fn ceil_char_boundary(self, index: usize) -> usize {
        let len = self.len();
        if index >= len {
            // at or past the end: `len` itself is always a char boundary
            return len;
        }
        // The range must include `len`: when `index` points into the middle of
        // the last (multi-byte) char, `len` is the only boundary left to find.
        (index..=len)
            .find(|&i| self.is_char_boundary(i))
            .expect("`i = len` must be a char boundary") // otherwise `self` wouldn't have been a valid `&str` to begin with
    }
}