diff --git a/src/ast.rs b/src/ast.rs index cab97e2..9a59b4e 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -3,7 +3,7 @@ use std::iter::zip; use std::{ borrow::Cow, cell::UnsafeCell, - cmp::{max, min}, + cmp::max, fmt::Display, hash::{Hash, Hasher}, ops::Range, @@ -21,6 +21,7 @@ use tree_sitter::{ use typed_arena::Arena; use crate::{ + StrExt, lang_profile::{CommutativeParent, LangProfile, ParentType}, signature::{Signature, SignatureDefinition}, }; @@ -311,13 +312,19 @@ impl<'a> AstNode<'a> { let local_source = &global_source[range.start..range.end]; if node.is_error() { let full_range = node.range(); + + // it can be that byte 32 doesn't lie on char boundary, + // so increase the index until it does + #[expect(unstable_name_collisions)] + let idx = local_source.ceil_char_boundary(32); + return Err(format!( "parse error at {}:{}..{}:{}, starting with: {}", full_range.start_point.row, full_range.start_point.column, full_range.end_point.row, full_range.end_point.column, - &local_source[..min(32, local_source.len())] + &local_source[..idx] )); } @@ -1153,12 +1160,25 @@ mod tests { let ctx = ctx(); let lang_profile = LangProfile::detect_from_filename("test.json") .expect("could not load language profile"); + let parse = AstNode::parse("[\n {,\n]", lang_profile, &ctx.arena, &ctx.ref_arena); assert_eq!( parse, Err("parse error at 1:1..1:3, starting with: {,".to_string()) ); + + let parse = AstNode::parse( + "属于个人的非赢利性开源项目", + lang_profile, + &ctx.arena, + &ctx.ref_arena, + ); + + assert_eq!( + parse, + Err("parse error at 0:0..0:39, starting with: 属于个人的非赢利性开源".to_string()) + ); } #[test] diff --git a/src/lib.rs b/src/lib.rs index cc864a9..80d2ad2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -32,11 +32,11 @@ pub(crate) mod merged_tree; pub(crate) mod multimap; pub mod newline; pub(crate) mod parsed_merge; -mod path_buf_ext; pub(crate) mod pcs; pub mod settings; pub(crate) mod signature; mod solve; +mod std_ext; pub(crate) mod structured; pub mod supported_langs; #[cfg(test)] 
@@ -59,7 +59,7 @@ use settings::DisplaySettings;
 use structured::structured_merge;
 use supported_langs::SUPPORTED_LANGUAGES;
 
-pub use path_buf_ext::PathBufExt;
+pub use std_ext::{PathBufExt, StrExt};
 
 /// Current way to disable Mergiraf
 /// ## Usage
diff --git a/src/path_buf_ext.rs b/src/std_ext.rs
similarity index 53%
rename from src/path_buf_ext.rs
rename to src/std_ext.rs
index 4788ac4..24949c3 100644
--- a/src/path_buf_ext.rs
+++ b/src/std_ext.rs
@@ -1,9 +1,10 @@
+//! currently unstable things from stdlib, vendored in
+
 use std::{
     ffi::OsStr,
     path::{Path, PathBuf},
 };
 
-/// a temporary trait to implement currently unstable methods
 pub trait PathBufExt {
     ///
     fn leak<'a>(self) -> &'a mut Path;
@@ -21,3 +22,27 @@ impl PathBufExt for PathBuf {
         Self::from(new_path)
     }
 }
+
+pub trait StrExt {
+    /// Returns the smallest byte offset `>= index` that lies on a UTF-8 char
+    /// boundary of `self`, clamped to `self.len()`.
+    ///
+    /// Slicing `self` up to the returned offset never panics, unlike slicing
+    /// at an arbitrary byte offset.
+    fn ceil_char_boundary(self, index: usize) -> usize;
+}
+
+impl StrExt for &'_ str {
+    fn ceil_char_boundary(self, index: usize) -> usize {
+        let len = self.len();
+        if index > len {
+            len
+        } else {
+            // The range must include `len`: when `index == len`, or when `index`
+            // falls inside the final multi-byte char, `len` is the only boundary left.
+            (index..=len)
+                .find(|&i| self.is_char_boundary(i))
+                .expect("`i = len` must be a char boundary") // otherwise `self` wouldn't have been a valid `&str` to begin with
+        }
+    }
+}