fix(AstNode::parse): respect char boundaries of source when informing about a parse error #558
3 changed files with 44 additions and 5 deletions
24
src/ast.rs
24
src/ast.rs
|
@ -3,7 +3,7 @@ use std::iter::zip;
|
|||
use std::{
|
||||
borrow::Cow,
|
||||
cell::UnsafeCell,
|
||||
cmp::{max, min},
|
||||
cmp::max,
|
||||
fmt::Display,
|
||||
hash::{Hash, Hasher},
|
||||
ops::Range,
|
||||
|
@ -21,6 +21,7 @@ use tree_sitter::{
|
|||
use typed_arena::Arena;
|
||||
|
||||
use crate::{
|
||||
StrExt,
|
||||
lang_profile::{CommutativeParent, LangProfile, ParentType},
|
||||
signature::{Signature, SignatureDefinition},
|
||||
};
|
||||
|
@ -311,13 +312,19 @@ impl<'a> AstNode<'a> {
|
|||
let local_source = &global_source[range.start..range.end];
|
||||
if node.is_error() {
|
||||
let full_range = node.range();
|
||||
|
||||
// it can be that byte 32 doesn't lie on char boundary,
|
||||
// so increase the index until it does
|
||||
#[expect(unstable_name_collisions)]
|
||||
let idx = local_source.ceil_char_boundary(32);
|
||||
|
||||
return Err(format!(
|
||||
"parse error at {}:{}..{}:{}, starting with: {}",
|
||||
full_range.start_point.row,
|
||||
full_range.start_point.column,
|
||||
full_range.end_point.row,
|
||||
full_range.end_point.column,
|
||||
&local_source[..min(32, local_source.len())]
|
||||
&local_source[..idx]
|
||||
));
|
||||
}
|
||||
|
||||
|
@ -1153,12 +1160,25 @@ mod tests {
|
|||
let ctx = ctx();
|
||||
let lang_profile = LangProfile::detect_from_filename("test.json")
|
||||
.expect("could not load language profile");
|
||||
|
||||
let parse = AstNode::parse("[\n {,\n]", lang_profile, &ctx.arena, &ctx.ref_arena);
|
||||
|
||||
assert_eq!(
|
||||
parse,
|
||||
Err("parse error at 1:1..1:3, starting with: {,".to_string())
|
||||
);
|
||||
|
||||
let parse = AstNode::parse(
|
||||
"属于个人的非赢利性开源项目",
|
||||
lang_profile,
|
||||
&ctx.arena,
|
||||
&ctx.ref_arena,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse,
|
||||
Err("parse error at 0:0..0:39, starting with: 属于个人的非赢利性开源".to_string())
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -32,11 +32,11 @@ pub(crate) mod merged_tree;
|
|||
pub(crate) mod multimap;
|
||||
pub mod newline;
|
||||
pub(crate) mod parsed_merge;
|
||||
mod path_buf_ext;
|
||||
pub(crate) mod pcs;
|
||||
pub mod settings;
|
||||
pub(crate) mod signature;
|
||||
mod solve;
|
||||
mod std_ext;
|
||||
pub(crate) mod structured;
|
||||
pub mod supported_langs;
|
||||
#[cfg(test)]
|
||||
|
@ -59,7 +59,7 @@ use settings::DisplaySettings;
|
|||
use structured::structured_merge;
|
||||
use supported_langs::SUPPORTED_LANGUAGES;
|
||||
|
||||
pub use path_buf_ext::PathBufExt;
|
||||
pub use std_ext::{PathBufExt, StrExt};
|
||||
|
||||
/// Current way to disable Mergiraf
|
||||
/// ## Usage
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
//! currently unstable things from stdlib, vendored in
|
||||
|
||||
use std::{
|
||||
ffi::OsStr,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
/// a temporary trait to implement currently unstable methods
|
||||
pub trait PathBufExt {
|
||||
/// <https://doc.rust-lang.org/std/path/struct.PathBuf.html#method.leak>
|
||||
fn leak<'a>(self) -> &'a mut Path;
|
||||
|
@ -21,3 +22,21 @@ impl PathBufExt for PathBuf {
|
|||
Self::from(new_path)
|
||||
}
|
||||
}
|
||||
|
||||
/// A temporary trait providing string-slice methods that are not yet
/// available from the standard library on the supported toolchain.
pub trait StrExt {
    /// Returns the smallest byte index `>= index` that lies on a UTF-8
    /// character boundary of the string.
    ///
    /// If `index` is greater than the length of the string, the length is
    /// returned. This method never panics.
    ///
    /// <https://doc.rust-lang.org/std/primitive.str.html#method.ceil_char_boundary>
    fn ceil_char_boundary(self, index: usize) -> usize;
}

impl StrExt for &'_ str {
    fn ceil_char_boundary(self, index: usize) -> usize {
        let len = self.len();
        // Look for the first boundary in `index..len`. When there is none --
        // either because `index >= len` (empty range) or because `index`
        // points into the last, multi-byte character -- the answer is `len`,
        // which is always a char boundary of a valid `&str`.
        (index..len)
            .find(|&i| self.is_char_boundary(i))
            .unwrap_or(len)
    }
}
|
Loading…
Add table
Add a link
Reference in a new issue