#![deny(missing_docs)]
extern crate gimli;
extern crate memmap;
extern crate object;
extern crate owning_ref;
extern crate fallible_iterator;
#[cfg(feature = "rustc-demangle")]
extern crate rustc_demangle;
#[cfg(feature = "cpp_demangle")]
extern crate cpp_demangle;
#[macro_use]
extern crate error_chain;
use owning_ref::OwningHandle;
use fallible_iterator::FallibleIterator;
use std::fmt;
use std::sync;
use std::path;
use std::error;
use std::borrow::Cow;
/// Structural problems that can be found while traversing DWARF debug symbols.
///
/// Values of this type are wrapped by the `InvalidDebugSymbols` error kind of this crate.
#[derive(Debug)]
pub enum DebugInfoError {
    /// A row of the line number program refers to a file that does not exist.
    InvalidDebugLineTarget,
    /// A unit was completely empty, i.e. it did not contain a compilation unit entry.
    ///
    /// (The misspelling in the variant name is kept for backwards compatibility.)
    MissingComplilationUnit,
    /// The first entry in a unit is not a compilation unit.
    UnitWithoutCompilationUnit,
    /// A subroutine has no name. The two values are rendered in hexadecimal as
    /// `<first><second>` by the `Display` implementation.
    SubroutineMissingName(usize, usize),
    /// An entry offset points to an empty entry.
    DanglingEntryOffset,
    /// An entry was parsed as a contiguous range although it also carries a ranges attribute.
    RangeBothContiguousAndNot,
    /// The low address of a range is greater than its high address.
    RangeInverted,
}

impl fmt::Display for DebugInfoError {
    /// Write a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = match *self {
            // The only variant that carries data needs real formatting.
            DebugInfoError::SubroutineMissingName(u, r) => {
                return write!(f, "A subroutine (<{:x}><{:x}>) has no name", u, r);
            }
            DebugInfoError::InvalidDebugLineTarget => {
                "DebugLine refers to a file that does not exist"
            }
            DebugInfoError::MissingComplilationUnit => {
                "A unit was completely empty (i.e., did not contain a compilation unit)"
            }
            DebugInfoError::UnitWithoutCompilationUnit => {
                "The first entry in a unit is not a compilation unit"
            }
            DebugInfoError::DanglingEntryOffset => "Entry offset points to empty entry",
            DebugInfoError::RangeBothContiguousAndNot => {
                "Asked to parse non-contiguous range as contiguous."
            }
            // A range is inverted when its low address exceeds its high address.
            DebugInfoError::RangeInverted => "A range was inverted (low > high)",
        };
        f.write_str(message)
    }
}

impl error::Error for DebugInfoError {
    /// A fixed, variant-independent summary; use `Display` for the specific cause.
    fn description(&self) -> &str {
        "An error occurred while traversing debug symbols"
    }
}
// Error types of this crate, generated by the `error_chain!` macro.
// Everything declared here is re-exported from the crate root via `pub use errors::*`.
mod errors {
use gimli;
use std::io;
use super::DebugInfoError;
error_chain! {
// Error types from elsewhere that are wrapped as variants of this crate's `ErrorKind`.
foreign_links {
Gimli(gimli::Error) #[doc="An error given by the `gimli` library while parsing the DWARF debug symbols."];
BadPath(io::Error) #[doc="The path given could not be used to extract debug symbols."];
InvalidDebugSymbols(DebugInfoError) #[doc="An error occured while traversing the debug symbols in the provided executable."];
}
// Error kinds defined by this crate itself.
errors {
// `s` is the section name WITHOUT the leading dot; the display string adds the dot.
MissingDebugSection(s: &'static str) {
description("missing debug section")
display("missing debug section: '.{}'", s)
}
}
}
}
pub use errors::*;
/// Builder-style configuration for constructing a `Mapping`.
///
/// The `Default` value has every option switched off.
#[derive(Clone, Copy, Default)]
pub struct Options {
// When set, subroutine entries of each unit are collected so that `locate`
// can report a function name.
with_functions: bool,
// When set, each unit's `DW_AT_language` is read so that function names can
// be demangled according to the unit's language.
with_demangling: bool,
}
impl Options {
pub fn with_functions(mut self) -> Self {
self.with_functions = true;
self
}
#[cfg(any(feature = "cpp_demangle", feature = "rustc-demangle"))]
pub fn with_demangling(mut self) -> Self {
self.with_demangling = true;
self.with_functions()
}
pub fn build<P>(self, file_path: P) -> Result<Mapping>
where
P: AsRef<path::Path>,
{
Mapping::new_inner(file_path.as_ref(), self)
}
}
/// Debug information of a single memory-mapped file, queryable by address via `locate`.
pub struct Mapping {
// The memory map is the owner; the parsed data derived from its bytes is the dependent.
// NOTE(review): the `'static` lifetime presumably stands in for "as long as the owned
// map lives", which `OwningHandle` is relied upon to uphold — confirm.
inner: OwningHandle<Box<memmap::Mmap>, Box<MmapDerived<'static>>>,
}
// Everything that borrows from the memory-mapped bytes: the parsed object file (owner)
// together with the debug information extracted from it (dependent).
struct MmapDerived<'mmap> {
inner: OwningHandle<Box<object::File<'mmap>>, Box<EndianDebugInfo<'mmap>>>,
}
// Debug information specialised for the byte order of the object file.
// `Mapping::symbolicate` picks the variant based on `object::File::is_little_endian`.
enum EndianDebugInfo<'object> {
// Debug information of a little-endian file.
LEInfo(DebugInfo<'object, gimli::LittleEndian>),
// Debug information of a big-endian file.
BEInfo(DebugInfo<'object, gimli::BigEndian>),
}
// The parsed debug information of one object file for a fixed endianity.
struct DebugInfo<'object, Endian>
where
Endian: gimli::Endianity,
{
// The `.debug_line` section; `locate` uses it to run a unit's line number program.
debug_line: gimli::DebugLine<'object, Endian>,
// Every compilation unit for which `Unit::parse` returned a unit.
units: Vec<Unit<'object, Endian>>,
// The options this debug information was built with.
opts: Options,
}
impl Mapping {
pub fn new<P>(file_path: P) -> Result<Mapping>
where
P: AsRef<path::Path>,
{
Options::default().build(file_path)
}
fn new_inner(file_path: &path::Path, opts: Options) -> Result<Mapping> {
let file = memmap::Mmap::open_path(file_path, memmap::Protection::Read)
.map_err(|e| ErrorKind::BadPath(e))?;
OwningHandle::try_new(Box::new(file), |mmap| -> Result<_> {
let mmap: &memmap::Mmap = unsafe { &*mmap };
let file = object::File::parse(unsafe { mmap.as_slice() })?;
OwningHandle::try_new(Box::new(file), |file| -> Result<_> {
let file: &object::File = unsafe { &*file };
Self::symbolicate(file, opts)
.chain_err(|| "failed to analyze debug information")
.map(|di| Box::new(di))
}).map(|di| Box::new(MmapDerived { inner: di }))
}).map(|di| Mapping { inner: di })
}
pub fn locate(
&self,
addr: u64,
) -> Result<Option<(path::PathBuf, Option<u64>, Option<Cow<str>>)>> {
self.inner.locate(addr)
}
fn symbolicate<'a>(file: &'a object::File, opts: Options) -> Result<EndianDebugInfo<'a>> {
if file.is_little_endian() {
Ok(EndianDebugInfo::LEInfo(DebugInfo::new(file, opts)?))
} else {
Ok(EndianDebugInfo::BEInfo(DebugInfo::new(file, opts)?))
}
}
}
impl<'object> EndianDebugInfo<'object> {
    // Dispatch the lookup to whichever endian-specific `DebugInfo` this value holds.
    fn locate(&self, addr: u64) -> Result<Option<(path::PathBuf, Option<u64>, Option<Cow<str>>)>> {
        match self {
            &EndianDebugInfo::LEInfo(ref little) => little.locate(addr),
            &EndianDebugInfo::BEInfo(ref big) => big.locate(addr),
        }
    }
}
impl<'object, Endian> DebugInfo<'object, Endian>
where
Endian: gimli::Endianity,
{
// Read the DWARF sections of `file` and parse every compilation unit header.
//
// `.debug_info`, `.debug_abbrev` and `.debug_line` are required; `.debug_ranges` and
// `.debug_str` fall back to empty data when absent. Units for which `Unit::parse`
// yields `None` are left out.
fn new<'a>(file: &'a object::File, opts: Options) -> Result<DebugInfo<'a, Endian>> {
    // Required sections, looked up in the same order in which their absence is reported.
    let info_data = file.get_section(".debug_info")
        .ok_or(ErrorKind::MissingDebugSection("debug_info"))?;
    let abbrev_data = file.get_section(".debug_abbrev")
        .ok_or(ErrorKind::MissingDebugSection("debug_abbrev"))?;
    let line_data = file.get_section(".debug_line")
        .ok_or(ErrorKind::MissingDebugSection("debug_line"))?;
    // Optional sections.
    let ranges_data = file.get_section(".debug_ranges").unwrap_or(&[]);
    let str_data = file.get_section(".debug_str").unwrap_or(&[]);

    let debug_info = gimli::DebugInfo::<Endian>::new(info_data);
    let debug_abbrev = gimli::DebugAbbrev::<Endian>::new(abbrev_data);
    let debug_line = gimli::DebugLine::<Endian>::new(line_data);
    let debug_ranges = gimli::DebugRanges::<Endian>::new(ranges_data);
    let debug_str = gimli::DebugStr::<Endian>::new(str_data);

    let mut units = Vec::new();
    let mut headers = debug_info.units();
    loop {
        let header = match headers.next().chain_err(|| "couldn't get DIE header")? {
            Some(header) => header,
            None => break,
        };
        let parsed = Unit::parse(
            &debug_abbrev,
            &debug_ranges,
            &debug_line,
            &debug_str,
            &header,
            opts,
        ).chain_err(|| "encountered invalid compilation unit")?;
        // `parsed` is an `Option`, so this adds zero or one unit.
        units.extend(parsed);
    }

    Ok(DebugInfo {
        debug_line: debug_line,
        units: units,
        opts: opts,
    })
}
/// Resolve `addr` to `(source path, line number, function name)`.
///
/// Only the first unit whose ranges contain `addr` is consulted. `Ok(None)` is returned
/// when no unit contains the address, or when that unit's line number program yields no
/// row for it. The function name is `None` when the unit has no recorded subroutines or
/// none of them contains `addr`.
pub fn locate(
&self,
addr: u64,
) -> Result<Option<(path::PathBuf, Option<u64>, Option<Cow<str>>)>> {
for unit in &self.units {
if !unit.contains_address(addr) {
continue;
}
// `rowi` counts the accepted rows so far; `current` is the best matching row so far.
let mut rowi = 0;
let mut current = None;
// If this unit uses a skiplist, try to resume from a cached state machine instead of
// running the line number program from its start. A poisoned lock is treated the same
// as having no cache.
let rows = unit.cache_every.and_then(|cache_every| {
unit.skiplist.read().ok().and_then(|skiplist| {
match skiplist.binary_search_by_key(&addr, |&(raddr, _, _)| raddr) {
// Exact hit: entry `i` was stored when `rowi == (i + 1) * cache_every`, and
// `rowi` was incremented afterwards, hence the `+ 1`.
Ok(i) => {
current = Some(skiplist[i].2);
rowi = (i + 1) * cache_every + 1;
Some(skiplist[i].1.clone())
}
// The address sorts before every cached entry: start from scratch.
Err(i) if i == 0 => {
None
}
// Otherwise resume from the cached entry just before the insertion point.
Err(i) => {
current = Some(skiplist[i - 1].2);
rowi = i * cache_every + 1;
Some(skiplist[i - 1].1.clone())
}
}
})
});
let mut rows = if let Some(rows) = rows {
rows
} else {
unit.line_rows(&self.debug_line)
.map_err(|e| Error::from(ErrorKind::Gimli(e)))
.chain_err(|| "cannot get line rows for unit")?
};
// `praddr` is the address of the previously accepted row; `skipseq` is set while the
// remainder of the current sequence is being ignored.
let mut praddr = 0;
let mut skipseq = false;
// Note that a row that fails to decode ends the scan silently: the loop only
// continues on `Ok(Some(..))`.
while let Ok(Some((_, &row))) = rows.next_row() {
if row.end_sequence() {
// The end of a sequence discards the candidate row and stops skipping.
current = None;
skipseq = false;
continue;
}
if skipseq {
continue;
}
let raddr = row.address();
// A row whose address is lower than the previous one causes the rest of the
// sequence to be skipped.
if raddr < praddr {
skipseq = true;
continue;
}
praddr = raddr;
if raddr <= addr {
current = Some(row);
if let Some(cache_every) = unit.cache_every {
// Every `cache_every` accepted rows, remember the state machine so that later
// lookups can resume here. Only append: entries that already exist are kept.
if rowi != 0 && rowi % cache_every == 0 {
if let Ok(mut skiplist) = unit.skiplist.write() {
let i = rowi / cache_every - 1;
if i >= skiplist.len() {
debug_assert!(
i == skiplist.len(),
"we somehow didn't cache a StateMachine for a \
previous iteration step!"
);
skiplist.push((row.address(), rows.clone(), row));
}
}
}
}
rowi += 1;
continue;
}
// First row past `addr`: `current` holds the last row at or before it.
break;
}
if current.is_none() {
return Ok(None);
}
let row = current.unwrap();
let header = rows.header();
let file = row.file(header)
.ok_or_else(|| {
ErrorKind::InvalidDebugSymbols(DebugInfoError::InvalidDebugLineTarget)
})?;
// Assemble the path: the compilation directory is prepended only when the file's
// directory does not start with '/'.
let mut path = path::PathBuf::new();
if let Some(directory) = file.directory(header) {
let directory = directory.to_string_lossy();
if !directory.starts_with('/') {
if let Some(comp_dir) = unit.comp_dir() {
path.push(&*comp_dir.to_string_lossy());
}
}
path.push(&*directory);
}
path.push(&*file.path_name().to_string_lossy());
let line = row.line();
// No subroutines were recorded for this unit, so there is no name to report.
if unit.programs.is_empty() {
return Ok(Some((path, line, None)));
}
// Pick the subroutine that fits `addr` best. Each candidate is
// (program, matching range, distance from the range's start to `addr`).
let mut func: Option<(&Program, &gimli::Range, u64)> = None;
for p in &unit.programs {
if !p.contains_address(addr) {
continue;
}
// Of the program's ranges that contain `addr`, take the one that starts closest to it.
let (range, dist) = p.ranges
.iter()
.filter(|range| addr >= range.begin && addr < range.end)
.map(|range| (range, addr - range.begin))
.min_by_key(|&(_, dist)| dist)
.expect("p.contains_address() is true, but no matching range found");
if let Some((prev, prange, pdist)) = func.take() {
// Prefer the smaller distance; on a tie prefer the range that ends no later.
func = if dist == pdist {
if range.end <= prange.end {
Some((p, range, dist))
} else {
Some((prev, prange, pdist))
}
} else if dist < pdist {
Some((p, range, dist))
} else {
Some((prev, prange, pdist))
};
} else {
func = Some((p, range, dist));
}
}
// Turn the chosen program into a printable name, demangling by unit language.
let func = func.map(|u| {
if unit.language.is_some() {
debug_assert!(
self.opts.with_demangling,
"We shouldn't even bother finding the DW_AT_language if we \
aren't demangling"
);
}
match unit.language {
Some(gimli::DW_LANG_C_plus_plus) |
Some(gimli::DW_LANG_C_plus_plus_03) |
Some(gimli::DW_LANG_C_plus_plus_11) => demangle_cpp_symbol(u.0.name),
Some(gimli::DW_LANG_Rust) => demangle_rust_symbol(u.0.name),
_ => u.0.name.to_string_lossy(),
}
});
return Ok(Some((path, line, func)));
}
// No unit contains the address.
Ok(None)
}
}
// Demangle a C++ symbol; a name that `cpp_demangle` cannot parse is returned as-is
// (lossily converted to UTF-8).
#[cfg(feature = "cpp_demangle")]
fn demangle_cpp_symbol(mangled: &std::ffi::CStr) -> Cow<str> {
    match cpp_demangle::Symbol::new(mangled.to_bytes()) {
        Ok(symbol) => Cow::Owned(symbol.to_string()),
        Err(_) => mangled.to_string_lossy(),
    }
}

// Without the `cpp_demangle` feature the name is only converted to UTF-8 (lossily).
#[cfg(not(feature = "cpp_demangle"))]
fn demangle_cpp_symbol(mangled: &std::ffi::CStr) -> Cow<str> {
    std::ffi::CStr::to_string_lossy(mangled)
}
// Demangle a Rust symbol with `rustc_demangle`, after a lossy conversion to UTF-8.
#[cfg(feature = "rustc-demangle")]
fn demangle_rust_symbol(mangled: &std::ffi::CStr) -> Cow<str> {
    let raw = mangled.to_string_lossy();
    Cow::Owned(rustc_demangle::demangle(raw.as_ref()).to_string())
}

// Without the `rustc-demangle` feature the name is only converted to UTF-8 (lossily).
#[cfg(not(feature = "rustc-demangle"))]
fn demangle_rust_symbol(mangled: &std::ffi::CStr) -> Cow<str> {
    std::ffi::CStr::to_string_lossy(mangled)
}
use std::marker::PhantomData;
// One compilation unit together with what is needed to resolve addresses inside it.
struct Unit<'input, Endian>
where
Endian: gimli::Endianity,
{
// Cache of line number program states, filled lazily by `DebugInfo::locate`.
// Each entry is (address of the cached row, state machine positioned just after that
// row, the row itself). `locate` binary-searches it by address.
skiplist: sync::RwLock<
Vec<
(
u64,
gimli::StateMachine<
'input,
gimli::IncompleteLineNumberProgram<'input, Endian>,
Endian,
>,
gimli::LineNumberRow,
),
>,
>,
// How many accepted rows lie between two skiplist entries; `None` disables the cache
// (chosen in `parse` when the estimated number of rows is below 100).
cache_every: Option<usize>,
// Address size taken from the unit header.
address_size: u8,
// `DW_AT_low_pc` of the unit, else `DW_AT_entry_pc`, else 0.
base_address: u64,
// Address ranges covered by the unit; never empty for a unit returned by `parse`.
ranges: Vec<gimli::Range>,
// Offset of the unit's line number program (`DW_AT_stmt_list`).
line_offset: gimli::DebugLineOffset,
// `DW_AT_comp_dir`, if present as a string.
comp_dir: Option<&'input std::ffi::CStr>,
// `DW_AT_name` of the unit, if present as a string.
comp_name: Option<&'input std::ffi::CStr>,
// Subroutines of the unit; left empty unless `Options::with_functions` was requested.
programs: Vec<Program<'input>>,
// `DW_AT_language`; only read when demangling was requested.
language: Option<gimli::DwLang>,
// Ties the otherwise unused `Endian` parameter to the struct.
phantom: PhantomData<Endian>,
}
impl<'input, Endian> Unit<'input, Endian>
where
Endian: gimli::Endianity,
{
// Parse the compilation unit described by `header`.
//
// Returns `Ok(None)` for units that cannot be used for lookups: units without any
// address range, and units without a `DW_AT_stmt_list` in `DebugLineRef` form.
// When `opts.with_functions` is set, every subprogram and inlined subroutine below the
// unit that has a non-empty range is recorded too; such an entry without a resolvable
// name is an error.
//
// Errors:
// - `DebugInfoError::MissingComplilationUnit` when the unit contains no entry at all;
// - `DebugInfoError::UnitWithoutCompilationUnit` when its first entry is not a
//   `DW_TAG_compile_unit`;
// - `DebugInfoError::SubroutineMissingName` for a nameless subroutine;
// - chained `gimli` errors for malformed attributes, ranges or line programs.
fn parse(
    debug_abbrev: &gimli::DebugAbbrev<Endian>,
    debug_ranges: &gimli::DebugRanges<Endian>,
    debug_line: &gimli::DebugLine<'input, Endian>,
    debug_str: &gimli::DebugStr<'input, Endian>,
    header: &gimli::CompilationUnitHeader<'input, Endian>,
    opts: Options,
) -> Result<Option<Unit<'input, Endian>>> {
    let abbrev = header
        .abbreviations(*debug_abbrev)
        .chain_err(|| "compilation unit refers to non-existing abbreviations")?;
    let mut entries = header.entries(&abbrev);

    let mut unit = {
        // No entry at all means the unit is completely empty.
        let (_, entry) = entries
            .next_dfs()
            .chain_err(|| "compilation unit is broken")?
            .ok_or_else(|| {
                ErrorKind::InvalidDebugSymbols(DebugInfoError::MissingComplilationUnit)
            })?;

        // There is a first entry, but it is not the compilation unit itself.
        if entry.tag() != gimli::DW_TAG_compile_unit {
            return Err(
                ErrorKind::InvalidDebugSymbols(DebugInfoError::UnitWithoutCompilationUnit)
                    .into(),
            );
        }

        // Base address: DW_AT_low_pc, falling back to DW_AT_entry_pc, falling back to 0.
        let base_address = match entry.attr_value(gimli::DW_AT_low_pc) {
            Ok(Some(gimli::AttributeValue::Addr(addr))) => addr,
            Err(e) => {
                return Err(Error::from(ErrorKind::Gimli(e)))
                    .chain_err(|| "invalid low_pc attribute")
            }
            _ => {
                match entry.attr_value(gimli::DW_AT_entry_pc) {
                    Ok(Some(gimli::AttributeValue::Addr(addr))) => addr,
                    Err(e) => {
                        return Err(Error::from(ErrorKind::Gimli(e)))
                            .chain_err(|| "invalid entry_pc attribute")
                    }
                    _ => 0,
                }
            }
        };

        let ranges = Self::get_ranges(entry, debug_ranges, header.address_size(), base_address)
            .chain_err(|| "compilation unit has invalid ranges")?;
        if ranges.is_empty() {
            // A unit that covers no addresses can never match a lookup.
            return Ok(None);
        }

        let line_offset = match entry.attr_value(gimli::DW_AT_stmt_list) {
            Ok(Some(gimli::AttributeValue::DebugLineRef(offset))) => offset,
            Err(e) => {
                return Err(Error::from(ErrorKind::Gimli(e)))
                    .chain_err(|| "invalid compilation unit statement list")
            }
            // Without a line number program there is nothing to map addresses to.
            _ => return Ok(None),
        };

        let comp_dir = entry
            .attr(gimli::DW_AT_comp_dir)
            .map_err(|e| Error::from(ErrorKind::Gimli(e)))
            .chain_err(|| "invalid compilation unit directory")?
            .and_then(|attr| attr.string_value(debug_str));

        let comp_name = entry
            .attr(gimli::DW_AT_name)
            .map_err(|e| Error::from(ErrorKind::Gimli(e)))
            .chain_err(|| "invalid compilation unit name")?
            .and_then(|attr| attr.string_value(debug_str));

        // The language is only needed to choose a demangler.
        let language = if opts.with_demangling {
            entry
                .attr(gimli::DW_AT_language)
                .map_err(|e| Error::from(ErrorKind::Gimli(e)))?
                .and_then(|attr| match attr.value() {
                    gimli::AttributeValue::Language(lang) => Some(lang),
                    _ => None,
                })
        } else {
            None
        };

        // Build the line program once to validate it and to size the skiplist.
        let linep = debug_line
            .program(line_offset, header.address_size(), comp_dir, comp_name)
            .chain_err(|| "invalid compilation unit line rows")?;

        // Rough estimate of the number of rows from the size of the program in bytes.
        // Small programs are scanned linearly; larger ones cache about every
        // sqrt(rows)-th row.
        let nrows = linep.header().raw_program_buf().len() as f64 / 5.5;
        let cache_every = if nrows >= 100.0 {
            Some(nrows.sqrt() as usize)
        } else {
            None
        };

        Unit {
            skiplist: sync::RwLock::default(),
            cache_every: cache_every,
            address_size: header.address_size(),
            base_address: base_address,
            ranges: ranges,
            line_offset: line_offset,
            comp_dir: comp_dir,
            comp_name: comp_name,
            programs: vec![],
            language: language,
            phantom: PhantomData,
        }
    };

    if !opts.with_functions {
        return Ok(Some(unit));
    }

    // Walk the remaining entries and record every subroutine that covers some address.
    while let Some((_, entry)) = entries
        .next_dfs()
        .chain_err(|| "tree below compilation unit yielded invalid entry")?
    {
        match entry.tag() {
            gimli::DW_TAG_inlined_subroutine |
            gimli::DW_TAG_subprogram => (),
            _ => continue,
        }

        let ranges = Self::get_ranges(
            entry,
            debug_ranges,
            header.address_size(),
            unit.base_address,
        ).chain_err(|| "subroutine has invalid ranges")?;
        if ranges.is_empty() {
            continue;
        }

        let maybe_name = Self::resolve_name(entry, header, debug_str, &abbrev)
            .chain_err(|| {
                format!(
                    "failed to resolve name for subroutine at <{:x}><{:x}>",
                    header.offset().0,
                    entry.offset().0
                )
            })?;
        let name = maybe_name.ok_or_else(|| {
            ErrorKind::InvalidDebugSymbols(DebugInfoError::SubroutineMissingName(
                header.offset().0,
                entry.offset().0,
            ))
        })?;

        unit.programs.push(Program {
            ranges: ranges,
            inlined: entry.tag() == gimli::DW_TAG_inlined_subroutine,
            name: name,
        });
    }

    Ok(Some(unit))
}
fn resolve_name<'a, 'b>(
entry: &gimli::DebuggingInformationEntry<'input, 'a, 'b, Endian>,
header: &gimli::CompilationUnitHeader<'input, Endian>,
debug_str: &gimli::DebugStr<'input, Endian>,
abbrev: &gimli::Abbreviations,
) -> Result<Option<&'input std::ffi::CStr>> {
if let Some(name) = entry
.attr(gimli::DW_AT_linkage_name)
.map_err(|e| Error::from(ErrorKind::Gimli(e)))
.chain_err(|| "invalid subprogram linkage name")?
.and_then(|attr| attr.string_value(debug_str))
{
return Ok(Some(name));
}
if let Some(name) = entry
.attr(gimli::DW_AT_MIPS_linkage_name)
.map_err(|e| Error::from(ErrorKind::Gimli(e)))
.chain_err(|| "invalid subprogram linkage name")?
.and_then(|attr| attr.string_value(debug_str))
{
return Ok(Some(name));
}
if let Some(name) = entry
.attr(gimli::DW_AT_name)
.map_err(|e| Error::from(ErrorKind::Gimli(e)))
.chain_err(|| "invalid subprogram name")?
.and_then(|attr| attr.string_value(debug_str))
{
return Ok(Some(name));
}
if let Some(abstract_origin) =
Self::get_entry(entry, header, abbrev, gimli::DW_AT_abstract_origin)
.chain_err(|| "invalid subprogram abstract origin")?
{
let name = Self::resolve_name(&abstract_origin, header, debug_str, abbrev)
.chain_err(|| "abstract origin does not resolve to a name")?;
return Ok(name);
}
if let Some(specification) =
Self::get_entry(entry, header, abbrev, gimli::DW_AT_specification)
.chain_err(|| "invalid subprogram specification")?
{
let name = Self::resolve_name(&specification, header, debug_str, abbrev)
.chain_err(|| "specification does not resolve to a name")?;
return Ok(name);
}
Ok(None)
}
fn get_entry<'a>(
entry: &gimli::DebuggingInformationEntry<'input, 'a, 'a, Endian>,
header: &'a gimli::CompilationUnitHeader<'input, Endian>,
abbrev: &'a gimli::Abbreviations,
attr: gimli::DwAt,
) -> Result<Option<gimli::DebuggingInformationEntry<'input, 'a, 'a, Endian>>> {
if let Some(gimli::AttributeValue::UnitRef(offset)) =
entry
.attr_value(attr)
.map_err(|e| Error::from(ErrorKind::Gimli(e)))?
{
let mut entries = header.entries_at_offset(abbrev, offset)?;
let (_, entry) = entries
.next_dfs()?
.ok_or_else(|| {
ErrorKind::InvalidDebugSymbols(DebugInfoError::DanglingEntryOffset)
})?;
return Ok(Some(entry.clone()));
}
Ok(None)
}
// Collect the address ranges of `entry`.
//
// A range list (`DW_AT_ranges`) takes precedence; otherwise the single low/high pc
// range is used. An entry with neither yields an empty vector.
fn get_ranges(
    entry: &gimli::DebuggingInformationEntry<Endian>,
    debug_ranges: &gimli::DebugRanges<Endian>,
    address_size: u8,
    base_address: u64,
) -> Result<Vec<gimli::Range>> {
    let scattered =
        Self::parse_noncontiguous_ranges(entry, debug_ranges, address_size, base_address)?;
    match scattered {
        Some(ranges) => Ok(ranges),
        None => {
            let single = Self::parse_contiguous_range(entry)?;
            // Zero or one range.
            Ok(single.into_iter().collect())
        }
    }
}
fn parse_noncontiguous_ranges(
entry: &gimli::DebuggingInformationEntry<Endian>,
debug_ranges: &gimli::DebugRanges<Endian>,
address_size: u8,
base_address: u64,
) -> Result<Option<Vec<gimli::Range>>> {
let offset = match entry.attr_value(gimli::DW_AT_ranges) {
Ok(Some(gimli::AttributeValue::DebugRangesRef(offset))) => offset,
Err(e) => {
return Err(Error::from(ErrorKind::Gimli(e)))
.chain_err(|| "invalid ranges attribute")
}
_ => return Ok(None),
};
let ranges = debug_ranges
.ranges(offset, address_size, base_address)
.chain_err(|| "range offsets are not valid")?;
let ranges = ranges.collect().chain_err(|| "range could not be parsed")?;
Ok(Some(ranges))
}
fn parse_contiguous_range(
entry: &gimli::DebuggingInformationEntry<Endian>,
) -> Result<Option<gimli::Range>> {
if let Ok(Some(..)) = entry.attr_value(gimli::DW_AT_ranges) {
return Err(
ErrorKind::InvalidDebugSymbols(DebugInfoError::RangeBothContiguousAndNot).into(),
);
}
let low_pc = match entry.attr_value(gimli::DW_AT_low_pc) {
Ok(Some(gimli::AttributeValue::Addr(addr))) => addr,
Err(e) => {
return Err(Error::from(ErrorKind::Gimli(e)))
.chain_err(|| "invalid low_pc attribute")
}
_ => return Ok(None),
};
let high_pc = match entry.attr_value(gimli::DW_AT_high_pc) {
Ok(Some(gimli::AttributeValue::Addr(addr))) => addr,
Ok(Some(gimli::AttributeValue::Udata(size))) => low_pc.wrapping_add(size),
Err(e) => {
return Err(Error::from(ErrorKind::Gimli(e)))
.chain_err(|| "invalid high_pc attribute")
}
Ok(None) => low_pc.wrapping_add(1),
_ => return Ok(None),
};
if low_pc == 0 {
return Ok(None);
}
if low_pc == high_pc {
return Ok(None);
}
if low_pc > high_pc {
return Err(
ErrorKind::InvalidDebugSymbols(DebugInfoError::RangeInverted).into(),
);
}
Ok(Some(gimli::Range {
begin: low_pc,
end: high_pc,
}))
}
// Whether `address` lies in any of the unit's half-open ranges `[begin, end)`.
fn contains_address(&self, address: u64) -> bool {
    for range in &self.ranges {
        if range.begin <= address && address < range.end {
            return true;
        }
    }
    false
}
// Create a fresh state machine over this unit's line number program.
fn line_rows(
    &self,
    debug_line: &gimli::DebugLine<'input, Endian>,
) -> gimli::Result<
    gimli::StateMachine<'input, gimli::IncompleteLineNumberProgram<'input, Endian>, Endian>,
> {
    let program = debug_line.program(
        self.line_offset,
        self.address_size,
        self.comp_dir,
        self.comp_name,
    )?;
    Ok(program.rows())
}
// The compilation directory of the unit (`DW_AT_comp_dir`), if one was recorded.
fn comp_dir(&self) -> Option<&std::ffi::CStr> {
self.comp_dir
}
}
// A subroutine (subprogram or inlined subroutine) found inside a compilation unit.
struct Program<'input> {
// Address ranges covered by the subroutine; never empty for programs stored by
// `Unit::parse`.
ranges: Vec<gimli::Range>,
// The (possibly mangled) name as resolved by `Unit::resolve_name`.
name: &'input std::ffi::CStr,
// True for `DW_TAG_inlined_subroutine` entries. Recorded but currently not read.
#[allow(dead_code)]
inlined: bool,
}
impl<'input> Program<'input> {
    // Whether `address` lies in any of the subroutine's half-open ranges `[begin, end)`.
    fn contains_address(&self, address: u64) -> bool {
        for range in &self.ranges {
            if range.begin <= address && address < range.end {
                return true;
            }
        }
        false
    }
}
use std::ops::Deref;
// Lets a `MmapDerived` be used directly as the debug information it wraps.
impl<'mmap> Deref for MmapDerived<'mmap> {
    type Target = EndianDebugInfo<'mmap>;

    fn deref(&self) -> &Self::Target {
        // Dereference the owning handle to reach the dependent debug information.
        self.inner.deref()
    }
}