nac3/nac3ld/src/dwarf.rs

501 lines
16 KiB
Rust
Raw Normal View History

2022-05-30 17:11:05 +08:00
#![allow(non_camel_case_types, non_upper_case_globals)]
use std::mem;
2022-06-06 14:37:48 +08:00
use byteorder::{ByteOrder, LittleEndian};
2022-05-30 17:11:05 +08:00
// DWARF exception-header pointer encodings (`DW_EH_PE_*`).
//
// In this module an encoding byte is decoded in three parts:
// - `encoding & 0x0F` selects the value format (how many bytes are read and how),
// - `encoding & 0x70` selects how the value is applied (e.g. relative to the PC),
// - `DW_EH_PE_indirect` (bit 7) marks an indirect pointer.

/// No value is present; reading a pointer with this encoding yields `Err(())`.
pub const DW_EH_PE_omit: u8 = 0xFF;

// Value formats (low nibble).
/// Absolute pointer; read as a 4-byte value by this module.
pub const DW_EH_PE_absptr: u8 = 0x00;
/// Unsigned LEB128 value.
pub const DW_EH_PE_uleb128: u8 = 0x01;
/// Unsigned 2-byte value.
pub const DW_EH_PE_udata2: u8 = 0x02;
/// Unsigned 4-byte value.
pub const DW_EH_PE_udata4: u8 = 0x03;
/// Unsigned 8-byte value.
pub const DW_EH_PE_udata8: u8 = 0x04;
/// Signed LEB128 value.
pub const DW_EH_PE_sleb128: u8 = 0x09;
/// Signed 2-byte value.
pub const DW_EH_PE_sdata2: u8 = 0x0A;
/// Signed 4-byte value.
pub const DW_EH_PE_sdata4: u8 = 0x0B;
/// Signed 8-byte value.
pub const DW_EH_PE_sdata8: u8 = 0x0C;

// Application modifiers (bits 4-6).
/// Value is relative to the address at which the encoded pointer itself is stored.
pub const DW_EH_PE_pcrel: u8 = 0x10;
/// Text-relative value; not implemented by the decoders in this module.
pub const DW_EH_PE_textrel: u8 = 0x20;
/// Data-relative value; not implemented by the decoders in this module.
pub const DW_EH_PE_datarel: u8 = 0x30;
/// Function-relative value; not implemented by the decoders in this module.
pub const DW_EH_PE_funcrel: u8 = 0x40;
/// Value is stored at the next 4-byte aligned address and treated as absolute by this module.
pub const DW_EH_PE_aligned: u8 = 0x50;

/// Indirect pointer flag; the decoders in this module do not support it.
pub const DW_EH_PE_indirect: u8 = 0x80;
2024-11-28 16:51:07 +08:00
/// Cursor over a byte slice that also tracks the virtual address of the next unread byte.
///
/// Every read consumes bytes from the front of `slice` and advances `virt_addr` by the same
/// amount. Address arithmetic wraps around the 32-bit address space.
#[derive(Clone)]
pub struct DwarfReader<'a> {
    /// Bytes that have not been consumed yet.
    pub slice: &'a [u8],
    /// Virtual address that corresponds to `slice[0]`.
    pub virt_addr: u32,
}

impl<'a> DwarfReader<'a> {
    /// Creates a reader over `slice`, whose first byte is located at `virt_addr`.
    pub fn new(slice: &[u8], virt_addr: u32) -> DwarfReader<'_> {
        DwarfReader { slice, virt_addr }
    }

    /// Skips `offset` bytes.
    ///
    /// # Panics
    ///
    /// Panics if fewer than `offset` bytes remain.
    pub fn offset(&mut self, offset: u32) {
        self.slice = &self.slice[offset as usize..];
        self.virt_addr = self.virt_addr.wrapping_add(offset);
    }

    /// Reads an unsigned LEB128 value.
    ///
    /// ULEB128 and SLEB128 encodings are defined in Section 7.6 - "Variable Length Data" of the
    /// [DWARF-4 Manual](https://dwarfstd.org/doc/DWARF4.pdf).
    ///
    /// All bytes of the encoding are consumed. Payload bits that do not fit into 64 bits are
    /// discarded instead of overflowing the shift.
    ///
    /// # Panics
    ///
    /// Panics if the input ends before the terminating byte.
    pub fn read_uleb128(&mut self) -> u64 {
        let mut shift: u32 = 0;
        let mut result: u64 = 0;
        loop {
            let byte = self.read_u8();
            // An over-long encoding would otherwise shift by 64 bits or more, which is an
            // arithmetic overflow.
            if shift < u64::BITS {
                result |= u64::from(byte & 0x7F) << shift;
            }
            shift = shift.saturating_add(7);
            if byte & 0x80 == 0 {
                return result;
            }
        }
    }

    /// Reads a signed LEB128 value.
    ///
    /// All bytes of the encoding are consumed. Payload bits that do not fit into 64 bits are
    /// discarded instead of overflowing the shift.
    ///
    /// # Panics
    ///
    /// Panics if the input ends before the terminating byte.
    pub fn read_sleb128(&mut self) -> i64 {
        let mut shift: u32 = 0;
        let mut result: u64 = 0;
        let mut byte: u8;
        loop {
            byte = self.read_u8();
            if shift < u64::BITS {
                result |= u64::from(byte & 0x7F) << shift;
            }
            shift = shift.saturating_add(7);
            if byte & 0x80 == 0 {
                break;
            }
        }
        // sign-extend from the sign bit (0x40) of the last byte
        if shift < u64::BITS && (byte & 0x40) != 0 {
            result |= (!0u64) << shift;
        }
        result as i64
    }

    /// Reads a single byte.
    ///
    /// # Panics
    ///
    /// Panics if no bytes remain.
    pub fn read_u8(&mut self) -> u8 {
        let val = self.slice[0];
        self.slice = &self.slice[1..];
        // Wrap like `offset` does, so both ways of advancing agree at the top of the address space.
        self.virt_addr = self.virt_addr.wrapping_add(1);
        val
    }
}
2022-06-06 14:37:48 +08:00
macro_rules! impl_read_fn {
( $($type: ty, $byteorder_fn: ident);* ) => {
impl<'a> DwarfReader<'a> {
$(
pub fn $byteorder_fn(&mut self) -> $type {
let val = LittleEndian::$byteorder_fn(self.slice);
self.slice = &self.slice[mem::size_of::<$type>()..];
self.virt_addr += mem::size_of::<$type>() as u32;
2022-06-06 14:37:48 +08:00
val
}
)*
}
}
}
impl_read_fn!(
u16, read_u16;
u32, read_u32;
u64, read_u64;
i16, read_i16;
i32, read_i32;
i64, read_i64
);
pub struct DwarfWriter<'a> {
pub slice: &'a mut [u8],
pub offset: usize,
2022-05-30 17:11:05 +08:00
}
2022-06-06 14:37:48 +08:00
impl<'a> DwarfWriter<'a> {
pub fn new(slice: &mut [u8]) -> DwarfWriter {
DwarfWriter { slice, offset: 0 }
}
pub fn write_u8(&mut self, data: u8) {
self.slice[self.offset] = data;
self.offset += 1;
2022-05-30 17:11:05 +08:00
}
2022-06-06 14:37:48 +08:00
pub fn write_u32(&mut self, data: u32) {
LittleEndian::write_u32(&mut self.slice[self.offset..], data);
self.offset += 4;
2022-05-30 17:11:05 +08:00
}
}
2022-06-06 14:37:48 +08:00
fn read_encoded_pointer(reader: &mut DwarfReader, encoding: u8) -> Result<usize, ()> {
2022-05-30 17:11:05 +08:00
if encoding == DW_EH_PE_omit {
return Err(());
}
// DW_EH_PE_aligned implies it's an absolute pointer value
2022-06-06 14:37:48 +08:00
// However, we are linking library for 32-bits architecture
// The size of variable should be 4 bytes instead
2022-05-30 17:11:05 +08:00
if encoding == DW_EH_PE_aligned {
2022-06-06 14:37:48 +08:00
let shifted_virt_addr = round_up(reader.virt_addr as usize, mem::size_of::<u32>())?;
let addr_inc = shifted_virt_addr - reader.virt_addr as usize;
reader.slice = &reader.slice[addr_inc..];
reader.virt_addr = shifted_virt_addr as u32;
return Ok(reader.read_u32() as usize);
2022-05-30 17:11:05 +08:00
}
match encoding & 0x0F {
2024-06-12 15:13:09 +08:00
DW_EH_PE_absptr | DW_EH_PE_udata4 => Ok(reader.read_u32() as usize),
2022-05-30 17:11:05 +08:00
DW_EH_PE_uleb128 => Ok(reader.read_uleb128() as usize),
2022-06-06 14:37:48 +08:00
DW_EH_PE_udata2 => Ok(reader.read_u16() as usize),
DW_EH_PE_udata8 => Ok(reader.read_u64() as usize),
2022-05-30 17:11:05 +08:00
DW_EH_PE_sleb128 => Ok(reader.read_sleb128() as usize),
2022-06-06 14:37:48 +08:00
DW_EH_PE_sdata2 => Ok(reader.read_i16() as usize),
DW_EH_PE_sdata4 => Ok(reader.read_i32() as usize),
DW_EH_PE_sdata8 => Ok(reader.read_i64() as usize),
2022-05-30 17:11:05 +08:00
_ => Err(()),
}
}
2024-06-12 14:45:03 +08:00
fn read_encoded_pointer_with_pc(reader: &mut DwarfReader, encoding: u8) -> Result<usize, ()> {
2022-05-30 17:11:05 +08:00
let entry_virt_addr = reader.virt_addr;
let mut result = read_encoded_pointer(reader, encoding)?;
// DW_EH_PE_aligned implies it's an absolute pointer value
if encoding == DW_EH_PE_aligned {
return Ok(result);
}
result = match encoding & 0x70 {
DW_EH_PE_pcrel => result.wrapping_add(entry_virt_addr as usize),
// .eh_frame normally would not have these kinds of relocations
// These would not be supported by a dedicated linker relocation schemes for RISC-V
DW_EH_PE_textrel | DW_EH_PE_datarel | DW_EH_PE_funcrel | DW_EH_PE_aligned => {
unimplemented!()
}
// Other values should be impossible
_ => unreachable!(),
};
if encoding & DW_EH_PE_indirect != 0 {
// There should not be a need for indirect addressing, as assembly code from
// the dynamic library should not be freely moved relative to the EH frame.
unreachable!()
}
Ok(result)
}
/// Rounds `unrounded` up to the next multiple of `align`.
///
/// Returns `Err(())` when `align` is not a power of two (this includes zero), or when the
/// rounded value does not fit in a `usize`.
#[inline]
fn round_up(unrounded: usize, align: usize) -> Result<usize, ()> {
    if !align.is_power_of_two() {
        return Err(());
    }
    let mask = align - 1;
    // `checked_add` turns an overflow near `usize::MAX` into an error instead of a panic
    // (debug builds) or a silently wrapped result (release builds).
    unrounded.checked_add(mask).map(|bumped| bumped & !mask).ok_or(())
}
/// Minimalistic structure to store everything needed for parsing FDEs to synthesize `.eh_frame_hdr`
/// section.
///
/// Refer to [The Linux Standard Base Core Specification, Generic Part](https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html)
/// for more information.
2022-06-06 14:37:48 +08:00
pub struct EH_Frame<'a> {
reader: DwarfReader<'a>,
2022-05-30 17:11:05 +08:00
}
2022-06-06 14:37:48 +08:00
impl<'a> EH_Frame<'a> {
/// Creates an [EH_Frame] using the bytes in the `.eh_frame` section and its address in the ELF
/// file.
2024-06-12 15:13:09 +08:00
pub fn new(eh_frame_slice: &[u8], eh_frame_addr: u32) -> EH_Frame {
EH_Frame { reader: DwarfReader::new(eh_frame_slice, eh_frame_addr) }
}
/// Returns an [Iterator] over all Call Frame Information (CFI) records.
pub fn cfi_records(&self) -> CFI_Records<'a> {
2024-11-28 16:51:07 +08:00
let reader = self.reader.clone();
let len = reader.slice.len();
2024-06-12 14:45:03 +08:00
CFI_Records { reader, available: len }
}
}
/// A single Call Frame Information (CFI) record.
///
/// From the [specification](https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html):
///
/// > Each CFI record contains a Common Information Entry (CIE) record followed by 1 or more Frame
2024-08-21 11:10:52 +08:00
/// > Description Entry (FDE) records.
pub struct CFI_Record<'a> {
// It refers to the augmentation data that corresponds to 'R' in the augmentation string
fde_pointer_encoding: u8,
fde_reader: DwarfReader<'a>,
}
impl<'a> CFI_Record<'a> {
pub fn from_reader(cie_reader: &mut DwarfReader<'a>) -> Result<CFI_Record<'a>, ()> {
2022-06-06 14:37:48 +08:00
let length = cie_reader.read_u32();
2022-05-30 17:11:05 +08:00
let fde_reader = match length {
// eh_frame with 0 lengths means the CIE is terminated
0 => panic!("Cannot create an EH_Frame from a termination CIE"),
// length == u32::MAX means that the length is only representable with 64 bits,
2022-05-30 17:11:05 +08:00
// which does not make sense in a system with 32-bit address.
2024-06-12 15:13:09 +08:00
0xFFFF_FFFF => unimplemented!(),
2022-05-30 17:11:05 +08:00
_ => {
2024-11-28 16:51:07 +08:00
let mut fde_reader = cie_reader.clone();
fde_reader.offset(length);
2022-05-30 17:11:05 +08:00
fde_reader
}
};
// Routine check on the .eh_frame well-formness, in terms of CIE ID & Version args.
let cie_ptr = cie_reader.read_u32();
assert_eq!(cie_ptr, 0);
2022-06-06 14:37:48 +08:00
assert_eq!(cie_reader.read_u8(), 1);
2022-05-30 17:11:05 +08:00
// Parse augmentation string
// The first character must be 'z', there is no way to proceed otherwise
2022-06-06 14:37:48 +08:00
assert_eq!(cie_reader.read_u8(), b'z');
2022-05-30 17:11:05 +08:00
// Establish a pointer that skips ahead of the string
// Skip code/data alignment factors & return address register along the way as well
// We only tackle the case where 'z' and 'R' are part of the augmentation string, otherwise
// we cannot get the addresses to make .eh_frame_hdr
2024-11-28 16:51:07 +08:00
let mut aug_data_reader = cie_reader.clone();
2022-05-30 17:11:05 +08:00
let mut aug_str_len = 0;
loop {
2022-06-06 14:37:48 +08:00
if aug_data_reader.read_u8() == b'\0' {
2022-05-30 17:11:05 +08:00
break;
}
aug_str_len += 1;
}
if aug_str_len == 0 {
unimplemented!();
}
aug_data_reader.read_uleb128(); // Code alignment factor
aug_data_reader.read_sleb128(); // Data alignment factor
aug_data_reader.read_uleb128(); // Return address register
aug_data_reader.read_uleb128(); // Augmentation data length
let mut fde_pointer_encoding = DW_EH_PE_omit;
for _ in 0..aug_str_len {
2022-06-06 14:37:48 +08:00
match cie_reader.read_u8() {
2022-05-30 17:11:05 +08:00
b'L' => {
2022-06-06 14:37:48 +08:00
aug_data_reader.read_u8();
2022-05-30 17:11:05 +08:00
}
b'P' => {
2022-06-06 14:37:48 +08:00
let encoding = aug_data_reader.read_u8();
2022-05-30 17:11:05 +08:00
read_encoded_pointer(&mut aug_data_reader, encoding)?;
}
b'R' => {
2022-06-06 14:37:48 +08:00
fde_pointer_encoding = aug_data_reader.read_u8();
2022-05-30 17:11:05 +08:00
}
// Other characters are not supported
_ => unimplemented!(),
}
}
assert_ne!(fde_pointer_encoding, DW_EH_PE_omit);
2024-06-12 14:45:03 +08:00
Ok(CFI_Record { fde_pointer_encoding, fde_reader })
2022-05-30 17:11:05 +08:00
}
/// Returns a [DwarfReader] initialized to the first Frame Description Entry (FDE) of this CFI
/// record.
pub fn get_fde_reader(&self) -> DwarfReader<'a> {
2024-11-28 16:51:07 +08:00
self.fde_reader.clone()
}
/// Returns an [Iterator] over all Frame Description Entries (FDEs).
pub fn fde_records(&self) -> FDE_Records<'a> {
let reader = self.get_fde_reader();
let len = reader.slice.len();
2024-06-12 14:45:03 +08:00
FDE_Records { pointer_encoding: self.fde_pointer_encoding, reader, available: len }
}
}
/// [Iterator] over Call Frame Information (CFI) records in an
/// [Exception Handling (EH) frame][EH_Frame].
pub struct CFI_Records<'a> {
reader: DwarfReader<'a>,
available: usize,
}
impl<'a> Iterator for CFI_Records<'a> {
type Item = CFI_Record<'a>;
fn next(&mut self) -> Option<Self::Item> {
2022-05-30 17:11:05 +08:00
loop {
if self.available == 0 {
return None;
2022-05-30 17:11:05 +08:00
}
2024-11-28 16:51:07 +08:00
let mut this_reader = self.reader.clone();
// Remove the length of the header and the content from the counter
let length = self.reader.read_u32();
let length = match length {
// eh_frame with 0-length means the CIE is terminated
0 => return None,
2024-06-12 15:13:09 +08:00
0xFFFF_FFFF => unimplemented!("CIE entries larger than 4 bytes not supported"),
2022-05-30 17:11:05 +08:00
other => other,
} as usize;
2022-05-30 17:11:05 +08:00
// Remove the length of the header and the content from the counter
self.available -= length + mem::size_of::<u32>();
2024-11-28 16:51:07 +08:00
let mut next_reader = self.reader.clone();
next_reader.offset(length as u32);
2022-05-30 17:11:05 +08:00
let cie_ptr = self.reader.read_u32();
2022-05-30 17:11:05 +08:00
self.reader = next_reader;
// Skip this record if it is a FDE
if cie_ptr == 0 {
// Rewind back to the start of the CFI Record
2024-06-12 14:45:03 +08:00
return Some(CFI_Record::from_reader(&mut this_reader).ok().unwrap());
}
}
}
}
/// [Iterator] over Frame Description Entries (FDEs) in an
/// [Exception Handling (EH) frame][EH_Frame].
pub struct FDE_Records<'a> {
pointer_encoding: u8,
reader: DwarfReader<'a>,
available: usize,
}
impl<'a> Iterator for FDE_Records<'a> {
type Item = (u32, u32);
2022-05-30 17:11:05 +08:00
fn next(&mut self) -> Option<Self::Item> {
// Parse each FDE to obtain the starting address that the FDE applies to
// Send the FDE offset and the mentioned address to a callback that write up the
// .eh_frame_hdr section
if self.available == 0 {
return None;
2022-05-30 17:11:05 +08:00
}
let fde_addr = self.reader.virt_addr;
// Remove the length of the header and the content from the counter
let length = match self.reader.read_u32() {
// eh_frame with 0-length means the CIE is terminated
0 => return None,
2024-06-12 15:13:09 +08:00
0xFFFF_FFFF => unimplemented!("CIE entries larger than 4 bytes not supported"),
other => other,
} as usize;
// Remove the length of the header and the content from the counter
self.available -= length + mem::size_of::<u32>();
2024-11-28 16:51:07 +08:00
let mut next_fde_reader = self.reader.clone();
next_fde_reader.offset(length as u32);
let cie_ptr = self.reader.read_u32();
let next_val = if cie_ptr != 0 {
let pc_begin = read_encoded_pointer_with_pc(&mut self.reader, self.pointer_encoding)
.expect("Failed to read PC Begin");
Some((pc_begin as u32, fde_addr))
} else {
None
};
self.reader = next_fde_reader;
next_val
2022-05-30 17:11:05 +08:00
}
}
2022-06-06 14:37:48 +08:00
pub struct EH_Frame_Hdr<'a> {
fde_writer: DwarfWriter<'a>,
2022-05-30 17:11:05 +08:00
eh_frame_hdr_addr: u32,
fdes: Vec<(u32, u32)>,
}
2022-06-06 14:37:48 +08:00
impl<'a> EH_Frame_Hdr<'a> {
/// Create a [EH_Frame_Hdr] object, and write out the fixed fields of `.eh_frame_hdr` to memory.
///
/// Load address is not known at this point.
2022-06-06 14:37:48 +08:00
pub fn new(
2022-05-30 17:11:05 +08:00
eh_frame_hdr_slice: &mut [u8],
eh_frame_hdr_addr: u32,
eh_frame_addr: u32,
) -> EH_Frame_Hdr {
2022-06-06 14:37:48 +08:00
let mut writer = DwarfWriter::new(eh_frame_hdr_slice);
2022-05-30 17:11:05 +08:00
2024-06-12 14:45:03 +08:00
writer.write_u8(1); // version
writer.write_u8(0x1B); // eh_frame_ptr_enc - PC-relative 4-byte signed value
writer.write_u8(0x03); // fde_count_enc - 4-byte unsigned value
writer.write_u8(0x3B); // table_enc - .eh_frame_hdr section-relative 4-byte signed value
2024-11-29 15:42:51 +08:00
let eh_frame_offset = eh_frame_addr.wrapping_sub(eh_frame_hdr_addr + writer.offset as u32);
2024-06-12 14:45:03 +08:00
writer.write_u32(eh_frame_offset); // eh_frame_ptr
writer.write_u32(0); // `fde_count`, will be written in finalize_fde
2022-06-06 14:37:48 +08:00
EH_Frame_Hdr { fde_writer: writer, eh_frame_hdr_addr, fdes: Vec::new() }
}
2022-05-30 17:11:05 +08:00
/// The offset of the `fde_count` value relative to the start of the `.eh_frame_hdr` section in
/// bytes.
2022-06-06 14:37:48 +08:00
fn fde_count_offset() -> usize {
8
2022-05-30 17:11:05 +08:00
}
2022-06-06 14:37:48 +08:00
pub fn add_fde(&mut self, init_loc: u32, addr: u32) {
2022-05-30 17:11:05 +08:00
self.fdes.push((
init_loc.wrapping_sub(self.eh_frame_hdr_addr),
addr.wrapping_sub(self.eh_frame_hdr_addr),
));
}
2022-06-06 14:37:48 +08:00
pub fn finalize_fde(mut self) {
2022-05-30 17:11:05 +08:00
self.fdes
.sort_by(|(left_init_loc, _), (right_init_loc, _)| left_init_loc.cmp(right_init_loc));
for (init_loc, addr) in &self.fdes {
2022-06-06 14:37:48 +08:00
self.fde_writer.write_u32(*init_loc);
self.fde_writer.write_u32(*addr);
2022-05-30 17:11:05 +08:00
}
2024-06-12 14:45:03 +08:00
LittleEndian::write_u32(
&mut self.fde_writer.slice[Self::fde_count_offset()..],
self.fdes.len() as u32,
);
2022-05-30 17:11:05 +08:00
}
2022-06-06 14:37:48 +08:00
pub fn size_from_eh_frame(eh_frame: &[u8]) -> usize {
// The virtual address of the EH frame does not matter in this case
2022-05-30 17:11:05 +08:00
// Calculation of size does not involve modifying any headers
2022-06-06 14:37:48 +08:00
let mut reader = DwarfReader::new(eh_frame, 0);
2022-05-30 17:11:05 +08:00
let mut fde_count = 0;
2022-06-06 14:37:48 +08:00
while !reader.slice.is_empty() {
2022-05-30 17:11:05 +08:00
// The original length field should be able to hold the entire value.
// The device memory space is limited to 32-bits addresses anyway.
2022-06-06 14:37:48 +08:00
let entry_length = reader.read_u32();
2024-06-12 15:13:09 +08:00
if entry_length == 0 || entry_length == 0xFFFF_FFFF {
2022-05-30 17:11:05 +08:00
unimplemented!()
}
// This slot stores the CIE ID (for CIE)/CIE Pointer (for FDE).
// This value must be non-zero for FDEs.
let cie_ptr = reader.read_u32();
if cie_ptr != 0 {
2022-05-30 17:11:05 +08:00
fde_count += 1;
}
2024-06-12 15:13:09 +08:00
reader.offset(entry_length - mem::size_of::<u32>() as u32);
2022-05-30 17:11:05 +08:00
}
2022-05-30 17:11:05 +08:00
12 + fde_count * 8
}
}