From 6d32a8e7a0013c81d64944cd5932aa49c8ad9e30 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Mon, 13 Jul 2020 18:52:05 -0700 Subject: [PATCH] Refactor location and range list parsing to take an enum specifying the encoding, and add new _dwo methods on LocationLists that always use the new section and encoding regardless of the DWARF version to support the non-standard GNU split dwarf extension. --- src/read/loclists.rs | 179 ++++++++++++++++++++++++++++--------------- src/read/rnglists.rs | 123 ++++++++++++++++------------- 2 files changed, 190 insertions(+), 112 deletions(-) diff --git a/src/read/loclists.rs b/src/read/loclists.rs index 9011039a..b25bdc1a 100644 --- a/src/read/loclists.rs +++ b/src/read/loclists.rs @@ -196,6 +196,25 @@ impl LocationLists { )) } + /// Similar to `locations`, but with special handling for .dwo files. + /// This should only be used when this `LocationLists` was loaded from a + /// .dwo file. + pub fn locations_dwo( + &self, + offset: LocationListsOffset, + unit_encoding: Encoding, + base_address: u64, + debug_addr: &DebugAddr, + debug_addr_base: DebugAddrBase, + ) -> Result> { + Ok(LocListIter::new( + self.raw_locations_dwo(offset, unit_encoding)?, + base_address, + debug_addr.clone(), + debug_addr_base, + )) + } + /// Iterate over the raw `LocationListEntry`s starting at the given offset. /// /// The `unit_encoding` must match the compilation unit that the @@ -210,14 +229,38 @@ impl LocationLists { &self, offset: LocationListsOffset, unit_encoding: Encoding, + ) -> Result> { + let (mut input, format) = if unit_encoding.version <= 4 { + (self.debug_loc.section.clone(), LocListsFormat::Bare) + } else { + (self.debug_loclists.section.clone(), LocListsFormat::LLE) + }; + input.skip(offset.0)?; + Ok(RawLocListIter::new(input, unit_encoding, format)) + } + + /// Similar to `raw_locations`, but with special handling for .dwo files. + /// This should only be used when this `LocationLists` was loaded from a + /// .dwo file. 
+ pub fn raw_locations_dwo( + &self, + offset: LocationListsOffset, + unit_encoding: Encoding, ) -> Result> { let mut input = if unit_encoding.version <= 4 { + // In the GNU split dwarf extension the locations are present in the + // .debug_loc section but are encoded with the DW_LLE values used + // for the DWARF 5 .debug_loclists section. self.debug_loc.section.clone() } else { self.debug_loclists.section.clone() }; input.skip(offset.0)?; - Ok(RawLocListIter::new(input, unit_encoding)) + Ok(RawLocListIter::new( + input, + unit_encoding, + LocListsFormat::LLE, + )) } /// Returns the `.debug_loclists` offset at the given `base` and `index`. @@ -251,6 +294,15 @@ impl LocationLists { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum LocListsFormat { + /// The bare location list format used before DWARF 5. + Bare, + /// The DW_LLE encoded location list format used in DWARF 5 and the non-standard GNU + /// split dwarf extension. + LLE, +} + /// A raw iterator over a location list. /// /// This iterator does not perform any processing of the location entries, @@ -259,6 +311,7 @@ impl LocationLists { pub struct RawLocListIter { input: R, encoding: Encoding, + format: LocListsFormat, } /// A raw entry in .debug_loclists. @@ -342,70 +395,76 @@ fn parse_data(input: &mut R) -> Result> { impl RawLocListEntry { /// Parse a location list entry from `.debug_loclists` - fn parse(input: &mut R, encoding: Encoding) -> Result> { - if encoding.version < 5 { - let range = RawRange::parse(input, encoding.address_size)?; - return Ok(if range.is_end() { - None - } else if range.is_base_address(encoding.address_size) { - Some(RawLocListEntry::BaseAddress { addr: range.end }) - } else { - let len = R::Offset::from_u16(input.read_u16()?); - let data = Expression(input.split(len)?); - Some(RawLocListEntry::AddressOrOffsetPair { - begin: range.begin, - end: range.end, - data, - }) - }); - } - Ok(match constants::DwLle(input.read_u8()?) 
{ - constants::DW_LLE_end_of_list => None, - constants::DW_LLE_base_addressx => Some(RawLocListEntry::BaseAddressx { - addr: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), - }), - constants::DW_LLE_startx_endx => Some(RawLocListEntry::StartxEndx { - begin: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), - end: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), - data: parse_data(input)?, - }), - constants::DW_LLE_startx_length => Some(RawLocListEntry::StartxLength { - begin: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), - length: input.read_uleb128()?, - data: parse_data(input)?, - }), - constants::DW_LLE_offset_pair => Some(RawLocListEntry::OffsetPair { - begin: input.read_uleb128()?, - end: input.read_uleb128()?, - data: parse_data(input)?, - }), - constants::DW_LLE_default_location => Some(RawLocListEntry::DefaultLocation { - data: parse_data(input)?, - }), - constants::DW_LLE_base_address => Some(RawLocListEntry::BaseAddress { - addr: input.read_address(encoding.address_size)?, - }), - constants::DW_LLE_start_end => Some(RawLocListEntry::StartEnd { - begin: input.read_address(encoding.address_size)?, - end: input.read_address(encoding.address_size)?, - data: parse_data(input)?, - }), - constants::DW_LLE_start_length => Some(RawLocListEntry::StartLength { - begin: input.read_address(encoding.address_size)?, - length: input.read_uleb128()?, - data: parse_data(input)?, - }), - _ => { - return Err(Error::InvalidAddressRange); + fn parse(input: &mut R, encoding: Encoding, format: LocListsFormat) -> Result> { + match format { + LocListsFormat::Bare => { + let range = RawRange::parse(input, encoding.address_size)?; + return Ok(if range.is_end() { + None + } else if range.is_base_address(encoding.address_size) { + Some(RawLocListEntry::BaseAddress { addr: range.end }) + } else { + let len = R::Offset::from_u16(input.read_u16()?); + let data = Expression(input.split(len)?); + 
Some(RawLocListEntry::AddressOrOffsetPair { + begin: range.begin, + end: range.end, + data, + }) + }); } - }) + LocListsFormat::LLE => Ok(match constants::DwLle(input.read_u8()?) { + constants::DW_LLE_end_of_list => None, + constants::DW_LLE_base_addressx => Some(RawLocListEntry::BaseAddressx { + addr: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), + }), + constants::DW_LLE_startx_endx => Some(RawLocListEntry::StartxEndx { + begin: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), + end: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), + data: parse_data(input)?, + }), + constants::DW_LLE_startx_length => Some(RawLocListEntry::StartxLength { + begin: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), + length: input.read_uleb128()?, + data: parse_data(input)?, + }), + constants::DW_LLE_offset_pair => Some(RawLocListEntry::OffsetPair { + begin: input.read_uleb128()?, + end: input.read_uleb128()?, + data: parse_data(input)?, + }), + constants::DW_LLE_default_location => Some(RawLocListEntry::DefaultLocation { + data: parse_data(input)?, + }), + constants::DW_LLE_base_address => Some(RawLocListEntry::BaseAddress { + addr: input.read_address(encoding.address_size)?, + }), + constants::DW_LLE_start_end => Some(RawLocListEntry::StartEnd { + begin: input.read_address(encoding.address_size)?, + end: input.read_address(encoding.address_size)?, + data: parse_data(input)?, + }), + constants::DW_LLE_start_length => Some(RawLocListEntry::StartLength { + begin: input.read_address(encoding.address_size)?, + length: input.read_uleb128()?, + data: parse_data(input)?, + }), + _ => { + return Err(Error::InvalidAddressRange); + } + }), + } } } impl RawLocListIter { /// Construct a `RawLocListIter`. 
- pub fn new(input: R, encoding: Encoding) -> RawLocListIter { - RawLocListIter { input, encoding } + fn new(input: R, encoding: Encoding, format: LocListsFormat) -> RawLocListIter { + RawLocListIter { + input, + encoding, + format, + } } /// Advance the iterator to the next location. @@ -414,7 +473,7 @@ impl RawLocListIter { return Ok(None); } - match RawLocListEntry::parse(&mut self.input, self.encoding) { + match RawLocListEntry::parse(&mut self.input, self.encoding, self.format) { Ok(entry) => { if entry.is_none() { self.input.empty(); diff --git a/src/read/rnglists.rs b/src/read/rnglists.rs index 39d8d5ba..7f9b1961 100644 --- a/src/read/rnglists.rs +++ b/src/read/rnglists.rs @@ -212,13 +212,13 @@ impl RangeLists { offset: RangeListsOffset, unit_encoding: Encoding, ) -> Result> { - let mut input = if unit_encoding.version <= 4 { - self.debug_ranges.section.clone() + let (mut input, format) = if unit_encoding.version <= 4 { + (self.debug_ranges.section.clone(), RangeListsFormat::Bare) } else { - self.debug_rnglists.section.clone() + (self.debug_rnglists.section.clone(), RangeListsFormat::RLE) }; input.skip(offset.0)?; - Ok(RawRngListIter::new(input, unit_encoding)) + Ok(RawRngListIter::new(input, unit_encoding, format)) } /// Returns the `.debug_rnglists` offset at the given `base` and `index`. @@ -255,6 +255,14 @@ impl RangeLists { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum RangeListsFormat { + /// The bare range list format used before DWARF 5. + Bare, + /// The DW_RLE encoded range list format used in DWARF 5. + RLE, +} + /// A raw iterator over an address range list. 
/// /// This iterator does not perform any processing of the range entries, @@ -263,6 +271,7 @@ impl RangeLists { pub struct RawRngListIter { input: R, encoding: Encoding, + format: RangeListsFormat, } /// A raw entry in .debug_rnglists @@ -324,59 +333,69 @@ pub enum RawRngListEntry { impl RawRngListEntry { /// Parse a range entry from `.debug_rnglists` - fn parse>(input: &mut R, encoding: Encoding) -> Result> { - if encoding.version < 5 { - let range = RawRange::parse(input, encoding.address_size)?; - return Ok(if range.is_end() { - None - } else if range.is_base_address(encoding.address_size) { - Some(RawRngListEntry::BaseAddress { addr: range.end }) - } else { - Some(RawRngListEntry::AddressOrOffsetPair { - begin: range.begin, - end: range.end, - }) - }); - } - Ok(match constants::DwRle(input.read_u8()?) { - constants::DW_RLE_end_of_list => None, - constants::DW_RLE_base_addressx => Some(RawRngListEntry::BaseAddressx { - addr: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), - }), - constants::DW_RLE_startx_endx => Some(RawRngListEntry::StartxEndx { - begin: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), - end: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), - }), - constants::DW_RLE_startx_length => Some(RawRngListEntry::StartxLength { - begin: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), - length: input.read_uleb128()?, - }), - constants::DW_RLE_offset_pair => Some(RawRngListEntry::OffsetPair { - begin: input.read_uleb128()?, - end: input.read_uleb128()?, - }), - constants::DW_RLE_base_address => Some(RawRngListEntry::BaseAddress { - addr: input.read_address(encoding.address_size)?, - }), - constants::DW_RLE_start_end => Some(RawRngListEntry::StartEnd { - begin: input.read_address(encoding.address_size)?, - end: input.read_address(encoding.address_size)?, - }), - constants::DW_RLE_start_length => Some(RawRngListEntry::StartLength { - begin: 
input.read_address(encoding.address_size)?, - length: input.read_uleb128()?, - }), - _ => { - return Err(Error::InvalidAddressRange); + fn parse>( + input: &mut R, + encoding: Encoding, + format: RangeListsFormat, + ) -> Result> { + match format { + RangeListsFormat::Bare => { + let range = RawRange::parse(input, encoding.address_size)?; + return Ok(if range.is_end() { + None + } else if range.is_base_address(encoding.address_size) { + Some(RawRngListEntry::BaseAddress { addr: range.end }) + } else { + Some(RawRngListEntry::AddressOrOffsetPair { + begin: range.begin, + end: range.end, + }) + }); } - }) + RangeListsFormat::RLE => Ok(match constants::DwRle(input.read_u8()?) { + constants::DW_RLE_end_of_list => None, + constants::DW_RLE_base_addressx => Some(RawRngListEntry::BaseAddressx { + addr: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), + }), + constants::DW_RLE_startx_endx => Some(RawRngListEntry::StartxEndx { + begin: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), + end: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), + }), + constants::DW_RLE_startx_length => Some(RawRngListEntry::StartxLength { + begin: DebugAddrIndex(input.read_uleb128().and_then(R::Offset::from_u64)?), + length: input.read_uleb128()?, + }), + constants::DW_RLE_offset_pair => Some(RawRngListEntry::OffsetPair { + begin: input.read_uleb128()?, + end: input.read_uleb128()?, + }), + constants::DW_RLE_base_address => Some(RawRngListEntry::BaseAddress { + addr: input.read_address(encoding.address_size)?, + }), + constants::DW_RLE_start_end => Some(RawRngListEntry::StartEnd { + begin: input.read_address(encoding.address_size)?, + end: input.read_address(encoding.address_size)?, + }), + constants::DW_RLE_start_length => Some(RawRngListEntry::StartLength { + begin: input.read_address(encoding.address_size)?, + length: input.read_uleb128()?, + }), + _ => { + return Err(Error::InvalidAddressRange); + } + }), + } } } impl RawRngListIter 
{ /// Construct a `RawRngListIter`. - fn new(input: R, encoding: Encoding) -> RawRngListIter { - RawRngListIter { input, encoding } + fn new(input: R, encoding: Encoding, format: RangeListsFormat) -> RawRngListIter { + RawRngListIter { + input, + encoding, + format, + } } /// Advance the iterator to the next range. @@ -385,7 +404,7 @@ impl RawRngListIter { return Ok(None); } - match RawRngListEntry::parse(&mut self.input, self.encoding) { + match RawRngListEntry::parse(&mut self.input, self.encoding, self.format) { Ok(range) => { if range.is_none() { self.input.empty();