Skip to content

Commit

Permalink
GP-3174 Indexed attribute marshaling
Browse files Browse the repository at this point in the history
  • Loading branch information
caheckman committed Mar 13, 2023
1 parent a3ca5a6 commit b39c60e
Show file tree
Hide file tree
Showing 16 changed files with 162 additions and 66 deletions.
2 changes: 2 additions & 0 deletions Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1061,6 +1061,8 @@ SymbolEntry *ActionConstantPtr::isPointer(AddrSpace *spc,Varnode *vn,PcodeOp *op
if (slot==0)
return (SymbolEntry *)0;
break;
case CPUI_PIECE:
// Pointers get concatenated in structures
case CPUI_COPY:
case CPUI_INT_EQUAL:
case CPUI_INT_NOTEQUAL:
Expand Down
8 changes: 8 additions & 0 deletions Ghidra/Features/Decompiler/src/decompile/cpp/heritage.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1918,6 +1918,12 @@ void Heritage::splitJoinRead(Varnode *vn,JoinRecord *joinrec)

{
PcodeOp *op = vn->loneDescend(); // vn isFree, so loneDescend must be non-null
bool preventConstCollapse = false;
if (vn->isTypeLock()) {
type_metatype meta = vn->getType()->getMetatype();
if (meta == TYPE_STRUCT || meta == TYPE_ARRAY)
preventConstCollapse = true;
}

vector<Varnode *> lastcombo;
vector<Varnode *> nextlev;
Expand All @@ -1937,6 +1943,8 @@ void Heritage::splitJoinRead(Varnode *vn,JoinRecord *joinrec)
fd->opSetInput(concat,mosthalf,0);
fd->opSetInput(concat,leasthalf,1);
fd->opInsertBefore(concat,op);
if (preventConstCollapse)
fd->opMarkNoCollapse(concat);
mosthalf->setPrecisHi(); // Set precision flags to trigger "double precision" rules
leasthalf->setPrecisLo();
op = concat; // Keep -op- as the earliest op in the concatenation construction
Expand Down
44 changes: 44 additions & 0 deletions Ghidra/Features/Decompiler/src/decompile/cpp/marshal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,25 @@ uint4 XmlDecode::getNextAttributeId(void)
return 0;
}

/// Reinterpret the current XML attribute as an indexed form of the given attribute.
///
/// The id of ATTRIB_UNKNOWN is returned when \b attributeIndex does not select an attribute
/// of the element at the back of \b elStack, or when the attribute's name does not begin
/// with the base name of \b attribId.  Otherwise the text following the base name is read
/// as a decimal integer N and the returned id is the base id plus (N-1).
/// \param attribId is the attribute being indexed
/// \return the indexed id or the id of ATTRIB_UNKNOWN
/// \throw LowlevelError if the text after the base name does not yield a non-zero integer
uint4 XmlDecode::getIndexedAttributeId(const AttributeId &attribId)

{
  const Element *curElement = elStack.back();
  const bool inRange = (attributeIndex >= 0 && attributeIndex < curElement->getNumAttributes());
  if (!inRange)
    return ATTRIB_UNKNOWN.getId();

  // In XML the index is carried by the attribute name itself: <base name><decimal index>
  const string &baseName(attribId.getName());
  const string &fullName(curElement->getAttributeName(attributeIndex));
  if (fullName.compare(0,baseName.size(),baseName) != 0)
    return ATTRIB_UNKNOWN.getId();		// Name does not start with the base name

  istringstream suffix(fullName.substr(baseName.size()));	// Everything after the base name
  uint4 oneBasedIndex = 0;
  suffix >> dec >> oneBasedIndex;		// Indices in the name start at 1
  if (oneBasedIndex == 0)			// Covers both a literal 0 and a failed extraction
    throw LowlevelError("Bad indexed attribute: " + baseName);
  return attribId.getId() + (oneBasedIndex - 1);
}

/// \brief Find the attribute index, within the given element, for the given name
///
/// Run through the attributes of the element until we find the one matching the name,
Expand Down Expand Up @@ -479,6 +498,16 @@ void XmlEncode::writeString(const AttributeId &attribId,const string &val)
a_v(outStream,attribId.getName(),val);
}

void XmlEncode::writeStringIndexed(const AttributeId &attribId,uint4 index,const string &val)

{
outStream << ' ' << attribId.getName() << dec << index + 1;
outStream << "=\"";
xml_escape(outStream,val.c_str());
outStream << "\"";

}

void XmlEncode::writeSpace(const AttributeId &attribId,const AddrSpace *spc)

{
Expand Down Expand Up @@ -711,6 +740,12 @@ uint4 PackedDecode::getNextAttributeId(void)
return id;
}

/// Indexed reinterpretation is never needed for the packed format, so this always
/// reports the id of ATTRIB_UNKNOWN regardless of the attribute passed in.
/// \param attribId is the attribute being indexed (not consulted)
/// \return the id of ATTRIB_UNKNOWN
uint4 PackedDecode::getIndexedAttributeId(const AttributeId &attribId)

{
  const uint4 unknownId = ATTRIB_UNKNOWN.getId();
  return unknownId;
}

bool PackedDecode::readBool(void)

{
Expand Down Expand Up @@ -1046,6 +1081,15 @@ void PackedEncode::writeString(const AttributeId &attribId,const string &val)
outStream.write(val.c_str(), length);
}

void PackedEncode::writeStringIndexed(const AttributeId &attribId,uint4 index,const string &val)

{
uint8 length = val.length();
writeHeader(ATTRIBUTE, attribId.getId() + index);
writeInteger((TYPECODE_STRING << TYPECODE_SHIFT), length);
outStream.write(val.c_str(), length);
}

void PackedEncode::writeSpace(const AttributeId &attribId,const AddrSpace *spc)

{
Expand Down
24 changes: 24 additions & 0 deletions Ghidra/Features/Decompiler/src/decompile/cpp/marshal.hh
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,15 @@ public:
/// \return the id of the next attribute or 0
virtual uint4 getNextAttributeId(void)=0;

/// \brief Get the id for the (current) attribute, assuming it is indexed
///
/// Assuming the previous call to getNextAttributeId() returned the id of ATTRIB_UNKNOWN,
/// reinterpret the attribute as being an indexed form of the given attribute. If the attribute
/// matches, return this indexed id, otherwise return ATTRIB_UNKNOWN.
/// \param attribId is the attribute being indexed
/// \return the indexed id or ATTRIB_UNKNOWN
virtual uint4 getIndexedAttributeId(const AttributeId &attribId)=0;

/// \brief Reset attribute traversal for the current element
///
/// Attributes for a single element can be traversed more than once using the getNextAttributeId method.
Expand Down Expand Up @@ -322,6 +331,17 @@ public:
/// \param val is the string to encode
virtual void writeString(const AttributeId &attribId,const string &val)=0;

/// \brief Write an annotated string, using an indexed attribute, into the encoding
///
/// Multiple attributes with a shared name can be written to the same element by calling this method
/// multiple times with a different \b index value. The encoding will use attribute ids up to the base id
/// plus the maximum index passed in. Implementors must be careful to not use other attributes with ids
/// bigger than the base id within the element taking the indexed attribute.
/// \param attribId is the shared AttributeId
/// \param index is the unique index to associate with the string
/// \param val is the string to encode
virtual void writeStringIndexed(const AttributeId &attribId,uint4 index,const string &val)=0;

/// \brief Write an address space reference into the encoding
///
/// The address space is associated with the given AttributeId annotation and the current open element.
Expand Down Expand Up @@ -357,6 +377,7 @@ public:
virtual void closeElementSkipping(uint4 id);
virtual void rewindAttributes(void);
virtual uint4 getNextAttributeId(void);
virtual uint4 getIndexedAttributeId(const AttributeId &attribId);
virtual bool readBool(void);
virtual bool readBool(const AttributeId &attribId);
virtual intb readSignedInteger(void);
Expand Down Expand Up @@ -387,6 +408,7 @@ public:
virtual void writeSignedInteger(const AttributeId &attribId,intb val);
virtual void writeUnsignedInteger(const AttributeId &attribId,uintb val);
virtual void writeString(const AttributeId &attribId,const string &val);
virtual void writeStringIndexed(const AttributeId &attribId,uint4 index,const string &val);
virtual void writeSpace(const AttributeId &attribId,const AddrSpace *spc);
};

Expand Down Expand Up @@ -492,6 +514,7 @@ public:
virtual void closeElementSkipping(uint4 id);
virtual void rewindAttributes(void);
virtual uint4 getNextAttributeId(void);
virtual uint4 getIndexedAttributeId(const AttributeId &attribId);
virtual bool readBool(void);
virtual bool readBool(const AttributeId &attribId);
virtual intb readSignedInteger(void);
Expand Down Expand Up @@ -521,6 +544,7 @@ public:
virtual void writeSignedInteger(const AttributeId &attribId,intb val);
virtual void writeUnsignedInteger(const AttributeId &attribId,uintb val);
virtual void writeString(const AttributeId &attribId,const string &val);
virtual void writeStringIndexed(const AttributeId &attribId,uint4 index,const string &val);
virtual void writeSpace(const AttributeId &attribId,const AddrSpace *spc);
};

Expand Down
1 change: 0 additions & 1 deletion Ghidra/Features/Decompiler/src/decompile/cpp/op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,6 @@ int4 PcodeOp::getRepeatSlot(const Varnode *vn,int4 firstSlot,list<PcodeOp *>::co
bool PcodeOp::isCollapsible(void) const

{
if (code() == CPUI_COPY) return false;
if ((flags & PcodeOp::nocollapse)!=0) return false;
if (!isAssignment()) return false;
if (inrefs.size()==0) return false;
Expand Down
31 changes: 14 additions & 17 deletions Ghidra/Features/Decompiler/src/decompile/cpp/space.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,11 @@ AttributeId ATTRIB_DEADCODEDELAY = AttributeId("deadcodedelay",90);
AttributeId ATTRIB_DELAY = AttributeId("delay", 91);
AttributeId ATTRIB_LOGICALSIZE = AttributeId("logicalsize",92);
AttributeId ATTRIB_PHYSICAL = AttributeId("physical",93);
AttributeId ATTRIB_PIECE1 = AttributeId("piece1",94); // piece attributes must have sequential ids
AttributeId ATTRIB_PIECE2 = AttributeId("piece2",95);
AttributeId ATTRIB_PIECE3 = AttributeId("piece3",96);
AttributeId ATTRIB_PIECE4 = AttributeId("piece4",97);
AttributeId ATTRIB_PIECE5 = AttributeId("piece5",98);
AttributeId ATTRIB_PIECE6 = AttributeId("piece6",99);
AttributeId ATTRIB_PIECE7 = AttributeId("piece7",100);
AttributeId ATTRIB_PIECE8 = AttributeId("piece8",101);
AttributeId ATTRIB_PIECE9 = AttributeId("piece9",102);

// ATTRIB_PIECE is a special attribute for supporting the legacy attributes "piece1", "piece2", ..., "piece9".
// It is effectively a sequence of indexed attributes for use with Encoder::writeStringIndexed.
// The index starts at the ids reserved for "piece1" thru "piece9" but can extend farther.
AttributeId ATTRIB_PIECE = AttributeId("piece",94); // Open slots 94-102

/// Calculate \e highest based on \e addressSize, and \e wordsize.
/// This also calculates the default pointerLowerBound
Expand Down Expand Up @@ -552,20 +548,17 @@ int4 JoinSpace::overlapJoin(uintb offset,int4 size,AddrSpace *pointSpace,uintb p
void JoinSpace::encodeAttributes(Encoder &encoder,uintb offset) const

{
static AttributeId *pieceArray[] = { &ATTRIB_PIECE1, &ATTRIB_PIECE2, &ATTRIB_PIECE3, &ATTRIB_PIECE4,
&ATTRIB_PIECE5, &ATTRIB_PIECE6, &ATTRIB_PIECE7, &ATTRIB_PIECE8, &ATTRIB_PIECE9 };
JoinRecord *rec = getManager()->findJoin(offset); // Record must already exist
encoder.writeSpace(ATTRIB_SPACE, this);
int4 num = rec->numPieces();
if (num >= 8)
throw LowlevelError("Cannot encode more than 8 pieces");
if (num > MAX_PIECES)
throw LowlevelError("Exceeded maximum pieces in one join address");
for(int4 i=0;i<num;++i) {
const VarnodeData &vdata( rec->getPiece(i) );
ostringstream t;
AttributeId *attribId = pieceArray[i];
t << vdata.space->getName() << ":0x";
t << hex << vdata.offset << ':' << dec << vdata.size;
encoder.writeString(*attribId, t.str());
encoder.writeStringIndexed(ATTRIB_PIECE, i, t.str());
}
if (num == 1)
encoder.writeUnsignedInteger(ATTRIB_LOGICALSIZE, rec->getUnified().size);
Expand Down Expand Up @@ -602,9 +595,13 @@ uintb JoinSpace::decodeAttributes(Decoder &decoder,uint4 &size) const
logicalsize = decoder.readUnsignedInteger();
continue;
}
if (attribId < ATTRIB_PIECE1.getId() || attribId > ATTRIB_PIECE9.getId())
else if (attribId == ATTRIB_UNKNOWN)
attribId = decoder.getIndexedAttributeId(ATTRIB_PIECE);
if (attribId < ATTRIB_PIECE.getId())
continue;
int4 pos = (int4)(attribId - ATTRIB_PIECE.getId());
if (pos > MAX_PIECES)
continue;
int4 pos = (int4)(attribId - ATTRIB_PIECE1.getId());
while(pieces.size() <= pos)
pieces.emplace_back();
VarnodeData &vdat( pieces[pos] );
Expand Down
17 changes: 5 additions & 12 deletions Ghidra/Features/Decompiler/src/decompile/cpp/space.hh
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,7 @@ extern AttributeId ATTRIB_DEADCODEDELAY; ///< Marshaling attribute "deadcodedela
extern AttributeId ATTRIB_DELAY; ///< Marshaling attribute "delay"
extern AttributeId ATTRIB_LOGICALSIZE; ///< Marshaling attribute "logicalsize"
extern AttributeId ATTRIB_PHYSICAL; ///< Marshaling attribute "physical"
extern AttributeId ATTRIB_PIECE1; ///< Marshaling attribute "piece1"
extern AttributeId ATTRIB_PIECE2; ///< Marshaling attribute "piece2"
extern AttributeId ATTRIB_PIECE3; ///< Marshaling attribute "piece3"
extern AttributeId ATTRIB_PIECE4; ///< Marshaling attribute "piece4"
extern AttributeId ATTRIB_PIECE5; ///< Marshaling attribute "piece5"
extern AttributeId ATTRIB_PIECE6; ///< Marshaling attribute "piece6"
extern AttributeId ATTRIB_PIECE7; ///< Marshaling attribute "piece7"
extern AttributeId ATTRIB_PIECE8; ///< Marshaling attribute "piece8"
extern AttributeId ATTRIB_PIECE9; ///< Marshaling attribute "piece9"
extern AttributeId ATTRIB_PIECE; ///< Marshaling attribute "piece"

/// \brief A region where processor data is stored
///
Expand All @@ -68,8 +60,8 @@ extern AttributeId ATTRIB_PIECE9; ///< Marshaling attribute "piece9"
/// offsets ranging from 0x00000000 to 0xffffffff within the space
/// for a total of 2^32 addressable bytes within the space.
/// There can be multiple address spaces, and it is typical to have spaces
/// - \b ram Modelling the main processor address bus
/// - \b register Modelling a processors registers
/// - \b ram Modeling the main processor address bus
/// - \b register Modeling a processor's registers
///
/// The processor specification can set up any address spaces it
/// needs in an arbitrary manner, but \e all data manipulated by
Expand All @@ -80,7 +72,7 @@ extern AttributeId ATTRIB_PIECE9; ///< Marshaling attribute "piece9"
/// The analysis engine also uses additional address spaces to
/// model special concepts. These include
/// - \b const There is a \e constant address space for
/// modelling constant values in pcode expressions
/// modeling constant values in p-code expressions
/// (See ConstantSpace)
/// - \b unique There is always a \e unique address space used
/// as a pool for temporary registers. (See UniqueSpace)
Expand Down Expand Up @@ -240,6 +232,7 @@ public:
/// mapping the logical address in this space to its physical pieces. Offsets into this space do not
/// have an absolute meaning, the database may vary what offset is assigned to what set of pieces.
class JoinSpace : public AddrSpace {
static const int4 MAX_PIECES = 64; ///< Maximum number of pieces that can be marshaled in one \e join address
public:
JoinSpace(AddrSpaceManager *m,const Translate *t,int4 ind);
virtual int4 overlapJoin(uintb offset,int4 size,AddrSpace *pointSpace,uintb pointOff,int4 pointSkip) const;
Expand Down
2 changes: 1 addition & 1 deletion Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ void TypeOpFunc::printRaw(ostream &s,const PcodeOp *op)
/// Construct the TypeOp for CPUI_COPY, passing the name "copy" to the TypeOp base constructor
/// \param t is the TypeFactory forwarded to the TypeOp base constructor
TypeOpCopy::TypeOpCopy(TypeFactory *t) : TypeOp(t,CPUI_COPY,"copy")

{
// NOTE(review): opflags is assigned twice in a row; the second assignment overwrites the
// first, so the value in effect is (PcodeOp::unary | PcodeOp::nocollapse).  This looks like
// the before/after lines of a rendered diff -- confirm which single assignment is intended.
opflags = PcodeOp::unary;
opflags = PcodeOp::unary | PcodeOp::nocollapse;
// The behavior object is allocated here with new
behave = new OpBehaviorCopy();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
* The static restoreXML methods read an \<addr> tag and produce a general AddressXML object.
*/
public class AddressXML {

public static int MAX_PIECES = 64; // Maximum pieces that can be marshaled in one join address
private AddressSpace space; // Address space containing the memory range
private long offset; // Starting offset of the range
private long size; // Number of bytes in the size
Expand Down Expand Up @@ -596,32 +598,13 @@ public static void encode(Encoder encoder, Varnode[] varnodes, long logicalsize)
AddressXML.encode(encoder, varnodes[0].getAddress(), varnodes[0].getSize());
return;
}
if (varnodes.length > MAX_PIECES) {
throw new IOException("Exceeded maximum pieces in one join address");
}
encoder.openElement(ELEM_ADDR);
encoder.writeSpace(ATTRIB_SPACE, AddressSpace.VARIABLE_SPACE);
encoder.writeString(ATTRIB_PIECE1, varnodes[0].encodePiece());
if (varnodes.length > 1) {
encoder.writeString(ATTRIB_PIECE2, varnodes[1].encodePiece());
}
if (varnodes.length > 2) {
encoder.writeString(ATTRIB_PIECE3, varnodes[2].encodePiece());
}
if (varnodes.length > 3) {
encoder.writeString(ATTRIB_PIECE4, varnodes[3].encodePiece());
}
if (varnodes.length > 4) {
encoder.writeString(ATTRIB_PIECE5, varnodes[4].encodePiece());
}
if (varnodes.length > 5) {
encoder.writeString(ATTRIB_PIECE6, varnodes[5].encodePiece());
}
if (varnodes.length > 6) {
encoder.writeString(ATTRIB_PIECE7, varnodes[6].encodePiece());
}
if (varnodes.length > 7) {
encoder.writeString(ATTRIB_PIECE8, varnodes[7].encodePiece());
}
if (varnodes.length > 8) {
encoder.writeString(ATTRIB_PIECE9, varnodes[8].encodePiece());
for (int i = 0; i < varnodes.length; ++i) {
encoder.writeStringIndexed(ATTRIB_PIECE, i, varnodes[i].encodePiece());
}
if (logicalsize != 0) {
encoder.writeUnsignedInteger(ATTRIB_LOGICALSIZE, logicalsize);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,15 +175,7 @@ public record AttributeId(String name, int id) {
public static final AttributeId ATTRIB_DELAY = new AttributeId("delay", 91);
public static final AttributeId ATTRIB_LOGICALSIZE = new AttributeId("logicalsize", 92);
public static final AttributeId ATTRIB_PHYSICAL = new AttributeId("physical", 93);
public static final AttributeId ATTRIB_PIECE1 = new AttributeId("piece1", 94); // piece attributes must have sequential ids
public static final AttributeId ATTRIB_PIECE2 = new AttributeId("piece2", 95);
public static final AttributeId ATTRIB_PIECE3 = new AttributeId("piece3", 96);
public static final AttributeId ATTRIB_PIECE4 = new AttributeId("piece4", 97);
public static final AttributeId ATTRIB_PIECE5 = new AttributeId("piece5", 98);
public static final AttributeId ATTRIB_PIECE6 = new AttributeId("piece6", 99);
public static final AttributeId ATTRIB_PIECE7 = new AttributeId("piece7", 100);
public static final AttributeId ATTRIB_PIECE8 = new AttributeId("piece8", 101);
public static final AttributeId ATTRIB_PIECE9 = new AttributeId("piece9", 102);
public static final AttributeId ATTRIB_PIECE = new AttributeId("piece", 94);

// architecture
public static final AttributeId ATTRIB_ADJUSTVMA = new AttributeId("adjustvma", 103);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,17 @@ public interface Decoder extends ByteIngest {
*/
public int getNextAttributeId() throws DecoderException;

/**
* Get the id for the (current) attribute, assuming it is indexed.
* Assuming the previous call to getNextAttributeId() returned the id of ATTRIB_UNKNOWN,
* reinterpret the attribute as being an indexed form of the given attribute. If the attribute
* matches, return this indexed id, otherwise return ATTRIB_UNKNOWN.
* @param attribId is the attribute being indexed
* @return the indexed id or ATTRIB_UNKNOWN
* @throws DecoderException for unexpected end of stream
*/
public int getIndexedAttributeId(AttributeId attribId) throws DecoderException;

/**
* Reset attribute traversal for the current element
* Attributes for a single element can be traversed more than once using the getNextAttributeId
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,19 @@ public interface Encoder {
*/
void writeString(AttributeId attribId, String val) throws IOException;

/**
* Write an annotated string, using an indexed attribute, into the encoding.
* Multiple attributes with a shared name can be written to the same element by calling this
* method multiple times with a different index value. The encoding will use attribute ids up
* to the base id plus the maximum index passed in. Implementors must be careful to not use
* other attributes with ids bigger than the base id within the element taking the indexed attribute.
* @param attribId is the shared AttributeId
* @param index is the unique index to associate with the string
* @param val is the string to encode
* @throws IOException for errors in the underlying stream
*/
void writeStringIndexed(AttributeId attribId, int index, String val) throws IOException;

/**
* Write an address space reference into the encoding
* The address space is associated with the given AttributeId annotation and the current open element.
Expand Down
Loading

0 comments on commit b39c60e

Please sign in to comment.