Skip to content

Commit

Permalink
support new syntax RADIX16. see issue keystone-engine#11
Browse files Browse the repository at this point in the history
  • Loading branch information
aquynh committed Sep 9, 2016
1 parent 3664a62 commit eac9cc9
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 7 deletions.
11 changes: 6 additions & 5 deletions include/keystone/keystone.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,11 +149,12 @@ typedef enum ks_opt_type {

// Runtime option values, passed as the `value` argument of ks_option()
// together with a ks_opt_type (declared above).
// Every enumerator occupies its own bit (1 << n). In this change ks_option()
// accepts KS_OPT_SYNTAX_RADIX16 either on its own or OR-ed with one of the
// Intel/NASM/GAS/ATT syntax values.
// NOTE(review): the first five enumerators appear twice in this listing; this
// looks like the before/after sides of a whitespace-only diff -- confirm that
// the real header declares each of them only once.
typedef enum ks_opt_value {
KS_OPT_SYNTAX_INTEL = 1 << 0, // X86 Intel syntax - default on X86 (KS_OPT_SYNTAX).
KS_OPT_SYNTAX_ATT = 1 << 1, // X86 AT&T asm syntax (KS_OPT_SYNTAX).
KS_OPT_SYNTAX_NASM = 1 << 2, // X86 NASM syntax (KS_OPT_SYNTAX).
KS_OPT_SYNTAX_MASM = 1 << 3, // X86 MASM syntax (KS_OPT_SYNTAX) - not supported yet.
KS_OPT_SYNTAX_GAS = 1 << 4, // X86 GNU GAS syntax (KS_OPT_SYNTAX).
KS_OPT_SYNTAX_INTEL = 1 << 0, // X86 Intel syntax - default on X86 (KS_OPT_SYNTAX).
KS_OPT_SYNTAX_ATT = 1 << 1, // X86 AT&T asm syntax (KS_OPT_SYNTAX).
KS_OPT_SYNTAX_NASM = 1 << 2, // X86 NASM syntax (KS_OPT_SYNTAX).
KS_OPT_SYNTAX_MASM = 1 << 3, // X86 MASM syntax (KS_OPT_SYNTAX) - not supported yet.
KS_OPT_SYNTAX_GAS = 1 << 4, // X86 GNU GAS syntax (KS_OPT_SYNTAX).
KS_OPT_SYNTAX_RADIX16 = 1 << 5, // All immediates are read as hexadecimal (i.e. 12 means 0x12); used alone it keeps the default Intel syntax.
} ks_opt_value;


Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/MC/MCAsmInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,9 @@ class MCAsmInfo {
/// Which dialect of an assembler variant to use. Defaults to 0
unsigned AssemblerDialect;

/// Default radix for immediate values (written by setRadix(), read by getRadix()).
unsigned Radix;

/// This is true if the assembler allows @ characters in symbol names.
/// Defaults to false.
bool AllowAtInName;
Expand Down Expand Up @@ -474,6 +477,8 @@ class MCAsmInfo {
const char *getCode64Directive() const { return Code64Directive; }
unsigned getAssemblerDialect() const { return AssemblerDialect; }
void setAssemblerDialect(unsigned v) { AssemblerDialect = v; }
void setRadix(unsigned v) { Radix = v; }
unsigned getRadix() const { return Radix; }
bool doesAllowAtInName() const { return AllowAtInName; }
bool supportsNameQuoting() const { return SupportsQuotedNames; }
bool doesSupportDataRegionDirectives() const {
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/MC/MCParser/AsmLexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class AsmLexer : public MCAsmLexer {
const char *CurPtr;
StringRef CurBuf;
bool isAtStartOfLine;
unsigned defaultRadix;

void operator=(const AsmLexer&) = delete;
AsmLexer(const AsmLexer&) = delete;
Expand Down
7 changes: 7 additions & 0 deletions llvm/keystone/ks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -495,11 +495,18 @@ ks_err ks_option(ks_engine *ks, ks_opt_type type, size_t value)
switch(value) {
default:
return KS_ERR_OPT_INVALID;
case KS_OPT_SYNTAX_RADIX16: // default syntax is Intel
case KS_OPT_SYNTAX_NASM | KS_OPT_SYNTAX_RADIX16:
case KS_OPT_SYNTAX_INTEL | KS_OPT_SYNTAX_RADIX16:
ks->MAI->setRadix(16);
case KS_OPT_SYNTAX_NASM:
case KS_OPT_SYNTAX_INTEL:
ks->syntax = (ks_opt_value)value;
ks->MAI->setAssemblerDialect(1);
break;
case KS_OPT_SYNTAX_GAS | KS_OPT_SYNTAX_RADIX16:
case KS_OPT_SYNTAX_ATT | KS_OPT_SYNTAX_RADIX16:
ks->MAI->setRadix(16);
case KS_OPT_SYNTAX_GAS:
case KS_OPT_SYNTAX_ATT:
ks->syntax = (ks_opt_value)value;
Expand Down
11 changes: 9 additions & 2 deletions llvm/lib/MC/MCParser/AsmLexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
CurPtr = nullptr;
isAtStartOfLine = true;
AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
defaultRadix = MAI.getRadix();
}

AsmLexer::~AsmLexer() {
Expand Down Expand Up @@ -259,6 +260,10 @@ AsmToken AsmLexer::LexDigit()
// Decimal integer: [1-9][0-9]*
if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
unsigned Radix = doLookAhead(CurPtr, 10);

if (defaultRadix == 16)
Radix = 16;

bool isHex = Radix == 16;
// Check for floating point literals.
if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
Expand All @@ -274,8 +279,10 @@ AsmToken AsmLexer::LexDigit()
"invalid hexdecimal number");

// Consume the [bB][hH].
if (Radix == 2 || Radix == 16)
++CurPtr;
if (defaultRadix != 16) {
if (Radix == 2 || Radix == 16)
++CurPtr;
}

// The darwin/x86 (and x86-64) assembler accepts and ignores type
// suffixes on integer literals.
Expand Down
9 changes: 9 additions & 0 deletions samples/sample.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,15 @@ int main(int argc, char **argv)
test_ks(KS_ARCH_X86, KS_MODE_32, "add %ecx, %eax", KS_OPT_SYNTAX_ATT);
test_ks(KS_ARCH_X86, KS_MODE_64, "add %rcx, %rax", KS_OPT_SYNTAX_ATT);

test_ks(KS_ARCH_X86, KS_MODE_32, "add eax, 0x15", 0);
test_ks(KS_ARCH_X86, KS_MODE_32, "add eax, 15h", 0);
test_ks(KS_ARCH_X86, KS_MODE_32, "add eax, 15", 0);

// RADIX16 syntax Intel (default syntax)
test_ks(KS_ARCH_X86, KS_MODE_32, "add eax, 15", KS_OPT_SYNTAX_RADIX16);
// RADIX16 syntax for AT&T
test_ks(KS_ARCH_X86, KS_MODE_32, "add $15, %eax", KS_OPT_SYNTAX_RADIX16 | KS_OPT_SYNTAX_ATT);

// ARM
test_ks(KS_ARCH_ARM, KS_MODE_ARM, "sub r1, r2, r5", 0);
test_ks(KS_ARCH_ARM, KS_MODE_ARM + KS_MODE_BIG_ENDIAN, "sub r1, r2, r5", 0);
Expand Down

0 comments on commit eac9cc9

Please sign in to comment.