diff --git a/include/keystone/keystone.h b/include/keystone/keystone.h index 2a885d04..5420c414 100644 --- a/include/keystone/keystone.h +++ b/include/keystone/keystone.h @@ -149,11 +149,12 @@ typedef enum ks_opt_type { // Runtime option value (associated with ks_opt_type above) typedef enum ks_opt_value { - KS_OPT_SYNTAX_INTEL = 1 << 0, // X86 Intel syntax - default on X86 (KS_OPT_SYNTAX). - KS_OPT_SYNTAX_ATT = 1 << 1, // X86 ATT asm syntax (KS_OPT_SYNTAX). - KS_OPT_SYNTAX_NASM = 1 << 2, // X86 Nasm syntax (KS_OPT_SYNTAX). - KS_OPT_SYNTAX_MASM = 1 << 3, // X86 Masm syntax (KS_OPT_SYNTAX) - unsupported yet. - KS_OPT_SYNTAX_GAS = 1 << 4, // X86 GNU GAS syntax (KS_OPT_SYNTAX). + KS_OPT_SYNTAX_INTEL = 1 << 0, // X86 Intel syntax - default on X86 (KS_OPT_SYNTAX). + KS_OPT_SYNTAX_ATT = 1 << 1, // X86 ATT asm syntax (KS_OPT_SYNTAX). + KS_OPT_SYNTAX_NASM = 1 << 2, // X86 Nasm syntax (KS_OPT_SYNTAX). + KS_OPT_SYNTAX_MASM = 1 << 3, // X86 Masm syntax (KS_OPT_SYNTAX) - unsupported yet. + KS_OPT_SYNTAX_GAS = 1 << 4, // X86 GNU GAS syntax (KS_OPT_SYNTAX). + KS_OPT_SYNTAX_RADIX16 = 1 << 5, // Read immediates as hexadecimal (e.g. 12 means 0x12); used alone or OR-ed with the INTEL, NASM, ATT or GAS value (KS_OPT_SYNTAX). } ks_opt_value; diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h index 0b6613ae..a0619461 100644 --- a/llvm/include/llvm/MC/MCAsmInfo.h +++ b/llvm/include/llvm/MC/MCAsmInfo.h @@ -151,6 +151,9 @@ class MCAsmInfo { /// Which dialect of an assembler variant to use. Defaults to 0 unsigned AssemblerDialect; + /// Default radix for immediates; AsmLexer only special-cases the value 16. NOTE(review): no initializer is visible here; confirm the constructor sets Radix before getRadix() is read. + unsigned Radix; + /// This is true if the assembler allows @ characters in symbol names. /// Defaults to false. 
bool AllowAtInName; @@ -474,6 +477,8 @@ class MCAsmInfo { const char *getCode64Directive() const { return Code64Directive; } unsigned getAssemblerDialect() const { return AssemblerDialect; } void setAssemblerDialect(unsigned v) { AssemblerDialect = v; } + void setRadix(unsigned v) { Radix = v; } /* sets the default radix for immediates; AsmLexer reads it via getRadix() when it is constructed */ + unsigned getRadix() const { return Radix; } bool doesAllowAtInName() const { return AllowAtInName; } bool supportsNameQuoting() const { return SupportsQuotedNames; } bool doesSupportDataRegionDirectives() const { diff --git a/llvm/include/llvm/MC/MCParser/AsmLexer.h b/llvm/include/llvm/MC/MCParser/AsmLexer.h index 1bb6d212..6fc909b0 100644 --- a/llvm/include/llvm/MC/MCParser/AsmLexer.h +++ b/llvm/include/llvm/MC/MCParser/AsmLexer.h @@ -30,6 +30,7 @@ class AsmLexer : public MCAsmLexer { const char *CurPtr; StringRef CurBuf; bool isAtStartOfLine; + unsigned defaultRadix; /* copy of MCAsmInfo::getRadix() taken in the AsmLexer constructor */ void operator=(const AsmLexer&) = delete; AsmLexer(const AsmLexer&) = delete; diff --git a/llvm/keystone/ks.cpp b/llvm/keystone/ks.cpp index 9b1b63b7..fbd38ca2 100644 --- a/llvm/keystone/ks.cpp +++ b/llvm/keystone/ks.cpp @@ -495,11 +495,18 @@ ks_err ks_option(ks_engine *ks, ks_opt_type type, size_t value) switch(value) { default: return KS_ERR_OPT_INVALID; + case KS_OPT_SYNTAX_RADIX16: // default syntax is Intel + case KS_OPT_SYNTAX_NASM | KS_OPT_SYNTAX_RADIX16: + case KS_OPT_SYNTAX_INTEL | KS_OPT_SYNTAX_RADIX16: + ks->MAI->setRadix(16); /* falls through (no break) into the NASM/INTEL cases below, which store value, RADIX16 bit included, in ks->syntax. NOTE(review): the plain syntax cases shown here never call setRadix, so a radix of 16 set by an earlier call stays in effect; confirm that is intended */ case KS_OPT_SYNTAX_NASM: case KS_OPT_SYNTAX_INTEL: ks->syntax = (ks_opt_value)value; ks->MAI->setAssemblerDialect(1); break; + case KS_OPT_SYNTAX_GAS | KS_OPT_SYNTAX_RADIX16: + case KS_OPT_SYNTAX_ATT | KS_OPT_SYNTAX_RADIX16: + ks->MAI->setRadix(16); /* falls through (no break) into the GAS/ATT cases below, which store value, RADIX16 bit included, in ks->syntax */ case KS_OPT_SYNTAX_GAS: case KS_OPT_SYNTAX_ATT: ks->syntax = (ks_opt_value)value; diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp index ebcb922a..0eb6c3b5 100644 --- a/llvm/lib/MC/MCParser/AsmLexer.cpp +++ b/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -25,6 +25,7 @@ AsmLexer::AsmLexer(const MCAsmInfo 
&MAI) : MAI(MAI) { CurPtr = nullptr; isAtStartOfLine = true; AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); + defaultRadix = MAI.getRadix(); /* the radix is sampled here, at construction time */ } AsmLexer::~AsmLexer() { @@ -259,6 +260,10 @@ AsmToken AsmLexer::LexDigit() // Decimal integer: [1-9][0-9]* if (CurPtr[-1] != '0' || CurPtr[0] == '.') { unsigned Radix = doLookAhead(CurPtr, 10); + + if (defaultRadix == 16) + Radix = 16; /* radix-16 mode overrides whatever doLookAhead returned; isHex below then becomes true, so the floating-point literal check is skipped. NOTE(review): doLookAhead was called with a default of 10 and has already moved CurPtr; confirm CurPtr covers every hex digit of the literal in this mode */ + bool isHex = Radix == 16; // Check for floating point literals. if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) { @@ -274,8 +279,10 @@ AsmToken AsmLexer::LexDigit() "invalid hexdecimal number"); // Consume the [bB][hH]. - if (Radix == 2 || Radix == 16) - ++CurPtr; + if (defaultRadix != 16) { /* in radix-16 mode CurPtr is never advanced here. NOTE(review): a literal written with an explicit h suffix would keep that suffix unconsumed; confirm that is intended */ + if (Radix == 2 || Radix == 16) + ++CurPtr; + } // The darwin/x86 (and x86-64) assembler accepts and ignores type // suffices on integer literals. diff --git a/samples/sample.c b/samples/sample.c index 0077a3d3..d540f65a 100644 --- a/samples/sample.c +++ b/samples/sample.c @@ -53,6 +53,15 @@ int main(int argc, char **argv) test_ks(KS_ARCH_X86, KS_MODE_32, "add %ecx, %eax", KS_OPT_SYNTAX_ATT); test_ks(KS_ARCH_X86, KS_MODE_64, "add %rcx, %rax", KS_OPT_SYNTAX_ATT); + test_ks(KS_ARCH_X86, KS_MODE_32, "add eax, 0x15", 0); /* syntax option 0: the same operand written with a 0x prefix, with an h suffix, and as plain digits */ + test_ks(KS_ARCH_X86, KS_MODE_32, "add eax, 15h", 0); + test_ks(KS_ARCH_X86, KS_MODE_32, "add eax, 15", 0); + + // RADIX16 on its own, which selects the default (Intel) syntax: 15 is read as 0x15 + test_ks(KS_ARCH_X86, KS_MODE_32, "add eax, 15", KS_OPT_SYNTAX_RADIX16); + // RADIX16 combined with AT&T syntax: $15 is read as 0x15 + test_ks(KS_ARCH_X86, KS_MODE_32, "add $15, %eax", KS_OPT_SYNTAX_RADIX16 | KS_OPT_SYNTAX_ATT); + // ARM test_ks(KS_ARCH_ARM, KS_MODE_ARM, "sub r1, r2, r5", 0); test_ks(KS_ARCH_ARM, KS_MODE_ARM + KS_MODE_BIG_ENDIAN, "sub r1, r2, r5", 0);