Skip to content

Commit

Permalink
Experimental AVX Support on X86
Browse files Browse the repository at this point in the history
Initial support for AVX/AVX2/FMA instructions that use VEX prefixes.

Signed-off-by: Victor Ding <[email protected]>
  • Loading branch information
Victor Ding committed Apr 28, 2017
1 parent e29ca8a commit adc3db5
Show file tree
Hide file tree
Showing 5 changed files with 200 additions and 48 deletions.
84 changes: 83 additions & 1 deletion compiler/x/codegen/OMRInstruction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ class OMR_EXTENSIBLE Instruction : public OMR::Instruction
// Returns the register number encoded in the rm field of this ModRM byte:
// the low three bits of rm combined with the extension bit B placed at bit 3.
// Asserts that the ModRM byte is in register mode (mod == 0b11).
inline uint8_t RM(uint8_t B = 0) const
   {
   TR_ASSERT(mod == 0x3, "ModRM is not in register mode");
   // Read the rm field here; a leading `return` built from the reg field made
   // this statement unreachable and returned the wrong operand.
   return (B << 3) | (0x7 & rm);
   }
inline ModRM* setMod(uint8_t mod = 0x03) // 0b11
{
Expand Down Expand Up @@ -299,8 +299,90 @@ class OMR_EXTENSIBLE Instruction : public OMR::Instruction
return 0x0f & *((uint8_t*)this);
}
};
// Primary template for a VEX prefix occupying VEX_SIZE bytes.
// Constructing this generic form always trips the assertion below.
template <size_t VEX_SIZE>
struct VEX
   {
   VEX()
      {
      TR_ASSERT(false, "INVALID VEX PREFIX");
      }
   };
};

// 3-byte VEX prefix (escape byte 0xC4 followed by two payload bytes), declared
// together with the opcode byte and the ModRM byte that come after it.
// R, X and B hold the bitwise complement of the corresponding REX bits (see the
// converting constructor), so a stored 1 means "extension bit not set".
// NOTE(review): the field order below presumably relies on the compiler
// allocating bit-fields starting from the least significant bit of each byte —
// confirm for every supported toolchain.
template<>
struct Instruction::VEX<3>
   {
   // Byte 0: C4
   uint8_t escape;
   // Byte 1
   uint8_t m : 5;
   uint8_t B : 1;
   uint8_t X : 1;
   uint8_t R : 1;
   // Byte 2
   uint8_t p : 2;
   uint8_t L : 1;
   uint8_t v : 4;
   uint8_t W : 1;
   // Byte 3: opcode
   uint8_t opcode;
   // Byte 4: ModRM
   ModRM modrm;

   // Leaves every field uninitialized.
   inline VEX() {}

   // Builds the prefix from a REX prefix and the ModRM opcode extension.
   // Sets escape, R, X, B, W, v and modrm only; m, L, p and opcode are not
   // assigned here and must be filled in by the caller.
   inline VEX(const REX& rex, uint8_t ModRMOpCode) : modrm(ModRMOpCode)
      {
      escape = 0xC4;
      R = ~rex.R;
      X = ~rex.X;
      B = ~rex.B;
      W = rex.W;
      v = 0xf; //0b1111
      }

   // True when this prefix can be re-expressed as a 2-byte VEX prefix:
   // X and B are both 1, W is 0 and m equals 1.
   inline bool CanBeShortened() const
      {
      return X && B && !W && (m == 1);
      }

   // Register number from ModRM.reg extended by the (inverted) R bit.
   // The extension must be exactly 0 or 1: `~R` is evaluated after integral
   // promotion and yields 0xFE/0xFF once narrowed to uint8_t, which pollutes
   // bits 4-7 of the result, so the bit is inverted logically instead.
   inline uint8_t Reg() const
      {
      return modrm.Reg(R ? 0 : 1);
      }

   // Register number from ModRM.rm extended by the (inverted) B bit; see Reg()
   // for why the inversion is logical rather than bitwise.
   inline uint8_t RM() const
      {
      return modrm.RM(B ? 0 : 1);
      }
   };
// 2-byte VEX prefix (escape byte 0xC5 followed by one payload byte), declared
// together with the opcode byte and the ModRM byte that come after it.
// This form carries no X, B, W or m fields.
// NOTE(review): the field order below presumably relies on the compiler
// allocating bit-fields starting from the least significant bit of each byte —
// confirm for every supported toolchain.
template<>
struct Instruction::VEX<2>
   {
   // Byte 0: C5
   uint8_t escape;
   // Byte 1
   uint8_t p : 2;
   uint8_t L : 1;
   uint8_t v : 4;
   uint8_t R : 1;
   // Byte 2: opcode
   uint8_t opcode;
   // Byte 3: ModRM
   ModRM modrm;

   // Leaves every field uninitialized.
   inline VEX() {}

   // Shortens a 3-byte prefix: copies p, L, v, R, opcode and modrm from `other`
   // and sets the escape byte to 0xC5. The caller is responsible for checking
   // that `other` is eligible for the short form.
   inline VEX(const VEX<3>& other) : modrm(other.modrm)
      {
      escape = 0xC5;
      p = other.p;
      L = other.L;
      v = other.v;
      R = other.R;
      opcode = other.opcode;
      }

   // Register number from ModRM.reg extended by the (inverted) R bit.
   // The extension must be exactly 0 or 1: `~R` is evaluated after integral
   // promotion and yields 0xFE/0xFF once narrowed to uint8_t, which pollutes
   // bits 4-7 of the result, so the bit is inverted logically instead.
   inline uint8_t Reg() const
      {
      return modrm.Reg(R ? 0 : 1);
      }

   // Register number from ModRM.rm; there is no B field in this form, so the
   // default extension of ModRM::RM is used.
   inline uint8_t RM() const
      {
      return modrm.RM();
      }
   };
}

}
Expand Down
1 change: 1 addition & 0 deletions compiler/x/codegen/X86BinaryEncoding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ uint8_t* OMR::X86::Instruction::generateBinaryEncoding()
// cursor is NULL when generateOperand() requests to regenerate the binary code, which may happen during encoding of memref with unresolved symbols on 64-bit
if (cursor)
{
self()->getOpCode().finalize(instructionStart);
self()->setBinaryLength(cursor - instructionStart);
self()->cg()->addAccumulatedInstructionLengthError(self()->getEstimatedBinaryLength() - self()->getBinaryLength());
return cursor;
Expand Down
23 changes: 18 additions & 5 deletions compiler/x/codegen/X86Ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,23 +359,32 @@ class TR_X86OpCode
break;
}
}
// check if the instruction has mandatory prefix(es)
inline bool hasMandatoryPrefix() const
// check if the instruction can be encoded as AVX
inline bool supportsAVX() const
{
return prefixes == PREFIX___;
return vex_l != VEX_L___;
}
// X87 check: prefixes must equal PREFIX___ and the opcode byte must lie in
// the inclusive range 0xd8..0xdf
inline bool isX87() const
   {
   if (prefixes != PREFIX___)
      {
      return false;
      }
   return (0xd8 <= opcode) && (opcode <= 0xdf);
   }
// Mandatory prefix check: reports whether the prefixes field equals PREFIX___.
// NOTE(review): if PREFIX___ stands for "no prefix", this result reads as
// inverted relative to the function name — confirm against the callers.
inline bool hasMandatoryPrefix() const
   {
   const bool matchesPrefixNone = (PREFIX___ == prefixes);
   return matchesPrefixNone;
   }
// Group 7 OpCode Extension check: escape must be ESCAPE_0F__ and the opcode
// byte must be 0x01
inline bool isGroup07() const
   {
   if (escape != ESCAPE_0F__)
      {
      return false;
      }
   return 0x01 == opcode;
   }
// TBuffer should only be one of the two: Estimator when calculating length, and Writer when generating binaries.
template <class TBuffer> typename TBuffer::cursor_t encode(typename TBuffer::cursor_t cursor, uint8_t rexbits) const;
template <class TBuffer> inline typename TBuffer::cursor_t encode(typename TBuffer::cursor_t cursor, uint8_t rexbits) const;
// finalize instruction prefix information, currently only in-use for AVX instructions for VEX.vvvv field
inline void finalize(uint8_t* cursor) const;
private:
inline static bool allowsAVX();
};
template <typename TCursor>
class BufferBase
Expand Down Expand Up @@ -593,7 +602,11 @@ class TR_X86OpCode
CheckAndFinishGroup07(ret);
return ret;
}

// Forwards to the opcode info's finalize() for the bytes at `cursor`;
// pseudo-ops are left untouched.
void finalize(uint8_t* cursor)
   {
   if (isPseudoOp())
      {
      return;
      }
   info().finalize(cursor);
   }
void convertLongBranchToShort()
{ // input must be a long branch in range JA4 - JMP4
if (((int)_opCode >= (int)JA4) && ((int)_opCode <= (int)JMP4))
Expand Down
131 changes: 93 additions & 38 deletions compiler/x/codegen/X86Ops_inlines.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@
#ifndef X86OPS_INLINES_INCL
#define X86OPS_INLINES_INCL

// Reports whether AVX encoding is enabled: the TR_EnableAVX environment
// option must be present and the processor info must report AVX support.
// The answer is computed on the first call and cached in a function-local static.
inline bool TR_X86OpCode::OpCode_t::allowsAVX()
   {
   static bool avxAllowed = feGetEnv("TR_EnableAVX")
                          ? TR::CodeGenerator::getX86ProcessorInfo().supportsAVX()
                          : false;
   return avxAllowed;
   }

template <typename TBuffer> inline typename TBuffer::cursor_t TR_X86OpCode::OpCode_t::encode(typename TBuffer::cursor_t cursor, uint8_t rexbits) const
{
if (isX87())
Expand All @@ -34,54 +40,103 @@ template <typename TBuffer> inline typename TBuffer::cursor_t TR_X86OpCode::OpCo
// Prefixes
TR::Instruction::REX rex(rexbits);
rex.W = rex_w;
switch (prefixes)
TR_ASSERT(TR::Compiler->target.is64Bit() || !rex.value(), "ERROR: REX.W used on X86-32. OpCode = %d; rex = %02x", opcode, (uint32_t)(uint8_t)rex.value());
// Use AVX if possible
if (supportsAVX() && allowsAVX())
{
case PREFIX___:
break;
case PREFIX_66:
buffer.append('\x66');
break;
case PREFIX_F2:
buffer.append('\xf2');
break;
case PREFIX_F3:
buffer.append('\xf3');
break;
default:
break;
TR::Instruction::VEX<3> vex(rex, modrm_opcode);
vex.m = escape;
vex.L = vex_l;
vex.p = prefixes;
vex.opcode = opcode;
if(vex.CanBeShortened())
{
buffer.append(TR::Instruction::VEX<2>(vex));
}
else
{
buffer.append(vex);
}
}
// REX
if (rex.value() || rexbits)
else
{
buffer.append(rex);
switch (prefixes)
{
case PREFIX___:
break;
case PREFIX_66:
buffer.append('\x66');
break;
case PREFIX_F2:
buffer.append('\xf2');
break;
case PREFIX_F3:
buffer.append('\xf3');
break;
default:
break;
}
// REX
if (rex.value() || rexbits)
{
buffer.append(rex);
}
// OpCode escape
switch (escape)
{
case ESCAPE_____:
break;
case ESCAPE_0F__:
buffer.append('\x0f');
break;
case ESCAPE_0F38:
buffer.append('\x0f');
buffer.append('\x38');
break;
case ESCAPE_0F3A:
buffer.append('\x0f');
buffer.append('\x3a');
break;
default:
break;
}
// OpCode
buffer.append(opcode);
// ModRM
if (modrm_form)
{
buffer.append(TR::Instruction::ModRM(modrm_opcode));
}
}
// OpCode escape
switch (escape)
return buffer;
}

// Finalizes the VEX prefix of an already-encoded instruction.
// `cursor` points at the first encoded byte. When that byte is a VEX escape
// (0xC4 for the 3-byte form, 0xC5 for the 2-byte form) and this opcode's vex_v
// is VEX_vReg_, the VEX.vvvv field is overwritten with the complement of a
// register number taken from the ModRM byte: the rm operand when modrm_form is
// ModRM_EXT_, the reg operand otherwise. Any other leading byte is left alone.
inline void TR_X86OpCode::OpCode_t::finalize(uint8_t* cursor) const
   {
   // Finalize VEX prefix
   switch (*cursor)
      {
      case 0xC4:
         {
         auto pVEX = (TR::Instruction::VEX<3>*)cursor;
         if (vex_v == VEX_vReg_)
            {
            // v is a 4-bit field, so only the low four bits of the complement are kept
            pVEX->v = ~(modrm_form == ModRM_EXT_ ? pVEX->RM() : pVEX->Reg());
            }
         }
         break;
      case 0xC5:
         {
         auto pVEX = (TR::Instruction::VEX<2>*)cursor;
         if (vex_v == VEX_vReg_)
            {
            // v is a 4-bit field, so only the low four bits of the complement are kept
            pVEX->v = ~(modrm_form == ModRM_EXT_ ? pVEX->RM() : pVEX->Reg());
            }
         }
         break;
      default:
         break;
      }
   }

inline void TR_X86OpCode::CheckAndFinishGroup07(uint8_t* cursor)
Expand Down
9 changes: 5 additions & 4 deletions doc/compiler/x/OpCodeEncoding.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,13 @@ enum TR_OpCodeImmediate : uint8_t
5. Write opcode
6. Set and write ModR/M field if necessary

# FUTURE WORK

## Generate AVX Instruction
1. Obtain REX prefix from operand and set REX.W according to rex_w field.
2. If REX.X == 0 AND REX.B == 0 AND REX.W == 0 AND escape == ESCAPE____, use 2-byte prefix; otherwise use 3-byte prefix
3. Setup VEX structure and write it.
2. Set up the 3-byte VEX structure.
2.1 Convert the 3-byte VEX prefix to a 2-byte VEX prefix if possible.
3. Write the VEX prefix.

# FUTURE WORK

## Generate AVX-512 Instruction
1. Obtain REX prefix from operand and set REX.W according to rex_w field.
Expand Down

0 comments on commit adc3db5

Please sign in to comment.