Skip to content

Commit

Permalink
add vpcmp*, v(p)blend*
Browse files Browse the repository at this point in the history
  • Loading branch information
herumi committed Jul 20, 2016
1 parent 97743ee commit c57b602
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 4 deletions.
17 changes: 17 additions & 0 deletions gen/gen_avx512.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,16 @@ void putVcmp()
{ 0x65, "vpcmpgtw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
{ 0x66, "vpcmpgtd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
{ 0x37, "vpcmpgtq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },

{ 0x3F, "vpcmpb", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0, true },
{ 0x3E, "vpcmpub", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0, true },

{ 0x3F, "vpcmpw", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1, true },
{ 0x3E, "vpcmpuw", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1, true },
{ 0x1F, "vpcmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, true },
{ 0x1E, "vpcmpud", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, true },
{ 0x1F, "vpcmpq", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, true },
{ 0x1E, "vpcmpuq", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, true },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
Expand Down Expand Up @@ -211,6 +221,13 @@ void putX_X_XM_IMM()

{ 0x8D, "vpermb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false },
{ 0x8D, "vpermw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },

{ 0x65, "vblendmpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x65, "vblendmps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x66, "vpblendmb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false },
{ 0x66, "vpblendmw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false },
{ 0x64, "vpblendmd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x64, "vpblendmq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
Expand Down
64 changes: 60 additions & 4 deletions test/make_512.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ const uint64 MEM8 = 1ULL << 15;
const uint64 MEM16 = 1ULL << 16;
const uint64 MEM32 = 1ULL << 17;
const uint64 VM32Z = 1ULL << 19;
const uint64 CL = 1ULL << 20;
const uint64 K_K = 1ULL << 20;
const uint64 MEM_ONLY_DISP = 1ULL << 21;
const uint64 NEG32 = 1ULL << 23;
const uint64 _YMM = 1ULL << 24;
Expand Down Expand Up @@ -335,8 +335,8 @@ class Test {
return "ax";
case AL:
return "al";
case CL:
return "cl";
case K_K:
return isXbyak_ ? "k5 | k3" : "k5{k3}";
case IMM32:
return isXbyak_ ? "12345678" : "dword 12345678";
case IMM8:
Expand Down Expand Up @@ -1589,10 +1589,62 @@ class Test {
}
#endif
}
void putBlend()
{
put("vblendmpd", XMM_KZ, _XMM, _XMM | _MEM | M_1to2);
put("vblendmpd", YMM_KZ, _YMM, _YMM | _MEM | M_1to4);
put("vblendmpd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8);

put("vblendmps", XMM_KZ, _XMM, _XMM | _MEM | M_1to4);
put("vblendmps", YMM_KZ, _YMM, _YMM | _MEM | M_1to8);
put("vblendmps", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16);

put("vpblendmb", XMM_KZ, _XMM, _XMM | _MEM);
put("vpblendmb", YMM_KZ, _YMM, _YMM | _MEM);
put("vpblendmb", ZMM_KZ, _ZMM, _ZMM | _MEM);

put("vpblendmb", XMM_KZ, _XMM, _XMM | _MEM);
put("vpblendmb", YMM_KZ, _YMM, _YMM | _MEM);
put("vpblendmb", ZMM_KZ, _ZMM, _ZMM | _MEM);

put("vpblendmd", XMM_KZ, _XMM, _XMM | _MEM | M_1to4);
put("vpblendmd", YMM_KZ, _YMM, _YMM | _MEM | M_1to8);
put("vpblendmd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16);

put("vpblendmq", XMM_KZ, _XMM, _XMM | _MEM | M_1to2);
put("vpblendmq", YMM_KZ, _YMM, _YMM | _MEM | M_1to4);
put("vpblendmq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8);
}
void putVpcmp()
{
const uint64_t b0Tbl[] = { 0, 0, 0 };
const uint64_t b4Tbl[] = { M_1to4, M_1to8, M_1to16 };
const uint64_t b2Tbl[] = { M_1to2, M_1to4, M_1to8 };
const struct Tbl {
const char *name;
uint64_t b;
} tbl[] = {
{ "vpcmpb", 0 },
{ "vpcmpub", 0 },
{ "vpcmpw", 0 },
{ "vpcmpuw", 0 },
{ "vpcmpd", M_1to4 },
{ "vpcmpud", M_1to4 },
{ "vpcmpq", M_1to2 },
{ "vpcmpuq", M_1to2 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
const uint64_t *bTbl = p.b == 0 ? b0Tbl : p.b == M_1to4 ? b4Tbl : b2Tbl;
put(p.name, K_K, _XMM, _XMM | _MEM | bTbl[0], IMM8);
put(p.name, K_K, _YMM, _YMM | _MEM | bTbl[1], IMM8);
put(p.name, K_K, _ZMM, _ZMM | _MEM | bTbl[2], IMM8);
}
}
void putMin()
{
#ifdef XBYAK64
putGather();
putVpcmp();
#endif
}
void putAVX512()
Expand Down Expand Up @@ -1631,6 +1683,10 @@ class Test {
putMisc1();
separateFunc();
putGather();
separateFunc();
putBlend();
separateFunc();
putVpcmp();
#endif
}
};
Expand Down
14 changes: 14 additions & 0 deletions xbyak/xbyak_avx512.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,14 @@ void vpcmpgtb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k
void vpcmpgtw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX, 0x65); }
void vpcmpgtd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x66); }
void vpcmpgtq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x37); }
void vpcmpb(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3F, imm); }
void vpcmpub(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3E, imm); }
void vpcmpw(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3F, imm); }
void vpcmpuw(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3E, imm); }
void vpcmpd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x1F, imm); }
void vpcmpud(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x1E, imm); }
void vpcmpq(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x1F, imm); }
void vpcmpuq(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x1E, imm); }
void vmovdqa32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
void vmovdqa64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
Expand Down Expand Up @@ -120,6 +128,12 @@ void vpxorq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1,
void vpmullq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x40); }
void vpermb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8D); }
void vpermw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8D); }
void vblendmpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x65); }
void vblendmps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x65); }
void vpblendmb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x66); }
void vpblendmw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x66); }
void vpblendmd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x64); }
void vpblendmq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x64); }
void vpsraq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
void vextractf32x4(const Operand& op, const Ymm& r, uint8 imm) { opAVX_X_X_XMcvt(r, true, cvtIdx0(r), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x19, imm); }
void vextractf64x2(const Operand& op, const Ymm& r, uint8 imm) { opAVX_X_X_XMcvt(r, true, cvtIdx0(r), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_N16, 0x19, imm); }
Expand Down

0 comments on commit c57b602

Please sign in to comment.