-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[JIT] Enable EGPRs in JIT by adding REX2 encoding to the backend. #106557
Changes from all commits
1820567
d1afc68
2335aa3
6578c58
01eeb80
690aee3
31d7fb4
a995878
74aacf6
c330927
fbf20d1
ea02e70
c74b801
34980b4
2ffdbeb
3a729bb
d943b03
c8fee9c
6ec0e97
1d01003
1acc219
87ad443
bb9905a
86083b2
dfe8760
64761cd
f1aba62
f5cc5a8
7ca8433
bc4d225
deb3814
0d63230
13b8076
42c6cfc
2e2eb01
3d298b7
25a54d3
791b505
094e76b
6502ae1
5d3cca2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9053,6 +9053,225 @@ void CodeGen::genAmd64EmitterUnitTestsSse2() | |
GetEmitter()->emitIns_R_R_R(INS_cvtsd2ss, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2); | ||
} | ||
|
||
/***************************************************************************** | ||
* Unit tests for the APX instructions. | ||
* | ||
* Emits a fixed sequence of integer instructions on EAX/ECX/EDX through the | ||
* emitIns_* entry points of the emitter (register, immediate, address-mode, | ||
* indirection and stack forms). The tests are mainly an encoding correctness | ||
* check. Nothing is emitted after the initial temp label unless REX2 encoding | ||
* is in use or the REX2 stress mode is on. | ||
*/ | ||
|
||
void CodeGen::genAmd64EmitterUnitTestsApx() | ||
{ | ||
emitter* theEmitter = GetEmitter(); | ||
|
||
// Create and define a temp label before emitting any of the test instructions. | ||
genDefineTempLabel(genCreateTempLabel()); | ||
|
||
// This test suite needs REX2 enabled: bail out unless REX2 encoding is in use | ||
// or the JIT stress mode for REX2 encoding is on. | ||
if (!theEmitter->UseRex2Encoding() && !theEmitter->emitComp->DoJitStressRex2Encoding()) | ||
{ | ||
return; | ||
} | ||
|
||
// add reg, reg at every operand size. | ||
theEmitter->emitIns_R_R(INS_add, EA_1BYTE, REG_EAX, REG_ECX); | ||
theEmitter->emitIns_R_R(INS_add, EA_2BYTE, REG_EAX, REG_ECX); | ||
theEmitter->emitIns_R_R(INS_add, EA_4BYTE, REG_EAX, REG_ECX); | ||
theEmitter->emitIns_R_R(INS_add, EA_8BYTE, REG_EAX, REG_ECX); | ||
tannergooding marked this conversation as resolved.
Show resolved
Hide resolved
|
||
theEmitter->emitIns_R_R(INS_or, EA_4BYTE, REG_EAX, REG_ECX); | ||
theEmitter->emitIns_R_R(INS_adc, EA_4BYTE, REG_EAX, REG_ECX); | ||
theEmitter->emitIns_R_R(INS_sbb, EA_4BYTE, REG_EAX, REG_ECX); | ||
theEmitter->emitIns_R_R(INS_and, EA_4BYTE, REG_EAX, REG_ECX); | ||
theEmitter->emitIns_R_R(INS_sub, EA_4BYTE, REG_EAX, REG_ECX); | ||
theEmitter->emitIns_R_R(INS_xor, EA_4BYTE, REG_EAX, REG_ECX); | ||
theEmitter->emitIns_R_R(INS_cmp, EA_4BYTE, REG_EAX, REG_ECX); | ||
theEmitter->emitIns_R_R(INS_test, EA_4BYTE, REG_EAX, REG_ECX); | ||
theEmitter->emitIns_R_R(INS_bsf, EA_4BYTE, REG_EAX, REG_ECX); | ||
theEmitter->emitIns_R_R(INS_bsr, EA_4BYTE, REG_EAX, REG_ECX); | ||
|
||
theEmitter->emitIns_R_R(INS_cmovo, EA_4BYTE, REG_EAX, REG_ECX); | ||
|
||
theEmitter->emitIns_Mov(INS_mov, EA_4BYTE, REG_EAX, REG_ECX, false); | ||
theEmitter->emitIns_Mov(INS_movsx, EA_2BYTE, REG_EAX, REG_ECX, false); | ||
theEmitter->emitIns_Mov(INS_movzx, EA_2BYTE, REG_EAX, REG_ECX, false); | ||
|
||
theEmitter->emitIns_R_R(INS_popcnt, EA_4BYTE, REG_EAX, REG_ECX); | ||
theEmitter->emitIns_R_R(INS_lzcnt, EA_4BYTE, REG_EAX, REG_ECX); | ||
theEmitter->emitIns_R_R(INS_tzcnt, EA_4BYTE, REG_EAX, REG_ECX); | ||
|
||
// reg, imm forms. | ||
theEmitter->emitIns_R_I(INS_add, EA_4BYTE, REG_ECX, 0x05); | ||
theEmitter->emitIns_R_I(INS_add, EA_2BYTE, REG_ECX, 0x05); | ||
theEmitter->emitIns_R_I(INS_or, EA_4BYTE, REG_EAX, 0x05); | ||
theEmitter->emitIns_R_I(INS_adc, EA_4BYTE, REG_EAX, 0x05); | ||
theEmitter->emitIns_R_I(INS_sbb, EA_4BYTE, REG_EAX, 0x05); | ||
theEmitter->emitIns_R_I(INS_and, EA_4BYTE, REG_EAX, 0x05); | ||
theEmitter->emitIns_R_I(INS_sub, EA_4BYTE, REG_EAX, 0x05); | ||
theEmitter->emitIns_R_I(INS_xor, EA_4BYTE, REG_EAX, 0x05); | ||
theEmitter->emitIns_R_I(INS_cmp, EA_4BYTE, REG_EAX, 0x05); | ||
theEmitter->emitIns_R_I(INS_test, EA_4BYTE, REG_EAX, 0x05); | ||
|
||
theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_EAX, 0xE0); | ||
|
||
// The JIT tends to compress an imm64 to an imm32 when the upper half is all zero, so use an | ||
// immediate with non-zero upper bits to make sure this test exercises the imm64 path. | ||
theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_RAX, 0xFFFF000000000000); | ||
|
||
// shift/rotate reg, cl | ||
theEmitter->emitIns_R(INS_rol, EA_4BYTE, REG_EAX); | ||
theEmitter->emitIns_R(INS_ror, EA_4BYTE, REG_EAX); | ||
theEmitter->emitIns_R(INS_rcl, EA_4BYTE, REG_EAX); | ||
theEmitter->emitIns_R(INS_rcr, EA_4BYTE, REG_EAX); | ||
theEmitter->emitIns_R(INS_shl, EA_4BYTE, REG_EAX); | ||
theEmitter->emitIns_R(INS_shr, EA_4BYTE, REG_EAX); | ||
theEmitter->emitIns_R(INS_sar, EA_4BYTE, REG_EAX); | ||
|
||
// shift/rotate reg, 1 | ||
theEmitter->emitIns_R(INS_rol_1, EA_4BYTE, REG_EAX); | ||
theEmitter->emitIns_R(INS_ror_1, EA_4BYTE, REG_EAX); | ||
theEmitter->emitIns_R(INS_rcl_1, EA_4BYTE, REG_EAX); | ||
theEmitter->emitIns_R(INS_rcr_1, EA_4BYTE, REG_EAX); | ||
theEmitter->emitIns_R(INS_shl_1, EA_4BYTE, REG_EAX); | ||
theEmitter->emitIns_R(INS_shr_1, EA_4BYTE, REG_EAX); | ||
theEmitter->emitIns_R(INS_sar_1, EA_4BYTE, REG_EAX); | ||
|
||
// shift/rotate reg, imm8 | ||
theEmitter->emitIns_R_I(INS_shl_N, EA_4BYTE, REG_ECX, 0x05); | ||
theEmitter->emitIns_R_I(INS_shr_N, EA_4BYTE, REG_ECX, 0x05); | ||
theEmitter->emitIns_R_I(INS_sar_N, EA_4BYTE, REG_ECX, 0x05); | ||
theEmitter->emitIns_R_I(INS_rol_N, EA_4BYTE, REG_ECX, 0x05); | ||
theEmitter->emitIns_R_I(INS_ror_N, EA_4BYTE, REG_ECX, 0x05); | ||
// TODO-xarch-apx: rcl_N and rcr_N are not enabled for now. | ||
// theEmitter->emitIns_R_I(INS_rcl_N, EA_4BYTE, REG_ECX, 0x05); | ||
// theEmitter->emitIns_R_I(INS_rcr_N, EA_4BYTE, REG_ECX, 0x05); | ||
Comment on lines
+9137
to
+9139
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the reason for these ones being skipped? Can we open tracking issues and list the issue number as part of the comment? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. https://github.com/dotnet/runtime/blob/main/src/coreclr/jit/emitxarch.cpp#L18695 It seems that the latency/tp information is missing for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Its not required in this PR, but it would be good to ensure its all handled or tracked long term. I imagine this is representative of a potentially missing optimization. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, I can submit an issue accordingly. |
||
|
||
theEmitter->emitIns_R(INS_neg, EA_2BYTE, REG_EAX); | ||
theEmitter->emitIns_R(INS_not, EA_2BYTE, REG_EAX); | ||
|
||
theEmitter->emitIns_R_AR(INS_lea, EA_4BYTE, REG_ECX, REG_EAX, 4); | ||
|
||
theEmitter->emitIns_R_AR(INS_mov, EA_1BYTE, REG_ECX, REG_EAX, 4); | ||
theEmitter->emitIns_R_AR(INS_mov, EA_2BYTE, REG_ECX, REG_EAX, 4); | ||
theEmitter->emitIns_R_AR(INS_mov, EA_4BYTE, REG_ECX, REG_EAX, 4); | ||
theEmitter->emitIns_R_AR(INS_mov, EA_8BYTE, REG_ECX, REG_EAX, 4); | ||
|
||
theEmitter->emitIns_R_AR(INS_add, EA_1BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_R_AR(INS_add, EA_2BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_R_AR(INS_add, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_R_AR(INS_add, EA_8BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_R_AR(INS_or, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_R_AR(INS_adc, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_R_AR(INS_sbb, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_R_AR(INS_and, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_R_AR(INS_sub, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_R_AR(INS_xor, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_R_AR(INS_cmp, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_R_AR(INS_test, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_R_AR(INS_bsf, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_R_AR(INS_bsr, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_R_AR(INS_popcnt, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_R_AR(INS_lzcnt, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_R_AR(INS_tzcnt, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
|
||
theEmitter->emitIns_AR_R(INS_add, EA_1BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_AR_R(INS_add, EA_2BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_AR_R(INS_add, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_AR_R(INS_add, EA_8BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_AR_R(INS_or, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_AR_R(INS_adc, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_AR_R(INS_sbb, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_AR_R(INS_and, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_AR_R(INS_sub, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_AR_R(INS_xor, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_AR_R(INS_cmp, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_AR_R(INS_test, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
|
||
theEmitter->emitIns_R_AR(INS_movsx, EA_2BYTE, REG_ECX, REG_EAX, 4); | ||
theEmitter->emitIns_R_AR(INS_movzx, EA_2BYTE, REG_EAX, REG_ECX, 4); | ||
theEmitter->emitIns_R_AR(INS_cmovo, EA_4BYTE, REG_EAX, REG_ECX, 4); | ||
|
||
theEmitter->emitIns_AR_R(INS_xadd, EA_4BYTE, REG_EAX, REG_EDX, 2); | ||
|
||
theEmitter->emitIns_R_R_I(INS_shld, EA_4BYTE, REG_EAX, REG_ECX, 5); | ||
theEmitter->emitIns_R_R_I(INS_shrd, EA_2BYTE, REG_EAX, REG_ECX, 5); | ||
// TODO-XArch-apx: the S_R_I path only accepts SSE or VEX instructions, | ||
// so we assume shld/shrd will not take their first operand from the stack. | ||
// theEmitter->emitIns_S_R_I(INS_shld, EA_2BYTE, 1, 2, REG_EAX, 5); | ||
// theEmitter->emitIns_S_R_I(INS_shrd, EA_2BYTE, 1, 2, REG_EAX, 5); | ||
|
||
theEmitter->emitIns_AR_R(INS_cmpxchg, EA_2BYTE, REG_EAX, REG_EDX, 2); | ||
|
||
theEmitter->emitIns_R(INS_seto, EA_1BYTE, REG_EDX); | ||
|
||
theEmitter->emitIns_R(INS_bswap, EA_8BYTE, REG_EDX); | ||
|
||
// INS_bt only has a reg-to-reg form. | ||
theEmitter->emitIns_R_R(INS_bt, EA_2BYTE, REG_EAX, REG_EDX); | ||
|
||
theEmitter->emitIns_R(INS_idiv, EA_8BYTE, REG_EDX); | ||
|
||
theEmitter->emitIns_R_R(INS_xchg, EA_8BYTE, REG_EAX, REG_EDX); | ||
|
||
theEmitter->emitIns_R(INS_div, EA_8BYTE, REG_EDX); | ||
theEmitter->emitIns_R(INS_mulEAX, EA_8BYTE, REG_EDX); | ||
|
||
// Wrap EDX in a physical-register node and form a TYP_INT indirection over it; | ||
// the result is the operand passed to the emitIns_R_A calls below. | ||
GenTreePhysReg physReg(REG_EDX); | ||
physReg.SetRegNum(REG_EDX); | ||
GenTreeIndir load = indirForm(TYP_INT, &physReg); | ||
|
||
theEmitter->emitIns_R_A(INS_add, EA_1BYTE, REG_EAX, &load); | ||
theEmitter->emitIns_R_A(INS_add, EA_2BYTE, REG_EAX, &load); | ||
theEmitter->emitIns_R_A(INS_add, EA_4BYTE, REG_EAX, &load); | ||
theEmitter->emitIns_R_A(INS_add, EA_8BYTE, REG_EAX, &load); | ||
theEmitter->emitIns_R_A(INS_or, EA_4BYTE, REG_EAX, &load); | ||
theEmitter->emitIns_R_A(INS_adc, EA_4BYTE, REG_EAX, &load); | ||
theEmitter->emitIns_R_A(INS_sbb, EA_4BYTE, REG_EAX, &load); | ||
theEmitter->emitIns_R_A(INS_and, EA_4BYTE, REG_EAX, &load); | ||
theEmitter->emitIns_R_A(INS_sub, EA_4BYTE, REG_EAX, &load); | ||
theEmitter->emitIns_R_A(INS_xor, EA_4BYTE, REG_EAX, &load); | ||
theEmitter->emitIns_R_A(INS_cmp, EA_4BYTE, REG_EAX, &load); | ||
theEmitter->emitIns_R_A(INS_test, EA_4BYTE, REG_EAX, &load); | ||
theEmitter->emitIns_R_A(INS_bsf, EA_4BYTE, REG_EAX, &load); | ||
theEmitter->emitIns_R_A(INS_bsr, EA_4BYTE, REG_EAX, &load); | ||
|
||
// Note: | ||
// All the tests below depend on the runtime state of the stack that these unit tests attach to, | ||
// so they might fail because a stack value is unavailable or mismatched. Since these tests are | ||
// mainly an encoding correctness check, this kind of failure may be considered harmless. | ||
|
||
theEmitter->emitIns_R_S(INS_add, EA_1BYTE, REG_EAX, 0, 0); | ||
theEmitter->emitIns_R_S(INS_add, EA_2BYTE, REG_EAX, 0, 0); | ||
theEmitter->emitIns_R_S(INS_add, EA_4BYTE, REG_EAX, 0, 0); | ||
theEmitter->emitIns_R_S(INS_add, EA_8BYTE, REG_EAX, 0, 0); | ||
theEmitter->emitIns_R_S(INS_or, EA_4BYTE, REG_EAX, 0, 0); | ||
theEmitter->emitIns_R_S(INS_adc, EA_4BYTE, REG_EAX, 0, 0); | ||
theEmitter->emitIns_R_S(INS_sbb, EA_4BYTE, REG_EAX, 0, 0); | ||
theEmitter->emitIns_R_S(INS_and, EA_4BYTE, REG_EAX, 0, 0); | ||
theEmitter->emitIns_R_S(INS_sub, EA_4BYTE, REG_EAX, 0, 0); | ||
theEmitter->emitIns_R_S(INS_xor, EA_4BYTE, REG_EAX, 0, 0); | ||
theEmitter->emitIns_R_S(INS_cmp, EA_4BYTE, REG_EAX, 0, 0); | ||
theEmitter->emitIns_R_S(INS_test, EA_4BYTE, REG_EAX, 0, 0); | ||
theEmitter->emitIns_S_R(INS_xadd, EA_2BYTE, REG_EAX, 0, 0); | ||
|
||
theEmitter->emitIns_S_I(INS_shl_N, EA_4BYTE, 0, 0, 4); | ||
theEmitter->emitIns_S(INS_shl_1, EA_4BYTE, 0, 4); | ||
|
||
theEmitter->emitIns_R_S(INS_movsx, EA_2BYTE, REG_ECX, 0, 0); | ||
theEmitter->emitIns_R_S(INS_movzx, EA_2BYTE, REG_EAX, 0, 0); | ||
theEmitter->emitIns_R_S(INS_cmovo, EA_4BYTE, REG_EAX, 0, 0); | ||
|
||
theEmitter->emitIns_R(INS_pop, EA_PTRSIZE, REG_EAX); | ||
theEmitter->emitIns_R(INS_push, EA_PTRSIZE, REG_EAX); | ||
theEmitter->emitIns_R(INS_pop_hide, EA_PTRSIZE, REG_EAX); | ||
theEmitter->emitIns_R(INS_push_hide, EA_PTRSIZE, REG_EAX); | ||
|
||
theEmitter->emitIns_S(INS_pop, EA_PTRSIZE, 0, 0); | ||
theEmitter->emitIns_I(INS_push, EA_PTRSIZE, 50); | ||
|
||
theEmitter->emitIns_R(INS_inc, EA_4BYTE, REG_EAX); | ||
theEmitter->emitIns_AR(INS_inc, EA_2BYTE, REG_EAX, 2); | ||
theEmitter->emitIns_S(INS_inc, EA_2BYTE, 0, 0); | ||
theEmitter->emitIns_R(INS_dec, EA_4BYTE, REG_EAX); | ||
theEmitter->emitIns_AR(INS_dec, EA_2BYTE, REG_EAX, 2); | ||
theEmitter->emitIns_S(INS_dec, EA_2BYTE, 0, 0); | ||
|
||
theEmitter->emitIns_S(INS_neg, EA_2BYTE, 0, 0); | ||
theEmitter->emitIns_S(INS_not, EA_2BYTE, 0, 0); | ||
} | ||
|
||
#endif // defined(DEBUG) && defined(TARGET_AMD64) | ||
|
||
#ifdef PROFILING_SUPPORTED | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not related to this PR really, but it'd be nice if we had similar tests for other ISAs/encodings (VEX, EVEX, etc). Sse2 itself is, afair, really just SimdLegacyEncoding.