diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs index 9e7999a7bc9e4..02908d68a7637 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs @@ -16,9 +16,9 @@ class AsmOffsets // Debug build offsets #if TARGET_AMD64 #if TARGET_UNIX - public const int SIZEOF__REGDISPLAY = 0x1a90; - public const int OFFSETOF__REGDISPLAY__SP = 0x1a78; - public const int OFFSETOF__REGDISPLAY__ControlPC = 0x1a80; + public const int SIZEOF__REGDISPLAY = 0x1b90; + public const int OFFSETOF__REGDISPLAY__SP = 0x1b78; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0x1b80; #else // TARGET_UNIX public const int SIZEOF__REGDISPLAY = 0xbf0; public const int OFFSETOF__REGDISPLAY__SP = 0xbd8; @@ -68,9 +68,9 @@ class AsmOffsets // Release build offsets #if TARGET_AMD64 #if TARGET_UNIX - public const int SIZEOF__REGDISPLAY = 0x1a80; - public const int OFFSETOF__REGDISPLAY__SP = 0x1a70; - public const int OFFSETOF__REGDISPLAY__ControlPC = 0x1a78; + public const int SIZEOF__REGDISPLAY = 0x1b80; + public const int OFFSETOF__REGDISPLAY__SP = 0x1b70; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0x1b78; #else // TARGET_UNIX public const int SIZEOF__REGDISPLAY = 0xbf0; public const int OFFSETOF__REGDISPLAY__SP = 0xbd0; @@ -120,7 +120,7 @@ class AsmOffsets #if TARGET_AMD64 #if TARGET_UNIX - public const int SIZEOF__PAL_LIMITED_CONTEXT = 0xc20; + public const int SIZEOF__PAL_LIMITED_CONTEXT = 0xca0; #else // TARGET_UNIX public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x4d0; #endif // TARGET_UNIX diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index ce41b79ae7dc7..f3111afaa836b 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ 
-81,33 +81,35 @@ enum CORINFO_InstructionSet InstructionSet_VectorT128=36, InstructionSet_VectorT256=37, InstructionSet_VectorT512=38, - InstructionSet_X86Base_X64=39, - InstructionSet_SSE_X64=40, - InstructionSet_SSE2_X64=41, - InstructionSet_SSE3_X64=42, - InstructionSet_SSSE3_X64=43, - InstructionSet_SSE41_X64=44, - InstructionSet_SSE42_X64=45, - InstructionSet_AVX_X64=46, - InstructionSet_AVX2_X64=47, - InstructionSet_AES_X64=48, - InstructionSet_BMI1_X64=49, - InstructionSet_BMI2_X64=50, - InstructionSet_FMA_X64=51, - InstructionSet_LZCNT_X64=52, - InstructionSet_PCLMULQDQ_X64=53, - InstructionSet_POPCNT_X64=54, - InstructionSet_AVXVNNI_X64=55, - InstructionSet_MOVBE_X64=56, - InstructionSet_X86Serialize_X64=57, - InstructionSet_EVEX_X64=58, - InstructionSet_AVX512F_X64=59, - InstructionSet_AVX512BW_X64=60, - InstructionSet_AVX512CD_X64=61, - InstructionSet_AVX512DQ_X64=62, - InstructionSet_AVX512VBMI_X64=63, - InstructionSet_AVX10v1_X64=64, - InstructionSet_AVX10v1_V512_X64=65, + InstructionSet_APX=39, + InstructionSet_X86Base_X64=40, + InstructionSet_SSE_X64=41, + InstructionSet_SSE2_X64=42, + InstructionSet_SSE3_X64=43, + InstructionSet_SSSE3_X64=44, + InstructionSet_SSE41_X64=45, + InstructionSet_SSE42_X64=46, + InstructionSet_AVX_X64=47, + InstructionSet_AVX2_X64=48, + InstructionSet_AES_X64=49, + InstructionSet_BMI1_X64=50, + InstructionSet_BMI2_X64=51, + InstructionSet_FMA_X64=52, + InstructionSet_LZCNT_X64=53, + InstructionSet_PCLMULQDQ_X64=54, + InstructionSet_POPCNT_X64=55, + InstructionSet_AVXVNNI_X64=56, + InstructionSet_MOVBE_X64=57, + InstructionSet_X86Serialize_X64=58, + InstructionSet_EVEX_X64=59, + InstructionSet_AVX512F_X64=60, + InstructionSet_AVX512BW_X64=61, + InstructionSet_AVX512CD_X64=62, + InstructionSet_AVX512DQ_X64=63, + InstructionSet_AVX512VBMI_X64=64, + InstructionSet_AVX10v1_X64=65, + InstructionSet_AVX10v1_V512_X64=66, + InstructionSet_APX_X64=67, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -148,33 
+150,35 @@ enum CORINFO_InstructionSet InstructionSet_VectorT128=36, InstructionSet_VectorT256=37, InstructionSet_VectorT512=38, - InstructionSet_X86Base_X64=39, - InstructionSet_SSE_X64=40, - InstructionSet_SSE2_X64=41, - InstructionSet_SSE3_X64=42, - InstructionSet_SSSE3_X64=43, - InstructionSet_SSE41_X64=44, - InstructionSet_SSE42_X64=45, - InstructionSet_AVX_X64=46, - InstructionSet_AVX2_X64=47, - InstructionSet_AES_X64=48, - InstructionSet_BMI1_X64=49, - InstructionSet_BMI2_X64=50, - InstructionSet_FMA_X64=51, - InstructionSet_LZCNT_X64=52, - InstructionSet_PCLMULQDQ_X64=53, - InstructionSet_POPCNT_X64=54, - InstructionSet_AVXVNNI_X64=55, - InstructionSet_MOVBE_X64=56, - InstructionSet_X86Serialize_X64=57, - InstructionSet_EVEX_X64=58, - InstructionSet_AVX512F_X64=59, - InstructionSet_AVX512BW_X64=60, - InstructionSet_AVX512CD_X64=61, - InstructionSet_AVX512DQ_X64=62, - InstructionSet_AVX512VBMI_X64=63, - InstructionSet_AVX10v1_X64=64, - InstructionSet_AVX10v1_V512_X64=65, + InstructionSet_APX=39, + InstructionSet_X86Base_X64=40, + InstructionSet_SSE_X64=41, + InstructionSet_SSE2_X64=42, + InstructionSet_SSE3_X64=43, + InstructionSet_SSSE3_X64=44, + InstructionSet_SSE41_X64=45, + InstructionSet_SSE42_X64=46, + InstructionSet_AVX_X64=47, + InstructionSet_AVX2_X64=48, + InstructionSet_AES_X64=49, + InstructionSet_BMI1_X64=50, + InstructionSet_BMI2_X64=51, + InstructionSet_FMA_X64=52, + InstructionSet_LZCNT_X64=53, + InstructionSet_PCLMULQDQ_X64=54, + InstructionSet_POPCNT_X64=55, + InstructionSet_AVXVNNI_X64=56, + InstructionSet_MOVBE_X64=57, + InstructionSet_X86Serialize_X64=58, + InstructionSet_EVEX_X64=59, + InstructionSet_AVX512F_X64=60, + InstructionSet_AVX512BW_X64=61, + InstructionSet_AVX512CD_X64=62, + InstructionSet_AVX512DQ_X64=63, + InstructionSet_AVX512VBMI_X64=64, + InstructionSet_AVX10v1_X64=65, + InstructionSet_AVX10v1_V512_X64=66, + InstructionSet_APX_X64=67, #endif // TARGET_X86 }; @@ -344,6 +348,8 @@ struct CORINFO_InstructionSetFlags 
AddInstructionSet(InstructionSet_AVX10v1_X64); if (HasInstructionSet(InstructionSet_AVX10v1_V512)) AddInstructionSet(InstructionSet_AVX10v1_V512_X64); + if (HasInstructionSet(InstructionSet_APX)) + AddInstructionSet(InstructionSet_APX_X64); #endif // TARGET_AMD64 #ifdef TARGET_X86 #endif // TARGET_X86 @@ -532,6 +538,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512_X64); + if (resultflags.HasInstructionSet(InstructionSet_APX) && !resultflags.HasInstructionSet(InstructionSet_APX_X64)) + resultflags.RemoveInstructionSet(InstructionSet_APX); + if (resultflags.HasInstructionSet(InstructionSet_APX_X64) && !resultflags.HasInstructionSet(InstructionSet_APX)) + resultflags.RemoveInstructionSet(InstructionSet_APX_X64); if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SSE); if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE)) @@ -940,6 +950,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "VectorT256"; case InstructionSet_VectorT512 : return "VectorT512"; + case InstructionSet_APX : + return "APX"; + case InstructionSet_APX_X64 : + return "APX_X64"; #endif // TARGET_AMD64 #ifdef TARGET_X86 case InstructionSet_X86Base : @@ -1018,6 +1032,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "VectorT256"; case InstructionSet_VectorT512 : return "VectorT512"; + case InstructionSet_APX : + return "APX"; #endif // TARGET_X86 default: @@ -1088,6 +1104,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case 
READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; + case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX; #endif // TARGET_AMD64 #ifdef TARGET_X86 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; @@ -1125,6 +1142,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; + case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX; #endif // TARGET_X86 default: diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 9592494c457c8..7a6479c81e5ae 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* 6f498741-c4a2-4863-9dd7-06ad7d788443 */ - 0x6f498741, - 0xc4a2, - 0x4863, - {0x9d, 0xd7, 0x06, 0xad, 0x7d, 0x78, 0x84, 0x43} +constexpr GUID JITEEVersionIdentifier = { /* 381fc250-b8f3-4cee-834e-b0bc682a09f2 */ + 0x381fc250, + 0xb8f3, + 0x4cee, + {0x83, 0x4e, 0xb0, 0xbc, 0x68, 0x2a, 0x09, 0xf2} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index 4ad8c6b4e5912..434e9bbd07bed 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -55,6 +55,7 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Avx10v1=44, READYTORUN_INSTRUCTION_Avx10v1_V512=46, READYTORUN_INSTRUCTION_EVEX=47, + READYTORUN_INSTRUCTION_Apx=48, }; diff 
--git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 48ad12bf76a1c..fa7a2c71f50a0 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -804,6 +804,7 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { NI_Illegal, NI_Illegal }, // VectorT128 { NI_Illegal, NI_Illegal }, // VectorT256 { NI_Illegal, NI_Illegal }, // VectorT512 + { NI_Illegal, NI_Illegal }, // APX { FIRST_NI_X86Base_X64, LAST_NI_X86Base_X64 }, { FIRST_NI_SSE_X64, LAST_NI_SSE_X64 }, { FIRST_NI_SSE2_X64, LAST_NI_SSE2_X64 }, diff --git a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp index 694187631dd75..de8c0b941b2bd 100644 --- a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp @@ -37,6 +37,10 @@ #define REDHAWK_PALEXPORT extern "C" #define REDHAWK_PALAPI __stdcall +#ifndef XSTATE_MASK_APX +#define XSTATE_MASK_APX (0x80000) +#endif // XSTATE_MASK_APX + // Index for the fiber local storage of the attached thread pointer static uint32_t g_flsIndex = FLS_OUT_OF_INDEXES; @@ -541,7 +545,7 @@ REDHAWK_PALEXPORT CONTEXT* PalAllocateCompleteOSContext(_Out_ uint8_t** contextB #endif //TARGET_X86 #if defined(TARGET_X86) || defined(TARGET_AMD64) - const DWORD64 xStateFeatureMask = XSTATE_MASK_AVX | XSTATE_MASK_AVX512; + const DWORD64 xStateFeatureMask = XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX; const ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_MPX | xStateFeatureMask; #elif defined(TARGET_ARM64) const DWORD64 xStateFeatureMask = XSTATE_MASK_ARM64_SVE; @@ -632,9 +636,9 @@ REDHAWK_PALEXPORT _Success_(return) bool REDHAWK_PALAPI PalGetCompleteThreadCont // This should not normally fail. // The system silently ignores any feature specified in the FeatureMask which is not enabled on the processor. 
#if defined(TARGET_X86) || defined(TARGET_AMD64) - if (!SetXStateFeaturesMask(pCtx, XSTATE_MASK_AVX | XSTATE_MASK_AVX512)) + if (!SetXStateFeaturesMask(pCtx, XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX)) { - _ASSERTE(!"Could not apply XSTATE_MASK_AVX | XSTATE_MASK_AVX512"); + _ASSERTE(!"Could not apply XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX"); return FALSE; } #elif defined(TARGET_ARM64) diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index 85c51f23a032a..48631c96621ec 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -1374,12 +1374,14 @@ typedef struct _KNONVOLATILE_CONTEXT_POINTERS { #define XSTATE_AVX512_KMASK (5) #define XSTATE_AVX512_ZMM_H (6) #define XSTATE_AVX512_ZMM (7) +#define XSTATE_APX (19) #define XSTATE_MASK_GSSE (UI64(1) << (XSTATE_GSSE)) #define XSTATE_MASK_AVX (XSTATE_MASK_GSSE) #define XSTATE_MASK_AVX512 ((UI64(1) << (XSTATE_AVX512_KMASK)) | \ (UI64(1) << (XSTATE_AVX512_ZMM_H)) | \ (UI64(1) << (XSTATE_AVX512_ZMM))) +#define XSTATE_MASK_APX (UI64(1) << (XSTATE_APX)) typedef struct DECLSPEC_ALIGN(16) _M128A { ULONGLONG Low; @@ -1616,6 +1618,27 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { M512 Zmm30; M512 Zmm31; }; + + struct + { + DWORD64 Egpr16; + DWORD64 Egpr17; + DWORD64 Egpr18; + DWORD64 Egpr19; + DWORD64 Egpr20; + DWORD64 Egpr21; + DWORD64 Egpr22; + DWORD64 Egpr23; + DWORD64 Egpr24; + DWORD64 Egpr25; + DWORD64 Egpr26; + DWORD64 Egpr27; + DWORD64 Egpr28; + DWORD64 Egpr29; + DWORD64 Egpr30; + DWORD64 Egpr31; + }; + } CONTEXT, *PCONTEXT, *LPCONTEXT; // diff --git a/src/coreclr/pal/src/arch/amd64/asmconstants.h b/src/coreclr/pal/src/arch/amd64/asmconstants.h index d5a72cf6eda23..8e97efdbf6882 100644 --- a/src/coreclr/pal/src/arch/amd64/asmconstants.h +++ b/src/coreclr/pal/src/arch/amd64/asmconstants.h @@ -8,12 +8,14 @@ #define XSTATE_AVX512_KMASK (5) #define XSTATE_AVX512_ZMM_H (6) #define XSTATE_AVX512_ZMM (7) +#define XSTATE_APX (19) #define XSTATE_MASK_GSSE (1 << 
(XSTATE_GSSE)) #define XSTATE_MASK_AVX (XSTATE_MASK_GSSE) #define XSTATE_MASK_AVX512 ((1 << (XSTATE_AVX512_KMASK)) | \ (1 << (XSTATE_AVX512_ZMM_H)) | \ (1 << (XSTATE_AVX512_ZMM))) +#define XSTATE_MASK_APX (1 << (XSTATE_APX)) // The arch bit is normally set in the flag constants below. Since this is already arch-specific code and the arch bit is not // relevant, the arch bit is excluded from the flag constants below for simpler tests. @@ -91,7 +93,8 @@ #define CONTEXT_KMask0 CONTEXT_Ymm0H+(16*16) #define CONTEXT_Zmm0H CONTEXT_KMask0+(8*8) #define CONTEXT_Zmm16 CONTEXT_Zmm0H+(32*16) -#define CONTEXT_Size CONTEXT_Zmm16+(64*16) +#define CONTEXT_Egpr CONTEXT_Zmm16+(64*16) +#define CONTEXT_Size CONTEXT_Egpr+(8*16) #else // HOST_64BIT diff --git a/src/coreclr/pal/src/arch/amd64/context2.S b/src/coreclr/pal/src/arch/amd64/context2.S index dba772f9dbbf5..2b183798f0067 100644 --- a/src/coreclr/pal/src/arch/amd64/context2.S +++ b/src/coreclr/pal/src/arch/amd64/context2.S @@ -183,6 +183,46 @@ LOCAL_LABEL(Done_Restore_CONTEXT_FLOATING_POINT): kmovq k6, qword ptr [rdi + (CONTEXT_KMask0 + 6 * 8)] kmovq k7, qword ptr [rdi + (CONTEXT_KMask0 + 7 * 8)] + test BYTE PTR [rdi + CONTEXT_XStateFeaturesMask], XSTATE_MASK_APX + je LOCAL_LABEL(Done_Restore_CONTEXT_XSTATE) + + // TODO-XArch-APX: + // we are using raw hex code here to emit EGPRs-related changes, + // we will need to come back and re-write this part when assembler supports EGPRs. 
+ + // mov r16, qword ptr [rdi + CONTEXT_Egpr + 0 * 8] + .byte 0xd5, 0x48, 0x8b, 0x87, 0x20, 0x0c, 0x00, 0x00 + // mov r17, qword ptr [rdi + CONTEXT_Egpr + 1 * 8] + .byte 0xd5, 0x48, 0x8b, 0x8f, 0x28, 0x0c, 0x00, 0x00 + // mov r18, qword ptr [rdi + CONTEXT_Egpr + 2 * 8] + .byte 0xd5, 0x48, 0x8b, 0x97, 0x30, 0x0c, 0x00, 0x00 + // mov r19, qword ptr [rdi + CONTEXT_Egpr + 3 * 8] + .byte 0xd5, 0x48, 0x8b, 0x9f, 0x38, 0x0c, 0x00, 0x00 + // mov r20, qword ptr [rdi + CONTEXT_Egpr + 4 * 8] + .byte 0xd5, 0x48, 0x8b, 0xa7, 0x40, 0x0c, 0x00, 0x00 + // mov r21, qword ptr [rdi + CONTEXT_Egpr + 5 * 8] + .byte 0xd5, 0x48, 0x8b, 0xaf, 0x48, 0x0c, 0x00, 0x00 + // mov r22, qword ptr [rdi + CONTEXT_Egpr + 6 * 8] + .byte 0xd5, 0x48, 0x8b, 0xb7, 0x50, 0x0c, 0x00, 0x00 + // mov r23, qword ptr [rdi + CONTEXT_Egpr + 7 * 8] + .byte 0xd5, 0x48, 0x8b, 0xbf, 0x58, 0x0c, 0x00, 0x00 + // mov r24, qword ptr [rdi + CONTEXT_Egpr + 8 * 8] + .byte 0xd5, 0x4c, 0x8b, 0x87, 0x60, 0x0c, 0x00, 0x00 + // mov r25, qword ptr [rdi + CONTEXT_Egpr + 9 * 8] + .byte 0xd5, 0x4c, 0x8b, 0x8f, 0x68, 0x0c, 0x00, 0x00 + // mov r26, qword ptr [rdi + CONTEXT_Egpr + 10 * 8] + .byte 0xd5, 0x4c, 0x8b, 0x97, 0x70, 0x0c, 0x00, 0x00 + // mov r27, qword ptr [rdi + CONTEXT_Egpr + 11 * 8] + .byte 0xd5, 0x4c, 0x8b, 0x9f, 0x78, 0x0c, 0x00, 0x00 + // mov r28, qword ptr [rdi + CONTEXT_Egpr + 12 * 8] + .byte 0xd5, 0x4c, 0x8b, 0xa7, 0x80, 0x0c, 0x00, 0x00 + // mov r29, qword ptr [rdi + CONTEXT_Egpr + 13 * 8] + .byte 0xd5, 0x4c, 0x8b, 0xaf, 0x88, 0x0c, 0x00, 0x00 + // mov r30, qword ptr [rdi + CONTEXT_Egpr + 14 * 8] + .byte 0xd5, 0x4c, 0x8b, 0xb7, 0x90, 0x0c, 0x00, 0x00 + // mov r31, qword ptr [rdi + CONTEXT_Egpr + 15 * 8] + .byte 0xd5, 0x4c, 0x8b, 0xbf, 0x98, 0x0c, 0x00, 0x00 + LOCAL_LABEL(Done_Restore_CONTEXT_XSTATE): test BYTE PTR [rdi + CONTEXT_ContextFlags], CONTEXT_CONTROL diff --git a/src/coreclr/pal/src/include/pal/context.h b/src/coreclr/pal/src/include/pal/context.h index 7dad2c5967681..5d515a1434e00 100644 --- 
a/src/coreclr/pal/src/include/pal/context.h +++ b/src/coreclr/pal/src/include/pal/context.h @@ -60,6 +60,7 @@ using asm_sigcontext::_xstate; #if defined(XSTATE_SUPPORTED) || (defined(HOST_AMD64) && defined(HAVE_MACH_EXCEPTIONS)) bool Xstate_IsAvx512Supported(); +bool Xstate_IsApxSupported(); #endif // XSTATE_SUPPORTED || (HOST_AMD64 && HAVE_MACH_EXCEPTIONS) #if defined(HOST_64BIT) && defined(HOST_ARM64) && !defined(TARGET_FREEBSD) && !defined(TARGET_OSX) @@ -469,6 +470,14 @@ struct sve_context { #define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK | XFEATURE_MASK_ZMM_Hi256 | XFEATURE_MASK_Hi16_ZMM) #endif // XFEATURE_MASK_AVX512 +#ifndef XSTATE_APX +#define XSTATE_APX 19 +#endif // XSTATE_APX + +#ifndef XFEATURE_MASK_APX +#define XFEATURE_MASK_APX (1 << XSTATE_APX) +#endif // XFEATURE_MASK_APX + #if HAVE__FPX_SW_BYTES_WITH_XSTATE_BV #define FPREG_FpxSwBytes_xfeatures(uc) FPREG_FpxSwBytes(uc)->xstate_bv #else @@ -491,7 +500,7 @@ struct Xstate_ExtendedFeature uint32_t size; }; -#define Xstate_ExtendedFeatures_Count (XSTATE_AVX512_ZMM + 1) +#define Xstate_ExtendedFeatures_Count (XSTATE_APX + 1) extern Xstate_ExtendedFeature Xstate_ExtendedFeatures[Xstate_ExtendedFeatures_Count]; inline _fpx_sw_bytes *FPREG_FpxSwBytes(const ucontext_t *uc) @@ -628,6 +637,27 @@ inline void *FPREG_Xstate_Hi16Zmm(const ucontext_t *uc, uint32_t *featureSize) _ASSERTE(FPREG_HasAvx512Registers(uc)); return FPREG_Xstate_ExtendedFeature(uc, featureSize, XSTATE_AVX512_ZMM); } + +inline bool FPREG_HasApxRegisters(const ucontext_t *uc) +{ + if (!FPREG_HasExtendedState(uc)) + { + return false; + } + + if ((FPREG_FpxSwBytes_xfeatures(uc) & XFEATURE_MASK_APX) != XFEATURE_MASK_APX) + { + return false; + } + + return Xstate_IsApxSupported(); +} + +inline void *FPREG_Xstate_Egpr(const ucontext_t *uc, uint32_t *featureSize) +{ + _ASSERTE(FPREG_HasApxRegisters(uc)); + return FPREG_Xstate_ExtendedFeature(uc, featureSize, XSTATE_APX); +} #endif // XSTATE_SUPPORTED && HOST_AMD64 ///////////////////// diff 
--git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index 65afa791bf8df..ecd252936299d 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -389,6 +389,59 @@ bool Xstate_IsAvx512Supported() return Xstate_Avx512Supported == 1; #endif } + +bool Xstate_IsApxSupported() +{ +#if defined(HAVE_MACH_EXCEPTIONS) + // TODO-xarch-apx: I assume OSX will never support APX + return false; +#else + static int Xstate_ApxSupported = -1; + + if (Xstate_ApxSupported == -1) + { + int cpuidInfo[4]; + + const int CPUID_EAX = 0; + const int CPUID_EBX = 1; + const int CPUID_ECX = 2; + const int CPUID_EDX = 3; + +#ifdef _DEBUG + // We should only be calling this function if we know the extended feature exists + __cpuid(cpuidInfo, 0x00000000); + _ASSERTE(static_cast(cpuidInfo[CPUID_EAX]) >= 0x0D); +#endif // _DEBUG + + __cpuidex(cpuidInfo, 0x0000000D, 0x00000000); + + if ((cpuidInfo[CPUID_EAX] & XSTATE_MASK_APX) == XSTATE_MASK_APX) + { + // Knight's Landing and Knight's Mill shipped without all 5 of the "baseline" + // AVX-512 ISAs that are required by x86-64-v4. Specifically they do not include + // BW, DQ, or VL. RyuJIT currently requires all 5 ISAs to be present so we will + // only enable Avx512 context save/restore when all exist. This requires us to + // query which ISAs are actually supported to ensure they're all present. 
+ + __cpuidex(cpuidInfo, 0x00000007, 0x00000001); + + const int requiredApxFlags = (1 << 21); + + if ((cpuidInfo[CPUID_EDX] & requiredApxFlags) == requiredApxFlags) + { + Xstate_ApxSupported = 1; + } + } + + if (Xstate_ApxSupported == -1) + { + Xstate_ApxSupported = 0; + } + } + + return Xstate_ApxSupported == 1; +#endif +} #endif // XSTATE_SUPPORTED || defined(HOST_AMD64) && defined(HAVE_MACH_EXCEPTIONS) #if !HAVE_MACH_EXCEPTIONS @@ -818,6 +871,18 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) dest = FPREG_Xstate_Hi16Zmm(native, &size); _ASSERT(size == (sizeof(M512) * 16)); memcpy_s(dest, sizeof(M512) * 16, &lpContext->Zmm16, sizeof(M512) * 16); + +#ifndef TARGET_OSX + // TODO-xarch-apx: I suppose OSX will not support APX. + if (FPREG_HasApxRegisters(native)) + { + _ASSERT((lpContext->XStateFeaturesMask & XSTATE_MASK_APX) == XSTATE_MASK_APX); + + dest = FPREG_Xstate_Egpr(native, &size); + _ASSERT(size == (sizeof(DWORD64) * 16)); + memcpy_s(dest, sizeof(DWORD64) * 16, &lpContext->Egpr16, sizeof(DWORD64) * 16); + } +#endif // !TARGET_OSX } } #elif defined(HOST_ARM64) @@ -1166,6 +1231,16 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex lpContext->XStateFeaturesMask |= XSTATE_MASK_AVX512; } +#if !defined(TARGET_OSX) + if (FPREG_HasApxRegisters(native)) + { + src = FPREG_Xstate_Egpr(native, &size); + _ASSERT(size == (sizeof(DWORD64) * 16)); + memcpy_s(&lpContext->Egpr16, sizeof(DWORD64) * 16, src, sizeof(DWORD64) * 16); + + lpContext->XStateFeaturesMask |= XSTATE_MASK_APX; + } +#endif // TARGET_OSX } #elif defined(HOST_ARM64) if (sve && sve->head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl))) @@ -2118,10 +2193,14 @@ CONTEXT& CONTEXT::operator=(const CONTEXT& ctx) size_t copySize; if (ctx.ContextFlags & CONTEXT_XSTATE & CONTEXT_AREA_MASK) { - if ((ctx.XStateFeaturesMask & XSTATE_MASK_AVX512) == XSTATE_MASK_AVX512) + if ((ctx.XStateFeaturesMask & XSTATE_MASK_APX) == XSTATE_MASK_APX) { copySize 
= sizeof(CONTEXT); } + else if ((ctx.XStateFeaturesMask & XSTATE_MASK_AVX512) == XSTATE_MASK_AVX512) + { + copySize = offsetof(CONTEXT, Egpr16); + } else { copySize = offsetof(CONTEXT, KMask0); diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index 03d6ab76d365b..dcbf4ec498134 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -78,6 +78,7 @@ private static class XArchIntrinsicConstants public const int Serialize = 0x20000; public const int Avx10v1 = 0x40000; public const int Evex = 0x80000; + public const int Apx = 0x100000; public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) { @@ -135,6 +136,8 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) builder.AddSupportedInstructionSet("avx10v1_v512"); if ((flags & Evex) != 0) builder.AddSupportedInstructionSet("evex"); + if ((flags & Apx) != 0) + builder.AddSupportedInstructionSet("apx"); } public static int FromInstructionSet(InstructionSet instructionSet) @@ -199,6 +202,8 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_AVX10v1_V512_X64 => (Avx10v1 | Avx512), InstructionSet.X64_EVEX => Evex, InstructionSet.X64_EVEX_X64 => Evex, + InstructionSet.X64_APX => Apx, + InstructionSet.X64_APX_X64 => Apx, // Baseline ISAs - they're always available InstructionSet.X64_SSE => 0, diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index dd6a57731444e..fe151f54a7369 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -58,6 +58,7 @@ public enum ReadyToRunInstructionSet Avx10v1=44, Avx10v1_V512=46, EVEX=47, + Apx=48, } } diff --git 
a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 79e1a34afd165..1d8f2e8703ca3 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -122,6 +122,8 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_VectorT128: return ReadyToRunInstructionSet.VectorT128; case InstructionSet.X64_VectorT256: return ReadyToRunInstructionSet.VectorT256; case InstructionSet.X64_VectorT512: return ReadyToRunInstructionSet.VectorT512; + case InstructionSet.X64_APX: return ReadyToRunInstructionSet.Apx; + case InstructionSet.X64_APX_X64: return ReadyToRunInstructionSet.Apx; default: throw new Exception("Unknown instruction set"); } @@ -196,6 +198,8 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_VectorT128: return ReadyToRunInstructionSet.VectorT128; case InstructionSet.X86_VectorT256: return ReadyToRunInstructionSet.VectorT256; case InstructionSet.X86_VectorT512: return ReadyToRunInstructionSet.VectorT512; + case InstructionSet.X86_APX: return ReadyToRunInstructionSet.Apx; + case InstructionSet.X86_APX_X64: return null; default: throw new Exception("Unknown instruction set"); } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 0152638396fa7..38c4d0835ad2e 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -79,6 +79,7 @@ public enum InstructionSet X64_VectorT128 = InstructionSet_X64.VectorT128, X64_VectorT256 = InstructionSet_X64.VectorT256, X64_VectorT512 = InstructionSet_X64.VectorT512, + X64_APX = InstructionSet_X64.APX, X64_X86Base_X64 = InstructionSet_X64.X86Base_X64, X64_SSE_X64 = InstructionSet_X64.SSE_X64, 
X64_SSE2_X64 = InstructionSet_X64.SSE2_X64, @@ -106,6 +107,7 @@ public enum InstructionSet X64_AVX512VBMI_X64 = InstructionSet_X64.AVX512VBMI_X64, X64_AVX10v1_X64 = InstructionSet_X64.AVX10v1_X64, X64_AVX10v1_V512_X64 = InstructionSet_X64.AVX10v1_V512_X64, + X64_APX_X64 = InstructionSet_X64.APX_X64, X86_X86Base = InstructionSet_X86.X86Base, X86_SSE = InstructionSet_X86.SSE, X86_SSE2 = InstructionSet_X86.SSE2, @@ -144,6 +146,7 @@ public enum InstructionSet X86_VectorT128 = InstructionSet_X86.VectorT128, X86_VectorT256 = InstructionSet_X86.VectorT256, X86_VectorT512 = InstructionSet_X86.VectorT512, + X86_APX = InstructionSet_X86.APX, X86_X86Base_X64 = InstructionSet_X86.X86Base_X64, X86_SSE_X64 = InstructionSet_X86.SSE_X64, X86_SSE2_X64 = InstructionSet_X86.SSE2_X64, @@ -171,6 +174,7 @@ public enum InstructionSet X86_AVX512VBMI_X64 = InstructionSet_X86.AVX512VBMI_X64, X86_AVX10v1_X64 = InstructionSet_X86.AVX10v1_X64, X86_AVX10v1_V512_X64 = InstructionSet_X86.AVX10v1_V512_X64, + X86_APX_X64 = InstructionSet_X86.APX_X64, } public enum InstructionSet_ARM64 { @@ -245,33 +249,35 @@ public enum InstructionSet_X64 VectorT128 = 36, VectorT256 = 37, VectorT512 = 38, - X86Base_X64 = 39, - SSE_X64 = 40, - SSE2_X64 = 41, - SSE3_X64 = 42, - SSSE3_X64 = 43, - SSE41_X64 = 44, - SSE42_X64 = 45, - AVX_X64 = 46, - AVX2_X64 = 47, - AES_X64 = 48, - BMI1_X64 = 49, - BMI2_X64 = 50, - FMA_X64 = 51, - LZCNT_X64 = 52, - PCLMULQDQ_X64 = 53, - POPCNT_X64 = 54, - AVXVNNI_X64 = 55, - MOVBE_X64 = 56, - X86Serialize_X64 = 57, - EVEX_X64 = 58, - AVX512F_X64 = 59, - AVX512BW_X64 = 60, - AVX512CD_X64 = 61, - AVX512DQ_X64 = 62, - AVX512VBMI_X64 = 63, - AVX10v1_X64 = 64, - AVX10v1_V512_X64 = 65, + APX = 39, + X86Base_X64 = 40, + SSE_X64 = 41, + SSE2_X64 = 42, + SSE3_X64 = 43, + SSSE3_X64 = 44, + SSE41_X64 = 45, + SSE42_X64 = 46, + AVX_X64 = 47, + AVX2_X64 = 48, + AES_X64 = 49, + BMI1_X64 = 50, + BMI2_X64 = 51, + FMA_X64 = 52, + LZCNT_X64 = 53, + PCLMULQDQ_X64 = 54, + POPCNT_X64 = 55, + AVXVNNI_X64 = 
56, + MOVBE_X64 = 57, + X86Serialize_X64 = 58, + EVEX_X64 = 59, + AVX512F_X64 = 60, + AVX512BW_X64 = 61, + AVX512CD_X64 = 62, + AVX512DQ_X64 = 63, + AVX512VBMI_X64 = 64, + AVX10v1_X64 = 65, + AVX10v1_V512_X64 = 66, + APX_X64 = 67, } public enum InstructionSet_X86 @@ -316,33 +322,35 @@ public enum InstructionSet_X86 VectorT128 = 36, VectorT256 = 37, VectorT512 = 38, - X86Base_X64 = 39, - SSE_X64 = 40, - SSE2_X64 = 41, - SSE3_X64 = 42, - SSSE3_X64 = 43, - SSE41_X64 = 44, - SSE42_X64 = 45, - AVX_X64 = 46, - AVX2_X64 = 47, - AES_X64 = 48, - BMI1_X64 = 49, - BMI2_X64 = 50, - FMA_X64 = 51, - LZCNT_X64 = 52, - PCLMULQDQ_X64 = 53, - POPCNT_X64 = 54, - AVXVNNI_X64 = 55, - MOVBE_X64 = 56, - X86Serialize_X64 = 57, - EVEX_X64 = 58, - AVX512F_X64 = 59, - AVX512BW_X64 = 60, - AVX512CD_X64 = 61, - AVX512DQ_X64 = 62, - AVX512VBMI_X64 = 63, - AVX10v1_X64 = 64, - AVX10v1_V512_X64 = 65, + APX = 39, + X86Base_X64 = 40, + SSE_X64 = 41, + SSE2_X64 = 42, + SSE3_X64 = 43, + SSSE3_X64 = 44, + SSE41_X64 = 45, + SSE42_X64 = 46, + AVX_X64 = 47, + AVX2_X64 = 48, + AES_X64 = 49, + BMI1_X64 = 50, + BMI2_X64 = 51, + FMA_X64 = 52, + LZCNT_X64 = 53, + PCLMULQDQ_X64 = 54, + POPCNT_X64 = 55, + AVXVNNI_X64 = 56, + MOVBE_X64 = 57, + X86Serialize_X64 = 58, + EVEX_X64 = 59, + AVX512F_X64 = 60, + AVX512BW_X64 = 61, + AVX512CD_X64 = 62, + AVX512DQ_X64 = 63, + AVX512VBMI_X64 = 64, + AVX10v1_X64 = 65, + AVX10v1_V512_X64 = 66, + APX_X64 = 67, } public unsafe struct InstructionSetFlags : IEnumerable @@ -670,6 +678,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_APX)) + resultflags.AddInstructionSet(InstructionSet.X64_APX_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_APX_X64)) + 
resultflags.AddInstructionSet(InstructionSet.X64_APX); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) @@ -1001,6 +1013,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_APX_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_APX); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_SSE); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) @@ -1326,6 +1340,7 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X64_VectorT128, true); yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X64_VectorT256, true); yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X64_VectorT512, true); + yield return new InstructionSetInfo("apx", "Apx", InstructionSet.X64_APX, true); break; case TargetArchitecture.X86: @@ -1367,6 +1382,7 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X86_VectorT128, true); yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X86_VectorT256, true); yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X86_VectorT512, true); + yield return new InstructionSetInfo("apx", "Apx", InstructionSet.X86_APX, true); break; } } @@ -1452,6 +1468,8 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture) AddInstructionSet(InstructionSet.X64_AVX10v1_X64); if 
(HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64); + if (HasInstructionSet(InstructionSet.X64_APX)) + AddInstructionSet(InstructionSet.X64_APX_X64); break; case TargetArchitecture.X86: @@ -1504,6 +1522,7 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X64_AVX512VBMI_X64); AddInstructionSet(InstructionSet.X64_AVX10v1_X64); AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64); + AddInstructionSet(InstructionSet.X64_APX_X64); break; case TargetArchitecture.X86: @@ -1534,6 +1553,7 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X86_AVX512VBMI_X64); AddInstructionSet(InstructionSet.X86_AVX10v1_X64); AddInstructionSet(InstructionSet.X86_AVX10v1_V512_X64); + AddInstructionSet(InstructionSet.X86_APX_X64); break; } } @@ -1835,6 +1855,12 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite case "VectorT512": { return InstructionSet.X64_VectorT512; } + case "Apx": + if (nestedTypeName == "X64") + { return InstructionSet.X64_APX_X64; } + else + { return InstructionSet.X64_APX; } + } break; @@ -1947,6 +1973,9 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite case "VectorT512": { return InstructionSet.X86_VectorT512; } + case "Apx": + { return InstructionSet.X86_APX; } + } break; diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 1876255732ab8..1e0f59c7f6714 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -22,6 +22,8 @@ ; DO NOT CHANGE R2R NUMERIC VALUES OF THE EXISTING SETS. Changing R2R numeric values definitions would be R2R format breaking change. 
+; The ISA definitions should also be mapped to `hwintrinsicIsaRangeArray` in hwintrinsic.cpp. + ; Definition of X86 instruction sets definearch ,X86 ,32Bit ,X64, X64 @@ -63,6 +65,7 @@ instructionset ,X86 ,Avx10v1_V512 , ,46 ,AVX10v1_V512 instructionset ,X86 ,VectorT128 , ,39 ,VectorT128 ,vectort128 instructionset ,X86 ,VectorT256 , ,40 ,VectorT256 ,vectort256 instructionset ,X86 ,VectorT512 , ,41 ,VectorT512 ,vectort512 +instructionset ,X86 ,Apx , ,48 ,APX ,apx instructionset64bit,X86 ,X86Base instructionset64bit,X86 ,SSE @@ -91,6 +94,7 @@ instructionset64bit,X86 ,AVX512DQ instructionset64bit,X86 ,AVX512VBMI instructionset64bit,X86 ,AVX10v1 instructionset64bit,X86 ,AVX10v1_V512 +instructionset64bit,X86 ,APX vectorinstructionset,X86 ,Vector128 vectorinstructionset,X86 ,Vector256 diff --git a/src/coreclr/vm/amd64/asmconstants.h b/src/coreclr/vm/amd64/asmconstants.h index 713e3401d65d4..4b38aeeaca52d 100644 --- a/src/coreclr/vm/amd64/asmconstants.h +++ b/src/coreclr/vm/amd64/asmconstants.h @@ -238,8 +238,15 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__VASigCookie__pNDirectILStub #if defined(UNIX_AMD64_ABI) && !defined(HOST_WINDOWS) // Expression is too complicated, is currently: -// (8*6 + 4*2 + 2*6 + 4 + 8*6 + 8*16 + 8 + /*XMM_SAVE_AREA32*/(2*2 + 1*2 + 2 + 4 + 2*2 + 4 + 2*2 + 4*2 + 16*8 + 16*16 + 1*96) + 26*16 + 8 + 8*5 + /*XSTATE*/ + 8 + 8 + /*XSTATE_AVX*/ 16*16 + /*XSTATE_AVX512_KMASK*/ 8*8 + /*XSTATE_AVX512_ZMM_H*/ 32*16 + /*XSTATE_AVX512_ZMM*/ 64*16) -#define SIZEOF__CONTEXT (3104) +// (8*6 + 4*2 + 2*6 + 4 + 8*6 + 8*16 + 8 + +// /*XMM_SAVE_AREA32*/(2*2 + 1*2 + 2 + 4 + 2*2 + 4 + 2*2 + 4*2 + 16*8 + 16*16 + 1*96) + 26*16 + 8 + 8*5 + +// /*XSTATE*/ + 8 + 8 + +// /*XSTATE_AVX*/ 16*16 + +// /*XSTATE_AVX512_KMASK*/ 8*8 + +// /*XSTATE_AVX512_ZMM_H*/ 32*16 + +// /*XSTATE_AVX512_ZMM*/ 64*16 + +// /*XSTATE_APX*/ 8*16) +#define SIZEOF__CONTEXT (3232) #else // Expression is too complicated, is currently: // (8*6 + 4*2 + 2*6 + 4 + 8*6 + 8*16 + 8 + /*XMM_SAVE_AREA32*/(2*2 + 1*2 + 2 + 4 
+ 2*2 + 4 + 2*2 + 4*2 + 16*8 + 16*16 + 1*96) + 26*16 + 8 + 8*5) diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index eb774f28fab4b..6da40b2d4801e 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1416,6 +1416,12 @@ void EEJitManager::SetCpuInfo() } } } + #if defined(TARGET_AMD64) + if ((cpuFeatures & XArchIntrinsicConstants_Apx) != 0) + { + CPUCompileFlags.Set(InstructionSet_APX); + } + #endif // TARGET_AMD64 #elif defined(TARGET_ARM64) #if !defined(TARGET_WINDOWS) diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 6065b711e49bb..6bc046e2b40bd 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -71,6 +71,16 @@ extern "C" void RedirectedHandledJITCaseForGCStress_Stub(void); #define IS_VALID_WRITE_PTR(addr, size) _ASSERTE((addr) != NULL) #define IS_VALID_CODE_PTR(addr) _ASSERTE((addr) != NULL) +#if defined(TARGET_AMD64) || defined(TARGET_X86) +// These values should be picked up from winnt.h, defining them in case they are missing there. 
+#ifndef XSTATE_APX +#define XSTATE_APX (19) +#endif // XSTATE_APX + +#ifndef XSTATE_MASK_APX +#define XSTATE_MASK_APX (1 << XSTATE_APX) +#endif // XSTATE_MASK_APX +#endif // TARGET_AMD64 || TARGET_X86 void ThreadSuspend::SetSuspendRuntimeInProgress() { @@ -1961,7 +1971,7 @@ CONTEXT* AllocateOSContextHelper(BYTE** contextBuffer) DWORD context = CONTEXT_COMPLETE; #if defined(TARGET_X86) || defined(TARGET_AMD64) - const DWORD64 xStateFeatureMask = XSTATE_MASK_AVX | XSTATE_MASK_AVX512; + const DWORD64 xStateFeatureMask = XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX; const ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_MPX | xStateFeatureMask; #elif defined(TARGET_ARM64) const DWORD64 xStateFeatureMask = XSTATE_MASK_ARM64_SVE; @@ -2900,7 +2910,7 @@ BOOL Thread::RedirectThreadAtHandledJITCase(PFN_REDIRECTTARGET pTgt) // The system silently ignores any feature specified in the FeatureMask // which is not enabled on the processor. #if defined(TARGET_X86) || defined(TARGET_AMD64) - SetXStateFeaturesMask(pCtx, XSTATE_MASK_AVX | XSTATE_MASK_AVX512); + SetXStateFeaturesMask(pCtx, XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX); #elif defined(TARGET_ARM64) if (g_pfnSetXStateFeaturesMask != NULL) { @@ -3051,7 +3061,7 @@ BOOL Thread::RedirectCurrentThreadAtHandledJITCase(PFN_REDIRECTTARGET pTgt, CONT if (srcFeatures != 0) { #if defined(TARGET_X86) || defined(TARGET_AMD64) - const DWORD64 xStateFeatureMask = XSTATE_MASK_AVX | XSTATE_MASK_AVX512; + const DWORD64 xStateFeatureMask = XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX; #elif defined(TARGET_ARM64) const DWORD64 xStateFeatureMask = XSTATE_MASK_ARM64_SVE; #endif diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index 8d6a063ce4d2f..1d1bbf9e9bc2a 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -72,6 +72,10 @@ static uint32_t xmmYmmStateSupport() #define XSTATE_MASK_AVX512 (0xE0) /* 0b1110_0000 */ #endif // 
XSTATE_MASK_AVX512 +#ifndef XSTATE_MASK_APX +#define XSTATE_MASK_APX (0x80000) +#endif // XSTATE_MASK_APX + static uint32_t avx512StateSupport() { #if defined(HOST_APPLE) @@ -99,6 +103,23 @@ static uint32_t avx512StateSupport() #endif } +static uint32_t apxStateSupport() +{ +#if defined(HOST_APPLE) + return 0; +#elif defined(TARGET_X86) + return 0; +#else + uint32_t eax; + __asm(" xgetbv\n" \ + : "=a"(eax) /*output in eax*/\ + : "c"(0) /*inputs - 0 in ecx*/\ + : "edx" /* registers that are clobbered*/ + ); + return ((eax & 0x80000) == 0x80000) ? 1 : 0; +#endif // TARGET_AMD64 +} + static bool IsAvxEnabled() { return true; @@ -108,6 +129,15 @@ static bool IsAvx512Enabled() { return true; } + +static bool IsApxEnabled() +{ +#if defined(TARGET_X86) + return false; +#else + return true; +#endif // TARGET_AMD64 +} #endif // defined(HOST_X86) || defined(HOST_AMD64) #endif // HOST_UNIX @@ -125,6 +155,15 @@ static uint32_t avx512StateSupport() return ((_xgetbv(0) & 0xE6) == 0x0E6) ? 1 : 0; } +static uint32_t apxStateSupport() +{ +#if defined(TARGET_X86) + return 0; +#else + return ((_xgetbv(0) & 0x80000) == 0x80000) ? 1 : 0; +#endif +} + static bool IsAvxEnabled() { DWORD64 FeatureMask = GetEnabledXStateFeatures(); @@ -137,6 +176,22 @@ static bool IsAvx512Enabled() return ((FeatureMask & XSTATE_MASK_AVX512) != 0); } +// TODO-XArch-APX: +// We will eventually need to remove this macro when Windows officially supports APX. 
+#ifndef XSTATE_MASK_APX +#define XSTATE_MASK_APX (0x80000) +#endif // XSTATE_MASK_APX + +static bool IsApxEnabled() +{ +#ifdef TARGET_X86 + return false; +#else + DWORD64 FeatureMask = GetEnabledXStateFeatures(); + return ((FeatureMask & XSTATE_MASK_APX) != 0); +#endif +} + #endif // defined(HOST_X86) || defined(HOST_AMD64) #endif // HOST_WINDOWS @@ -252,6 +307,14 @@ int minipal_getcpufeatures(void) result |= XArchIntrinsicConstants_AvxVnni; } + if (IsApxEnabled() && apxStateSupport()) + { + if ((cpuidInfo[CPUID_EDX] & (1 << 21)) != 0) // Apx + { + result |= XArchIntrinsicConstants_Apx; + } + } + if ((cpuidInfo[CPUID_EDX] & (1 << 19)) != 0) // Avx10 { __cpuidex(cpuidInfo, 0x00000024, 0x00000000); diff --git a/src/native/minipal/cpufeatures.h b/src/native/minipal/cpufeatures.h index 6422fe33f9787..ef56c3baa95ba 100644 --- a/src/native/minipal/cpufeatures.h +++ b/src/native/minipal/cpufeatures.h @@ -31,6 +31,7 @@ enum XArchIntrinsicConstants XArchIntrinsicConstants_Serialize = 0x20000, XArchIntrinsicConstants_Avx10v1 = 0x40000, XArchIntrinsicConstants_Evex = 0x80000, + XArchIntrinsicConstants_Apx = 0x100000, }; #endif // HOST_X86 || HOST_AMD64