forked from microsoft/DirectXMath
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDirectXMathSSE3.h
111 lines (89 loc) · 2.31 KB
/
DirectXMathSSE3.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
//-------------------------------------------------------------------------------------
// DirectXMathSSE3.h -- SSE3 extensions for SIMD C++ Math library
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#ifdef _MSC_VER
#pragma once
#endif
#ifdef _M_ARM
#error SSE3 not supported on ARM platform
#endif
#pragma warning(push)
#pragma warning(disable : 4987)
#include <intrin.h>
#pragma warning(pop)
#include <pmmintrin.h>
#include <DirectXMath.h>
namespace DirectX
{
namespace SSE3
{
// Runtime check for SSE3 support on the executing CPU.
// Returns true when the CPUID feature flags report SSE3, false otherwise
// (including when the CPU does not expose the feature-flag leaf at all).
inline bool XMVerifySSE3Support()
{
    // Expected to report true on AMD Athlon 64, AMD Phenom, and Intel Pentium 4 or later processors
    // __cpuid reference: http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
    int regs[4] = { -1 };

    // Leaf 0: regs[0] is compared against 1 to make sure leaf 1 can be queried.
    __cpuid( regs, 0 );
    if ( regs[0] >= 1 )
    {
        // Leaf 1: feature flags. Only the SSE3 bit (bit 0 of regs[2]) is examined;
        // SSSE3 instructions are not used by this header, so that flag is not tested.
        __cpuid( regs, 1 );
        return ( regs[2] & 0x1 ) != 0;
    }

    return false;
}
// 2D dot product using the SSE3 horizontal add.
// Returns V1.x*V2.x + V1.y*V2.y replicated into every lane of the result.
inline XMVECTOR XM_CALLCONV XMVector2Dot( FXMVECTOR V1, FXMVECTOR V2 )
{
    // Lane-wise products of the two inputs
    const XMVECTOR products = _mm_mul_ps( V1, V2 );

    // After the horizontal add, lane 0 holds x+y and lane 1 holds z+w
    const XMVECTOR pairSums = _mm_hadd_ps( products, products );

    // Broadcast lane 0 so the z/w contribution never reaches the result
    return _mm_shuffle_ps( pairSums, pairSums, _MM_SHUFFLE(0,0,0,0) );
}
// Squared length of a 2D vector: the SSE3 2D dot product of V with itself.
inline XMVECTOR XM_CALLCONV XMVector2LengthSq
(
    FXMVECTOR V
)
{
    const XMVECTOR lengthSq = SSE3::XMVector2Dot( V, V );
    return lengthSq;
}
// 3D dot product using the SSE3 horizontal add.
// The masked lane-wise products are summed by two horizontal adds, which
// leaves the same total in every lane of the result.
inline XMVECTOR XM_CALLCONV XMVector3Dot( FXMVECTOR V1, FXMVECTOR V2 )
{
    // Lane-wise products of the two inputs
    const XMVECTOR products = _mm_mul_ps( V1, V2 );

    // g_XMMask3 comes from DirectXMath.h; presumably it keeps x/y/z and zeroes w
    // so the fourth lane does not contribute to the sum.
    const XMVECTOR masked = _mm_and_ps( products, g_XMMask3 );

    // First pass produces the pair sums (lane0+lane1, lane2+lane3, ...);
    // the second pass adds those pairs together in every lane.
    const XMVECTOR pairSums = _mm_hadd_ps( masked, masked );
    return _mm_hadd_ps( pairSums, pairSums );
}
// Squared length of a 3D vector: the SSE3 3D dot product of V with itself.
inline XMVECTOR XM_CALLCONV XMVector3LengthSq
(
    FXMVECTOR V
)
{
    const XMVECTOR lengthSq = SSE3::XMVector3Dot( V, V );
    return lengthSq;
}
// 4D dot product using the SSE3 horizontal add.
// All four lane-wise products are summed; the total ends up in every lane.
inline XMVECTOR XM_CALLCONV XMVector4Dot( FXMVECTOR V1, FXMVECTOR V2 )
{
    // Lane-wise products of the two inputs
    const XMVECTOR products = _mm_mul_ps( V1, V2 );

    // First pass: (x+y, z+w, x+y, z+w); second pass: x+y+z+w in every lane
    const XMVECTOR pairSums = _mm_hadd_ps( products, products );
    return _mm_hadd_ps( pairSums, pairSums );
}
// Squared length of a 4D vector: the SSE3 4D dot product of V with itself.
inline XMVECTOR XM_CALLCONV XMVector4LengthSq
(
    FXMVECTOR V
)
{
    const XMVECTOR lengthSq = SSE3::XMVector4Dot( V, V );
    return lengthSq;
}
// Swizzle selecting lanes (0, 0, 2, 2) of V, done with the single SSE3
// instruction that duplicates the even-indexed lanes.
inline XMVECTOR XM_CALLCONV XMVectorSwizzle_0022
(
    FXMVECTOR V
)
{
    const XMVECTOR evenLanes = _mm_moveldup_ps( V );
    return evenLanes;
}
// Swizzle selecting lanes (1, 1, 3, 3) of V, done with the single SSE3
// instruction that duplicates the odd-indexed lanes.
inline XMVECTOR XM_CALLCONV XMVectorSwizzle_1133
(
    FXMVECTOR V
)
{
    const XMVECTOR oddLanes = _mm_movehdup_ps( V );
    return oddLanes;
}
} // namespace SSE3
} // namespace DirectX;