Retired Document
Important: This sample code may not represent best practices for current development. The project may use deprecated symbols and illustrate technologies and techniques that are no longer recommended.
Relevant replacement documents include:
PublicUtility/CAVectorUnit.cpp
/*
<codex>
<abstract>CAVectorUnit.h</abstract>
</codex>
*/
#include "CAVectorUnit.h" |
#if !TARGET_OS_WIN32 |
#include <sys/sysctl.h> |
#elif HAS_IPP |
#include "ippdefs.h" |
#include "ippcore.h" |
#endif |
// Vector unit type most recently recorded by CAVectorUnit_Examine().
// Holds kVecUninitialized until that function has stored a result.
int gCAVectorUnitType = kVecUninitialized; |
#if TARGET_OS_WIN32 |
// Queries cpuid (function 1) and reports whether the SSE2 feature flag is set.
// Returns 1 when bit 26 of the EDX feature word is set, 0 otherwise.
// The caller must first confirm that the cpuid instruction itself is available.
static SInt32 IsSSE2Available()
{
	int featureWordEDX;
	_asm
	{
		mov eax, 0x01
		cpuid
		mov featureWordEDX, edx
	}
	// SSE2 is reported in bit 26 of EDX.
	return (featureWordEDX >> 26) & 0x1;
}
// Queries cpuid (function 1) and reports whether the SSE3 feature flag is set.
// Returns 1 when bit 0 of the ECX feature word is set, 0 otherwise.
// The caller must first confirm that the cpuid instruction itself is available.
static SInt32 IsSSE3Available()
{
	SInt32 featureWordECX;
	_asm
	{
		mov eax, 0x01
		cpuid
		mov featureWordECX, ecx
	}
	// SSE3 is reported in bit 0 of ECX.
	return featureWordECX & 0x1;
}
// Return true if the cpuid instruction is available. |
// The cpuid instruction is available if bit 21 in the EFLAGS register can be changed |
// This function may not work on Intel CPUs prior to Pentium (didn't test) |
static bool IsCpuidAvailable() |
{ |
SInt32 return_value = 0x0; |
_asm{ |
pushfd ; //push original EFLAGS |
pop eax ; //get original EFLAGS |
mov ecx, eax ; //save original EFLAGS |
xor eax, 200000h ; //flip ID bit in EFLAGS |
push eax ; //save new EFLAGS value on stack |
popfd ; //replace current EFLAGS value |
pushfd ; //get new EFLAGS |
pop eax ; //store new EFLAGS in EAX |
xor eax, ecx ; |
je end_cpuid_identify ; //can't toggle ID bit |
mov return_value, 0x1; |
end_cpuid_identify: |
nop; |
} |
return return_value; |
} |
#endif |
// Determines which vector unit the current processor offers, stores the answer in
// gCAVectorUnitType and returns it.
//
// The result starts as kVecNone and is upgraded per platform:
//   - Windows:        kVecSSE3 or kVecSSE2, detected with the cpuid instruction.
//   - Mac, PowerPC:   kVecAltivec, when the HW_VECTORUNIT sysctl reports a value > 0.
//   - Mac, x86/x86_64: kVecSSE3 or kVecSSE2, from the hw.optional.sse3 / hw.optional.sse2 sysctls.
//   - Mac, ARM:       kVecNeon, when _ARM_ARCH_7 is defined.
//
// In DEBUG builds on Mac, setting the CA_NoVector environment variable makes the
// function return kVecNone immediately; on that path gCAVectorUnitType is NOT updated.
SInt32 CAVectorUnit_Examine()
{
	int result = kVecNone;

#if TARGET_OS_WIN32
#if HAS_IPP
	// Initialize the static IPP library! This needs to be done before
	// any IPP function calls, otherwise we may have a performance penalty
	int status = ippStaticInit();
	if ( status == ippStsNonIntelCpu )
	{
		// Only force a CPU type onto IPP when the reported type lies inside the
		// SSE .. SSE4.2 range. Both bounds must hold, hence '&&'; with '||' the
		// test is true for every value and the range check is defeated.
		IppCpuType cpuType = ippGetCpuType();
		if ( cpuType >= ippCpuSSE && cpuType <= ippCpuSSE42 )
			ippStaticInitCpu( cpuType );
	}
#endif
	{
		// On Windows we use cpuid to detect the vector unit because it works on Intel and AMD.
		// The IPP library does not detect SSE on AMD processors.
		if (IsCpuidAvailable())
		{
			// Prefer the newer instruction set; fall back to SSE2 when SSE3 is absent.
			if (IsSSE3Available())
			{
				result = kVecSSE3;
			}
			else if (IsSSE2Available())
			{
				result = kVecSSE2;
			}
		}
	}
#elif TARGET_OS_MAC
#if DEBUG
	// Debug-only escape hatch: report "no vector unit" when CA_NoVector is set.
	if (getenv("CA_NoVector")) {
		fprintf(stderr, "CA_NoVector set; Vector unit optimized routines will be bypassed\n");
		return result;
	}
	else
#endif
	{
#if (TARGET_CPU_PPC || TARGET_CPU_PPC64)
		int sels[2] = { CTL_HW, HW_VECTORUNIT };
		int vType = 0; //0 == scalar only
		size_t length = sizeof(vType);
		int error = sysctl(sels, 2, &vType, &length, NULL, 0);
		if (!error && vType > 0)
			result = kVecAltivec;
#elif (TARGET_CPU_X86 || TARGET_CPU_X86_64)
		int answer = 0;
		size_t length = sizeof(answer);
		int error = sysctlbyname("hw.optional.sse3", &answer, &length, NULL, 0);
		if (!error && answer)
			result = kVecSSE3;
		else {
			// SSE3 missing or the query failed: reset the out-parameters and ask about SSE2.
			answer = 0;
			length = sizeof(answer);
			error = sysctlbyname("hw.optional.sse2", &answer, &length, NULL, 0);
			if (!error && answer)
				result = kVecSSE2;
		}
#elif (TARGET_CPU_ARM) && defined(_ARM_ARCH_7)
		result = kVecNeon;
#endif
	}
#endif
	// Record the result in the global before returning it.
	gCAVectorUnitType = result;
	return result;
}
Copyright © 2012 Apple Inc. All Rights Reserved. Terms of Use | Privacy Policy | Updated: 2012-10-08