// PublicUtility/CAVectorUnit.cpp
/*
	Copyright (C) 2016 Apple Inc. All Rights Reserved.
	See LICENSE.txt for this sample’s licensing information

	Abstract:
	Part of Core Audio Public Utility Classes
*/
#include "CAVectorUnit.h"

#if !TARGET_OS_WIN32
	#include <sys/sysctl.h>
#elif HAS_IPP
	#include "ippdefs.h"
	#include "ippcore.h"
#endif

// Cached vector unit type. Starts out as kVecUninitialized and is overwritten
// with the detected type at the end of CAVectorUnit_Examine().
int gCAVectorUnitType = kVecUninitialized;
#if TARGET_OS_WIN32
// The helpers in this section use MSVC-style inline assembly and are only
// compiled when TARGET_OS_WIN32 is set.

// Use cpuid to check if SSE2 is available.
// Runs cpuid with EAX = 1 and returns bit 26 of EDX (1 if set, 0 otherwise).
// Before calling this function make sure cpuid is available.
static SInt32 IsSSE2Available()
{
	int r_edx;
	__asm
	{
		mov eax, 0x01
		cpuid
		mov r_edx, edx
	}
	return (r_edx >> 26) & 0x1;
}

// Use cpuid to check if SSE3 is available.
// Runs cpuid with EAX = 1 and returns bit 0 of ECX (1 if set, 0 otherwise).
// Before calling this function make sure cpuid is available.
static SInt32 IsSSE3Available()
{
	SInt32 r_ecx;
	__asm
	{
		mov eax, 0x01
		cpuid
		mov r_ecx, ecx
	}
	return r_ecx & 0x1;
}

// Return true if the cpuid instruction is available.
// The cpuid instruction is available if bit 21 (the ID bit) in the EFLAGS
// register can be changed.
// This function may not work on Intel CPUs prior to Pentium (didn't test).
static bool IsCpuidAvailable()
{
	SInt32 id_bit_toggled = 0x0;
	__asm
	{
		pushfd                      // push original EFLAGS
		pop eax                     // get original EFLAGS
		mov ecx, eax                // save original EFLAGS
		xor eax, 200000h            // flip ID bit in EFLAGS
		push eax                    // save new EFLAGS value on stack
		popfd                       // replace current EFLAGS value
		pushfd                      // get new EFLAGS
		pop eax                     // store new EFLAGS in EAX
		xor eax, ecx                // zero when the ID bit did not change
		je end_cpuid_identify       // can't toggle ID bit
		mov id_bit_toggled, 0x1
	end_cpuid_identify:
		nop
	}
	return id_bit_toggled != 0;
}
#endif
// Detects which vector unit the current machine offers, stores the answer in
// gCAVectorUnitType, and returns it.
//
// The result starts as kVecNone and is upgraded depending on the platform:
//   - Win32:          kVecSSE3 or kVecSSE2, probed via cpuid.
//   - Mac, PPC:       kVecAltivec when the hw.vectorunit sysctl reports > 0.
//   - Mac, x86:       kVecAVX1, kVecSSE3 or kVecSSE2, the first hw.optional.*
//                     sysctl (checked in that order) that reports non-zero.
//   - Mac, otherwise: kVecNeon when CA_ARM_NEON is set.
SInt32 CAVectorUnit_Examine()
{
	int result = kVecNone;

#if TARGET_OS_WIN32
#if HAS_IPP
	// Initialize the static IPP library! This needs to be done before
	// any IPP function calls, otherwise we may have a performance penalty
	int status = ippStaticInit();
	if ( status == ippStsNonIntelCpu )
	{
		IppCpuType cpuType = ippGetCpuType();
		// Both bounds have to hold for this to be a range check; joining them
		// with || would accept every cpuType value.
		if ( cpuType >= ippCpuSSE && cpuType <= ippCpuSSE42 )
			ippStaticInitCpu( cpuType );
	}
#endif
	// On Windows we use cpuid to detect the vector unit because it works on Intel and AMD.
	// The IPP library does not detect SSE on AMD processors.
	if (IsCpuidAvailable())
	{
		if (IsSSE3Available())
			result = kVecSSE3;
		else if (IsSSE2Available())
			result = kVecSSE2;
	}
#elif TARGET_OS_MAC
#if DEBUG
	if (getenv("CA_NoVector")) {
		fprintf(stderr, "CA_NoVector set; Vector unit optimized routines will be bypassed\n");
		// NOTE(review): this early return does not store into gCAVectorUnitType,
		// so the global keeps its previous value. Confirm that is intended.
		return result;
	}
	else
#endif
	{
#if (TARGET_CPU_PPC || TARGET_CPU_PPC64)
		int sels[2] = { CTL_HW, HW_VECTORUNIT };
		int vType = 0; //0 == scalar only
		size_t length = sizeof(vType);
		int error = sysctl(sels, 2, &vType, &length, NULL, 0);
		if (!error && vType > 0)
			result = kVecAltivec;
#elif (TARGET_CPU_X86 || TARGET_CPU_X86_64)
		// Checked in order; the first capability that is present wins.
		static const struct { const char* kName; const int kVectype; } kStringVectypes[] = {
			{ "hw.optional.avx1_0", kVecAVX1 }, { "hw.optional.sse3", kVecSSE3 }, { "hw.optional.sse2", kVecSSE2 }
		};
		static const size_t kNumStringVectypes = sizeof(kStringVectypes)/sizeof(kStringVectypes[0]);

		for (size_t i = 0; i < kNumStringVectypes; ++i)
		{
			int answer = 0;
			size_t length = sizeof(answer);
			int error = sysctlbyname(kStringVectypes[i].kName, &answer, &length, NULL, 0);
			if (!error && answer)
			{
				result = kStringVectypes[i].kVectype;
				break;
			}
		}
#elif CA_ARM_NEON
		result = kVecNeon;
#endif
	}
#endif
	gCAVectorUnitType = result;
	return result;
}
// Copyright © 2016 Apple Inc. All Rights Reserved. Terms of Use | Privacy Policy | Updated: 2016-02-19