chore: ⬆️ updated deps
This commit is contained in:
+12
@@ -0,0 +1,12 @@
|
||||
// Copyright 2024 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build darwin && arm64 && gc
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
TEXT libc_sysctlbyname_trampoline<>(SB),NOSPLIT,$0-0
|
||||
JMP libc_sysctlbyname(SB)
|
||||
GLOBL ·libc_sysctlbyname_trampoline_addr(SB), RODATA, $8
|
||||
DATA ·libc_sysctlbyname_trampoline_addr(SB)/8, $libc_sysctlbyname_trampoline<>(SB)
|
||||
-3
@@ -92,9 +92,6 @@ var ARM64 struct {
|
||||
HasSHA2 bool // SHA2 hardware implementation
|
||||
HasCRC32 bool // CRC32 hardware implementation
|
||||
HasATOMICS bool // Atomic memory operation instruction set
|
||||
HasHPDS bool // Hierarchical permission disables in translations tables
|
||||
HasLOR bool // Limited ordering regions
|
||||
HasPAN bool // Privileged access never
|
||||
HasFPHP bool // Half precision floating-point instruction set
|
||||
HasASIMDHP bool // Advanced SIMD half precision instruction set
|
||||
HasCPUID bool // CPUID identification scheme registers
|
||||
|
||||
+5
-24
@@ -44,14 +44,11 @@ func initOptions() {
|
||||
}
|
||||
|
||||
func archInit() {
|
||||
switch runtime.GOOS {
|
||||
case "freebsd":
|
||||
if runtime.GOOS == "freebsd" {
|
||||
readARM64Registers()
|
||||
case "linux", "netbsd", "openbsd":
|
||||
} else {
|
||||
// Most platforms don't seem to allow directly reading these registers.
|
||||
doinit()
|
||||
default:
|
||||
// Many platforms don't seem to allow reading these registers.
|
||||
setMinimalFeatures()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,10 +62,10 @@ func setMinimalFeatures() {
|
||||
func readARM64Registers() {
|
||||
Initialized = true
|
||||
|
||||
parseARM64SystemRegisters(getisar0(), getisar1(), getmmfr1(), getpfr0())
|
||||
parseARM64SystemRegisters(getisar0(), getisar1(), getpfr0())
|
||||
}
|
||||
|
||||
func parseARM64SystemRegisters(isar0, isar1, mmfr1, pfr0 uint64) {
|
||||
func parseARM64SystemRegisters(isar0, isar1, pfr0 uint64) {
|
||||
// ID_AA64ISAR0_EL1
|
||||
switch extractBits(isar0, 4, 7) {
|
||||
case 1:
|
||||
@@ -152,22 +149,6 @@ func parseARM64SystemRegisters(isar0, isar1, mmfr1, pfr0 uint64) {
|
||||
ARM64.HasI8MM = true
|
||||
}
|
||||
|
||||
// ID_AA64MMFR1_EL1
|
||||
switch extractBits(mmfr1, 12, 15) {
|
||||
case 1, 2:
|
||||
ARM64.HasHPDS = true
|
||||
}
|
||||
|
||||
switch extractBits(mmfr1, 16, 19) {
|
||||
case 1:
|
||||
ARM64.HasLOR = true
|
||||
}
|
||||
|
||||
switch extractBits(mmfr1, 20, 23) {
|
||||
case 1, 2, 3:
|
||||
ARM64.HasPAN = true
|
||||
}
|
||||
|
||||
// ID_AA64PFR0_EL1
|
||||
switch extractBits(pfr0, 16, 19) {
|
||||
case 0:
|
||||
|
||||
-7
@@ -20,13 +20,6 @@ TEXT ·getisar1(SB),NOSPLIT,$0-8
|
||||
MOVD R0, ret+0(FP)
|
||||
RET
|
||||
|
||||
// func getmmfr1() uint64
|
||||
TEXT ·getmmfr1(SB),NOSPLIT,$0-8
|
||||
// get Memory Model Feature Register 1 into x0
|
||||
MRS ID_AA64MMFR1_EL1, R0
|
||||
MOVD R0, ret+0(FP)
|
||||
RET
|
||||
|
||||
// func getpfr0() uint64
|
||||
TEXT ·getpfr0(SB),NOSPLIT,$0-8
|
||||
// get Processor Feature Register 0 into x0
|
||||
|
||||
+67
@@ -0,0 +1,67 @@
|
||||
// Copyright 2026 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build darwin && arm64 && gc
|
||||
|
||||
package cpu
|
||||
|
||||
func doinit() {
|
||||
setMinimalFeatures()
|
||||
|
||||
// The feature flags are explained in [Instruction Set Detection].
|
||||
// There are some differences between MacOS versions:
|
||||
//
|
||||
// MacOS 11 and 12 do not have "hw.optional" sysctl values for some of the features.
|
||||
//
|
||||
// MacOS 13 changed some of the naming conventions to align with ARM Architecture Reference Manual.
|
||||
// For example "hw.optional.armv8_2_sha512" became "hw.optional.arm.FEAT_SHA512".
|
||||
// It currently checks both to stay compatible with MacOS 11 and 12.
|
||||
// The old names also work with MacOS 13, however it's not clear whether
|
||||
// they will continue working with future OS releases.
|
||||
//
|
||||
// Once MacOS 12 is no longer supported the old names can be removed.
|
||||
//
|
||||
// [Instruction Set Detection]: https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
|
||||
|
||||
// Encryption, hashing and checksum capabilities
|
||||
|
||||
// For the following flags there are no MacOS 11 sysctl flags.
|
||||
ARM64.HasAES = true || darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_AES\x00"))
|
||||
ARM64.HasPMULL = true || darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_PMULL\x00"))
|
||||
ARM64.HasSHA1 = true || darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_SHA1\x00"))
|
||||
ARM64.HasSHA2 = true || darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_SHA256\x00"))
|
||||
|
||||
ARM64.HasSHA3 = darwinSysctlEnabled([]byte("hw.optional.armv8_2_sha3\x00")) || darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_SHA3\x00"))
|
||||
ARM64.HasSHA512 = darwinSysctlEnabled([]byte("hw.optional.armv8_2_sha512\x00")) || darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_SHA512\x00"))
|
||||
|
||||
ARM64.HasCRC32 = darwinSysctlEnabled([]byte("hw.optional.armv8_crc32\x00"))
|
||||
|
||||
// Atomic and memory ordering
|
||||
ARM64.HasATOMICS = darwinSysctlEnabled([]byte("hw.optional.armv8_1_atomics\x00")) || darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_LSE\x00"))
|
||||
ARM64.HasLRCPC = darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_LRCPC\x00"))
|
||||
|
||||
// SIMD and floating point capabilities
|
||||
ARM64.HasFPHP = darwinSysctlEnabled([]byte("hw.optional.neon_fp16\x00")) || darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_FP16\x00"))
|
||||
ARM64.HasASIMDHP = darwinSysctlEnabled([]byte("hw.optional.neon_hpfp\x00")) || darwinSysctlEnabled([]byte("hw.optional.AdvSIMD_HPFPCvt\x00"))
|
||||
ARM64.HasASIMDRDM = darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_RDM\x00"))
|
||||
ARM64.HasASIMDDP = darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_DotProd\x00"))
|
||||
ARM64.HasASIMDFHM = darwinSysctlEnabled([]byte("hw.optional.armv8_2_fhm\x00")) || darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_FHM\x00"))
|
||||
ARM64.HasI8MM = darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_I8MM\x00"))
|
||||
|
||||
ARM64.HasJSCVT = darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_JSCVT\x00"))
|
||||
ARM64.HasFCMA = darwinSysctlEnabled([]byte("hw.optional.armv8_3_compnum\x00")) || darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_FCMA\x00"))
|
||||
|
||||
// Miscellaneous
|
||||
ARM64.HasDCPOP = darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_DPB\x00"))
|
||||
ARM64.HasEVTSTRM = darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_ECV\x00"))
|
||||
ARM64.HasDIT = darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_DIT\x00"))
|
||||
|
||||
// Not supported, but added for completeness
|
||||
ARM64.HasCPUID = false
|
||||
|
||||
ARM64.HasSM3 = false // darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_SM3\x00"))
|
||||
ARM64.HasSM4 = false // darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_SM4\x00"))
|
||||
ARM64.HasSVE = false // darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_SVE\x00"))
|
||||
ARM64.HasSVE2 = false // darwinSysctlEnabled([]byte("hw.optional.arm.FEAT_SVE2\x00"))
|
||||
}
|
||||
+31
@@ -0,0 +1,31 @@
|
||||
// Copyright 2026 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build darwin && arm64 && !gc
|
||||
|
||||
package cpu
|
||||
|
||||
import "runtime"
|
||||
|
||||
func doinit() {
|
||||
setMinimalFeatures()
|
||||
|
||||
ARM64.HasASIMD = true
|
||||
ARM64.HasFP = true
|
||||
|
||||
// Go already assumes these to be available because they were on the M1
|
||||
// and these are supported on all Apple arm64 chips.
|
||||
ARM64.HasAES = true
|
||||
ARM64.HasPMULL = true
|
||||
ARM64.HasSHA1 = true
|
||||
ARM64.HasSHA2 = true
|
||||
|
||||
if runtime.GOOS != "ios" {
|
||||
// Apple A7 processors do not support these, however
|
||||
// M-series SoCs are at least armv8.4-a
|
||||
ARM64.HasCRC32 = true // armv8.1
|
||||
ARM64.HasATOMICS = true // armv8.2
|
||||
ARM64.HasJSCVT = true // armv8.3, if HasFP
|
||||
}
|
||||
}
|
||||
-1
@@ -8,6 +8,5 @@ package cpu
|
||||
|
||||
func getisar0() uint64
|
||||
func getisar1() uint64
|
||||
func getmmfr1() uint64
|
||||
func getpfr0() uint64
|
||||
func getzfr0() uint64
|
||||
|
||||
+1
-1
@@ -8,5 +8,5 @@ package cpu
|
||||
|
||||
func getisar0() uint64 { return 0 }
|
||||
func getisar1() uint64 { return 0 }
|
||||
func getmmfr1() uint64 { return 0 }
|
||||
func getpfr0() uint64 { return 0 }
|
||||
func getzfr0() uint64 { return 0 }
|
||||
|
||||
+1
-1
@@ -167,7 +167,7 @@ func doinit() {
|
||||
setMinimalFeatures()
|
||||
return
|
||||
}
|
||||
parseARM64SystemRegisters(cpuid.aa64isar0, cpuid.aa64isar1, cpuid.aa64mmfr1, cpuid.aa64pfr0)
|
||||
parseARM64SystemRegisters(cpuid.aa64isar0, cpuid.aa64isar1, cpuid.aa64pfr0)
|
||||
|
||||
Initialized = true
|
||||
}
|
||||
|
||||
+1
-1
@@ -59,7 +59,7 @@ func doinit() {
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
parseARM64SystemRegisters(isar0, isar1, 0, 0)
|
||||
parseARM64SystemRegisters(isar0, isar1, 0)
|
||||
|
||||
Initialized = true
|
||||
}
|
||||
|
||||
+4
-2
@@ -2,8 +2,10 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !linux && !netbsd && !openbsd && arm64
|
||||
//go:build !darwin && !linux && !netbsd && !openbsd && !windows && arm64
|
||||
|
||||
package cpu
|
||||
|
||||
func doinit() {}
|
||||
func doinit() {
|
||||
setMinimalFeatures()
|
||||
}
|
||||
|
||||
+26
@@ -0,0 +1,26 @@
|
||||
// Copyright 2026 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpu
|
||||
|
||||
//go:generate go run golang.org/x/sys/windows/mkwinsyscall -systemdll=false -output zcpu_windows.go cpu_windows.go
|
||||
|
||||
//sys isProcessorFeaturePresent(ProcessorFeature uint32) (ret bool) = kernel32.IsProcessorFeaturePresent
|
||||
|
||||
// The processor features to be tested for IsProcessorFeaturePresent, see
|
||||
// https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
|
||||
const (
|
||||
_PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE = 30
|
||||
_PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE = 31
|
||||
_PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE = 34
|
||||
_PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE = 43
|
||||
|
||||
_PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE = 44
|
||||
_PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE = 45
|
||||
_PF_ARM_SVE_INSTRUCTIONS_AVAILABLE = 46
|
||||
_PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE = 47
|
||||
|
||||
_PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE = 64
|
||||
_PF_ARM_SHA512_INSTRUCTIONS_AVAILABLE = 65
|
||||
)
|
||||
+38
@@ -0,0 +1,38 @@
|
||||
// Copyright 2026 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cpu
|
||||
|
||||
func doinit() {
|
||||
// set HasASIMD and HasFP to true as per
|
||||
// https://learn.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=msvc-170#base-requirements
|
||||
//
|
||||
// The ARM64 version of Windows always presupposes that it's running on an ARMv8 or later architecture.
|
||||
// Both floating-point and NEON support are presumed to be present in hardware.
|
||||
//
|
||||
ARM64.HasASIMD = true
|
||||
ARM64.HasFP = true
|
||||
|
||||
if isProcessorFeaturePresent(_PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) {
|
||||
ARM64.HasAES = true
|
||||
ARM64.HasPMULL = true
|
||||
ARM64.HasSHA1 = true
|
||||
ARM64.HasSHA2 = true
|
||||
}
|
||||
ARM64.HasSHA3 = isProcessorFeaturePresent(_PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE)
|
||||
ARM64.HasCRC32 = isProcessorFeaturePresent(_PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)
|
||||
ARM64.HasSHA512 = isProcessorFeaturePresent(_PF_ARM_SHA512_INSTRUCTIONS_AVAILABLE)
|
||||
ARM64.HasATOMICS = isProcessorFeaturePresent(_PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE)
|
||||
if isProcessorFeaturePresent(_PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) {
|
||||
ARM64.HasASIMDDP = true
|
||||
ARM64.HasASIMDRDM = true
|
||||
}
|
||||
if isProcessorFeaturePresent(_PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE) {
|
||||
ARM64.HasLRCPC = true
|
||||
ARM64.HasSM3 = true
|
||||
}
|
||||
ARM64.HasSVE = isProcessorFeaturePresent(_PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)
|
||||
ARM64.HasSVE2 = isProcessorFeaturePresent(_PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)
|
||||
ARM64.HasJSCVT = isProcessorFeaturePresent(_PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE)
|
||||
}
|
||||
+122
-48
@@ -64,6 +64,80 @@ func initOptions() {
|
||||
|
||||
func archInit() {
|
||||
|
||||
// From internal/cpu
|
||||
const (
|
||||
// eax bits
|
||||
cpuid_AVXVNNI = 1 << 4
|
||||
|
||||
// ecx bits
|
||||
cpuid_SSE3 = 1 << 0
|
||||
cpuid_PCLMULQDQ = 1 << 1
|
||||
cpuid_AVX512VBMI = 1 << 1
|
||||
cpuid_AVX512VBMI2 = 1 << 6
|
||||
cpuid_SSSE3 = 1 << 9
|
||||
cpuid_AVX512GFNI = 1 << 8
|
||||
cpuid_AVX512VAES = 1 << 9
|
||||
cpuid_AVX512VNNI = 1 << 11
|
||||
cpuid_AVX512BITALG = 1 << 12
|
||||
cpuid_FMA = 1 << 12
|
||||
cpuid_AVX512VPOPCNTDQ = 1 << 14
|
||||
cpuid_SSE41 = 1 << 19
|
||||
cpuid_SSE42 = 1 << 20
|
||||
cpuid_POPCNT = 1 << 23
|
||||
cpuid_AES = 1 << 25
|
||||
cpuid_OSXSAVE = 1 << 27
|
||||
cpuid_AVX = 1 << 28
|
||||
|
||||
// "Extended Feature Flag" bits returned in EBX for CPUID EAX=0x7 ECX=0x0
|
||||
cpuid_BMI1 = 1 << 3
|
||||
cpuid_AVX2 = 1 << 5
|
||||
cpuid_BMI2 = 1 << 8
|
||||
cpuid_ERMS = 1 << 9
|
||||
cpuid_AVX512F = 1 << 16
|
||||
cpuid_AVX512DQ = 1 << 17
|
||||
cpuid_ADX = 1 << 19
|
||||
cpuid_AVX512CD = 1 << 28
|
||||
cpuid_SHA = 1 << 29
|
||||
cpuid_AVX512BW = 1 << 30
|
||||
cpuid_AVX512VL = 1 << 31
|
||||
|
||||
// "Extended Feature Flag" bits returned in ECX for CPUID EAX=0x7 ECX=0x0
|
||||
cpuid_AVX512_VBMI = 1 << 1
|
||||
cpuid_AVX512_VBMI2 = 1 << 6
|
||||
cpuid_GFNI = 1 << 8
|
||||
cpuid_AVX512VPCLMULQDQ = 1 << 10
|
||||
cpuid_AVX512_BITALG = 1 << 12
|
||||
|
||||
// edx bits
|
||||
cpuid_FSRM = 1 << 4
|
||||
// edx bits for CPUID 0x80000001
|
||||
cpuid_RDTSCP = 1 << 27
|
||||
)
|
||||
// Additional constants not in internal/cpu
|
||||
const (
|
||||
// eax=1: edx
|
||||
cpuid_SSE2 = 1 << 26
|
||||
// eax=1: ecx
|
||||
cpuid_CX16 = 1 << 13
|
||||
cpuid_RDRAND = 1 << 30
|
||||
// eax=7,ecx=0: ebx
|
||||
cpuid_RDSEED = 1 << 18
|
||||
cpuid_AVX512IFMA = 1 << 21
|
||||
cpuid_AVX512PF = 1 << 26
|
||||
cpuid_AVX512ER = 1 << 27
|
||||
// eax=7,ecx=0: edx
|
||||
cpuid_AVX5124VNNIW = 1 << 2
|
||||
cpuid_AVX5124FMAPS = 1 << 3
|
||||
cpuid_AMXBF16 = 1 << 22
|
||||
cpuid_AMXTile = 1 << 24
|
||||
cpuid_AMXInt8 = 1 << 25
|
||||
// eax=7,ecx=1: eax
|
||||
cpuid_AVX512BF16 = 1 << 5
|
||||
cpuid_AVXIFMA = 1 << 23
|
||||
// eax=7,ecx=1: edx
|
||||
cpuid_AVXVNNIInt8 = 1 << 4
|
||||
)
|
||||
|
||||
Initialized = true
|
||||
|
||||
maxID, _, _, _ := cpuid(0, 0)
|
||||
@@ -73,90 +147,90 @@ func archInit() {
|
||||
}
|
||||
|
||||
_, _, ecx1, edx1 := cpuid(1, 0)
|
||||
X86.HasSSE2 = isSet(26, edx1)
|
||||
X86.HasSSE2 = isSet(edx1, cpuid_SSE2)
|
||||
|
||||
X86.HasSSE3 = isSet(0, ecx1)
|
||||
X86.HasPCLMULQDQ = isSet(1, ecx1)
|
||||
X86.HasSSSE3 = isSet(9, ecx1)
|
||||
X86.HasFMA = isSet(12, ecx1)
|
||||
X86.HasCX16 = isSet(13, ecx1)
|
||||
X86.HasSSE41 = isSet(19, ecx1)
|
||||
X86.HasSSE42 = isSet(20, ecx1)
|
||||
X86.HasPOPCNT = isSet(23, ecx1)
|
||||
X86.HasAES = isSet(25, ecx1)
|
||||
X86.HasOSXSAVE = isSet(27, ecx1)
|
||||
X86.HasRDRAND = isSet(30, ecx1)
|
||||
X86.HasSSE3 = isSet(ecx1, cpuid_SSE3)
|
||||
X86.HasPCLMULQDQ = isSet(ecx1, cpuid_PCLMULQDQ)
|
||||
X86.HasSSSE3 = isSet(ecx1, cpuid_SSSE3)
|
||||
X86.HasFMA = isSet(ecx1, cpuid_FMA)
|
||||
X86.HasCX16 = isSet(ecx1, cpuid_CX16)
|
||||
X86.HasSSE41 = isSet(ecx1, cpuid_SSE41)
|
||||
X86.HasSSE42 = isSet(ecx1, cpuid_SSE42)
|
||||
X86.HasPOPCNT = isSet(ecx1, cpuid_POPCNT)
|
||||
X86.HasAES = isSet(ecx1, cpuid_AES)
|
||||
X86.HasOSXSAVE = isSet(ecx1, cpuid_OSXSAVE)
|
||||
X86.HasRDRAND = isSet(ecx1, cpuid_RDRAND)
|
||||
|
||||
var osSupportsAVX, osSupportsAVX512 bool
|
||||
// For XGETBV, OSXSAVE bit is required and sufficient.
|
||||
if X86.HasOSXSAVE {
|
||||
eax, _ := xgetbv()
|
||||
// Check if XMM and YMM registers have OS support.
|
||||
osSupportsAVX = isSet(1, eax) && isSet(2, eax)
|
||||
osSupportsAVX = isSet(eax, 1<<1) && isSet(eax, 1<<2)
|
||||
|
||||
if runtime.GOOS == "darwin" {
|
||||
// Darwin requires special AVX512 checks, see cpu_darwin_x86.go
|
||||
osSupportsAVX512 = osSupportsAVX && darwinSupportsAVX512()
|
||||
} else {
|
||||
// Check if OPMASK and ZMM registers have OS support.
|
||||
osSupportsAVX512 = osSupportsAVX && isSet(5, eax) && isSet(6, eax) && isSet(7, eax)
|
||||
osSupportsAVX512 = osSupportsAVX && isSet(eax, 1<<5) && isSet(eax, 1<<6) && isSet(eax, 1<<7)
|
||||
}
|
||||
}
|
||||
|
||||
X86.HasAVX = isSet(28, ecx1) && osSupportsAVX
|
||||
X86.HasAVX = isSet(ecx1, cpuid_AVX) && osSupportsAVX
|
||||
|
||||
if maxID < 7 {
|
||||
return
|
||||
}
|
||||
|
||||
eax7, ebx7, ecx7, edx7 := cpuid(7, 0)
|
||||
X86.HasBMI1 = isSet(3, ebx7)
|
||||
X86.HasAVX2 = isSet(5, ebx7) && osSupportsAVX
|
||||
X86.HasBMI2 = isSet(8, ebx7)
|
||||
X86.HasERMS = isSet(9, ebx7)
|
||||
X86.HasRDSEED = isSet(18, ebx7)
|
||||
X86.HasADX = isSet(19, ebx7)
|
||||
X86.HasBMI1 = isSet(ebx7, cpuid_BMI1)
|
||||
X86.HasAVX2 = isSet(ebx7, cpuid_AVX2) && osSupportsAVX
|
||||
X86.HasBMI2 = isSet(ebx7, cpuid_BMI2)
|
||||
X86.HasERMS = isSet(ebx7, cpuid_ERMS)
|
||||
X86.HasRDSEED = isSet(ebx7, cpuid_RDSEED)
|
||||
X86.HasADX = isSet(ebx7, cpuid_ADX)
|
||||
|
||||
X86.HasAVX512 = isSet(16, ebx7) && osSupportsAVX512 // Because avx-512 foundation is the core required extension
|
||||
X86.HasAVX512 = isSet(ebx7, cpuid_AVX512F) && osSupportsAVX512 // Because avx-512 foundation is the core required extension
|
||||
if X86.HasAVX512 {
|
||||
X86.HasAVX512F = true
|
||||
X86.HasAVX512CD = isSet(28, ebx7)
|
||||
X86.HasAVX512ER = isSet(27, ebx7)
|
||||
X86.HasAVX512PF = isSet(26, ebx7)
|
||||
X86.HasAVX512VL = isSet(31, ebx7)
|
||||
X86.HasAVX512BW = isSet(30, ebx7)
|
||||
X86.HasAVX512DQ = isSet(17, ebx7)
|
||||
X86.HasAVX512IFMA = isSet(21, ebx7)
|
||||
X86.HasAVX512VBMI = isSet(1, ecx7)
|
||||
X86.HasAVX5124VNNIW = isSet(2, edx7)
|
||||
X86.HasAVX5124FMAPS = isSet(3, edx7)
|
||||
X86.HasAVX512VPOPCNTDQ = isSet(14, ecx7)
|
||||
X86.HasAVX512VPCLMULQDQ = isSet(10, ecx7)
|
||||
X86.HasAVX512VNNI = isSet(11, ecx7)
|
||||
X86.HasAVX512GFNI = isSet(8, ecx7)
|
||||
X86.HasAVX512VAES = isSet(9, ecx7)
|
||||
X86.HasAVX512VBMI2 = isSet(6, ecx7)
|
||||
X86.HasAVX512BITALG = isSet(12, ecx7)
|
||||
X86.HasAVX512CD = isSet(ebx7, cpuid_AVX512CD)
|
||||
X86.HasAVX512ER = isSet(ebx7, cpuid_AVX512ER)
|
||||
X86.HasAVX512PF = isSet(ebx7, cpuid_AVX512PF)
|
||||
X86.HasAVX512VL = isSet(ebx7, cpuid_AVX512VL)
|
||||
X86.HasAVX512BW = isSet(ebx7, cpuid_AVX512BW)
|
||||
X86.HasAVX512DQ = isSet(ebx7, cpuid_AVX512DQ)
|
||||
X86.HasAVX512IFMA = isSet(ebx7, cpuid_AVX512IFMA)
|
||||
X86.HasAVX512VBMI = isSet(ecx7, cpuid_AVX512_VBMI)
|
||||
X86.HasAVX5124VNNIW = isSet(edx7, cpuid_AVX5124VNNIW)
|
||||
X86.HasAVX5124FMAPS = isSet(edx7, cpuid_AVX5124FMAPS)
|
||||
X86.HasAVX512VPOPCNTDQ = isSet(ecx7, cpuid_AVX512VPOPCNTDQ)
|
||||
X86.HasAVX512VPCLMULQDQ = isSet(ecx7, cpuid_AVX512VPCLMULQDQ)
|
||||
X86.HasAVX512VNNI = isSet(ecx7, cpuid_AVX512VNNI)
|
||||
X86.HasAVX512GFNI = isSet(ecx7, cpuid_AVX512GFNI)
|
||||
X86.HasAVX512VAES = isSet(ecx7, cpuid_AVX512VAES)
|
||||
X86.HasAVX512VBMI2 = isSet(ecx7, cpuid_AVX512VBMI2)
|
||||
X86.HasAVX512BITALG = isSet(ecx7, cpuid_AVX512BITALG)
|
||||
}
|
||||
|
||||
X86.HasAMXTile = isSet(24, edx7)
|
||||
X86.HasAMXInt8 = isSet(25, edx7)
|
||||
X86.HasAMXBF16 = isSet(22, edx7)
|
||||
X86.HasAMXTile = isSet(edx7, cpuid_AMXTile)
|
||||
X86.HasAMXInt8 = isSet(edx7, cpuid_AMXInt8)
|
||||
X86.HasAMXBF16 = isSet(edx7, cpuid_AMXBF16)
|
||||
|
||||
// These features depend on the second level of extended features.
|
||||
if eax7 >= 1 {
|
||||
eax71, _, _, edx71 := cpuid(7, 1)
|
||||
if X86.HasAVX512 {
|
||||
X86.HasAVX512BF16 = isSet(5, eax71)
|
||||
X86.HasAVX512BF16 = isSet(eax71, cpuid_AVX512BF16)
|
||||
}
|
||||
if X86.HasAVX {
|
||||
X86.HasAVXIFMA = isSet(23, eax71)
|
||||
X86.HasAVXVNNI = isSet(4, eax71)
|
||||
X86.HasAVXVNNIInt8 = isSet(4, edx71)
|
||||
X86.HasAVXIFMA = isSet(eax71, cpuid_AVXIFMA)
|
||||
X86.HasAVXVNNI = isSet(eax71, cpuid_AVXVNNI)
|
||||
X86.HasAVXVNNIInt8 = isSet(edx71, cpuid_AVXVNNIInt8)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func isSet(bitpos uint, value uint32) bool {
|
||||
return value&(1<<bitpos) != 0
|
||||
func isSet(hwc uint32, value uint32) bool {
|
||||
return hwc&value != 0
|
||||
}
|
||||
|
||||
+54
@@ -0,0 +1,54 @@
|
||||
// Copyright 2024 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Minimal copy from internal/cpu and runtime to make sysctl calls.
|
||||
|
||||
//go:build darwin && arm64 && gc
|
||||
|
||||
package cpu
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
type Errno = syscall.Errno
|
||||
|
||||
// adapted from internal/cpu/cpu_arm64_darwin.go
|
||||
func darwinSysctlEnabled(name []byte) bool {
|
||||
out := int32(0)
|
||||
nout := unsafe.Sizeof(out)
|
||||
if ret := sysctlbyname(&name[0], (*byte)(unsafe.Pointer(&out)), &nout, nil, 0); ret != nil {
|
||||
return false
|
||||
}
|
||||
return out > 0
|
||||
}
|
||||
|
||||
//go:cgo_import_dynamic libc_sysctl sysctl "/usr/lib/libSystem.B.dylib"
|
||||
|
||||
var libc_sysctlbyname_trampoline_addr uintptr
|
||||
|
||||
// adapted from runtime/sys_darwin.go in the pattern of sysctl() above, as defined in x/sys/unix
|
||||
func sysctlbyname(name *byte, old *byte, oldlen *uintptr, new *byte, newlen uintptr) error {
|
||||
if _, _, err := syscall_syscall6(
|
||||
libc_sysctlbyname_trampoline_addr,
|
||||
uintptr(unsafe.Pointer(name)),
|
||||
uintptr(unsafe.Pointer(old)),
|
||||
uintptr(unsafe.Pointer(oldlen)),
|
||||
uintptr(unsafe.Pointer(new)),
|
||||
uintptr(newlen),
|
||||
0,
|
||||
); err != 0 {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
//go:cgo_import_dynamic libc_sysctlbyname sysctlbyname "/usr/lib/libSystem.B.dylib"
|
||||
|
||||
// Implemented in the runtime package (runtime/sys_darwin.go)
|
||||
func syscall_syscall6(fn, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err Errno)
|
||||
|
||||
//go:linkname syscall_syscall6 syscall.syscall6
|
||||
+48
@@ -0,0 +1,48 @@
|
||||
// Code generated by 'go generate'; DO NOT EDIT.
|
||||
|
||||
package cpu
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
var _ unsafe.Pointer
|
||||
|
||||
// Do the interface allocations only once for common
|
||||
// Errno values.
|
||||
const (
|
||||
errnoERROR_IO_PENDING = 997
|
||||
)
|
||||
|
||||
var (
|
||||
errERROR_IO_PENDING error = syscall.Errno(errnoERROR_IO_PENDING)
|
||||
errERROR_EINVAL error = syscall.EINVAL
|
||||
)
|
||||
|
||||
// errnoErr returns common boxed Errno values, to prevent
|
||||
// allocations at runtime.
|
||||
func errnoErr(e syscall.Errno) error {
|
||||
switch e {
|
||||
case 0:
|
||||
return errERROR_EINVAL
|
||||
case errnoERROR_IO_PENDING:
|
||||
return errERROR_IO_PENDING
|
||||
}
|
||||
// TODO: add more here, after collecting data on the common
|
||||
// error values see on Windows. (perhaps when running
|
||||
// all.bat?)
|
||||
return e
|
||||
}
|
||||
|
||||
var (
|
||||
modkernel32 = syscall.NewLazyDLL("kernel32.dll")
|
||||
|
||||
procIsProcessorFeaturePresent = modkernel32.NewProc("IsProcessorFeaturePresent")
|
||||
)
|
||||
|
||||
func isProcessorFeaturePresent(ProcessorFeature uint32) (ret bool) {
|
||||
r0, _, _ := syscall.SyscallN(procIsProcessorFeaturePresent.Addr(), uintptr(ProcessorFeature))
|
||||
ret = r0 != 0
|
||||
return
|
||||
}
|
||||
Reference in New Issue
Block a user