104 lines
1.7 KiB
ArmAsm
104 lines
1.7 KiB
ArmAsm
// +build amd64,!appengine,!go1.9
|
|
|
|
TEXT ·hasAsm(SB),4,$0-1
|
|
MOVQ $1, AX
|
|
CPUID
|
|
SHRQ $23, CX
|
|
ANDQ $1, CX
|
|
MOVB CX, ret+0(FP)
|
|
RET
|
|
|
|
#define POPCNTQ_DX_DX BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2
|
|
|
|
TEXT ·popcntSliceAsm(SB),4,$0-32
|
|
XORQ AX, AX
|
|
MOVQ s+0(FP), SI
|
|
MOVQ s_len+8(FP), CX
|
|
TESTQ CX, CX
|
|
JZ popcntSliceEnd
|
|
popcntSliceLoop:
|
|
BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0x16 // POPCNTQ (SI), DX
|
|
ADDQ DX, AX
|
|
ADDQ $8, SI
|
|
LOOP popcntSliceLoop
|
|
popcntSliceEnd:
|
|
MOVQ AX, ret+24(FP)
|
|
RET
|
|
|
|
TEXT ·popcntMaskSliceAsm(SB),4,$0-56
|
|
XORQ AX, AX
|
|
MOVQ s+0(FP), SI
|
|
MOVQ s_len+8(FP), CX
|
|
TESTQ CX, CX
|
|
JZ popcntMaskSliceEnd
|
|
MOVQ m+24(FP), DI
|
|
popcntMaskSliceLoop:
|
|
MOVQ (DI), DX
|
|
NOTQ DX
|
|
ANDQ (SI), DX
|
|
POPCNTQ_DX_DX
|
|
ADDQ DX, AX
|
|
ADDQ $8, SI
|
|
ADDQ $8, DI
|
|
LOOP popcntMaskSliceLoop
|
|
popcntMaskSliceEnd:
|
|
MOVQ AX, ret+48(FP)
|
|
RET
|
|
|
|
TEXT ·popcntAndSliceAsm(SB),4,$0-56
|
|
XORQ AX, AX
|
|
MOVQ s+0(FP), SI
|
|
MOVQ s_len+8(FP), CX
|
|
TESTQ CX, CX
|
|
JZ popcntAndSliceEnd
|
|
MOVQ m+24(FP), DI
|
|
popcntAndSliceLoop:
|
|
MOVQ (DI), DX
|
|
ANDQ (SI), DX
|
|
POPCNTQ_DX_DX
|
|
ADDQ DX, AX
|
|
ADDQ $8, SI
|
|
ADDQ $8, DI
|
|
LOOP popcntAndSliceLoop
|
|
popcntAndSliceEnd:
|
|
MOVQ AX, ret+48(FP)
|
|
RET
|
|
|
|
TEXT ·popcntOrSliceAsm(SB),4,$0-56
|
|
XORQ AX, AX
|
|
MOVQ s+0(FP), SI
|
|
MOVQ s_len+8(FP), CX
|
|
TESTQ CX, CX
|
|
JZ popcntOrSliceEnd
|
|
MOVQ m+24(FP), DI
|
|
popcntOrSliceLoop:
|
|
MOVQ (DI), DX
|
|
ORQ (SI), DX
|
|
POPCNTQ_DX_DX
|
|
ADDQ DX, AX
|
|
ADDQ $8, SI
|
|
ADDQ $8, DI
|
|
LOOP popcntOrSliceLoop
|
|
popcntOrSliceEnd:
|
|
MOVQ AX, ret+48(FP)
|
|
RET
|
|
|
|
TEXT ·popcntXorSliceAsm(SB),4,$0-56
|
|
XORQ AX, AX
|
|
MOVQ s+0(FP), SI
|
|
MOVQ s_len+8(FP), CX
|
|
TESTQ CX, CX
|
|
JZ popcntXorSliceEnd
|
|
MOVQ m+24(FP), DI
|
|
popcntXorSliceLoop:
|
|
MOVQ (DI), DX
|
|
XORQ (SI), DX
|
|
POPCNTQ_DX_DX
|
|
ADDQ DX, AX
|
|
ADDQ $8, SI
|
|
ADDQ $8, DI
|
|
LOOP popcntXorSliceLoop
|
|
popcntXorSliceEnd:
|
|
MOVQ AX, ret+48(FP)
|
|
RET
|