It seems I forgot to post this, but the x86-specific popcountmem functions need checking.
Code: Select all
Procedure _Popcount32(x.l)
  ; Branch-free (SWAR) count of the set bits in the 32-bit value x.
  ; The count is built in eax; the bare ProcedureReturn at the end hands back
  ; whatever eax holds. Scratch register: ecx.

  ; Pairs: x = x - ((x >> 1) & $55555555)
  !mov ecx, [p.v_x]
  !mov eax, ecx
  !shr ecx, 1
  !and ecx, 0x55555555
  !sub eax, ecx

  ; Nibbles: x = (x & $33333333) + ((x >> 2) & $33333333)
  !mov ecx, eax
  !shr ecx, 2
  !and eax, 0x33333333
  !and ecx, 0x33333333
  !add eax, ecx

  ; Bytes: x = (x + (x >> 4)) & $0f0f0f0f
  !mov ecx, eax
  !shr ecx, 4
  !add eax, ecx
  !and eax, 0x0f0f0f0f

  ; Sum the four byte counts into the top byte: (x * $01010101) >> 24
  !imul eax, 0x01010101
  !shr eax, 24
  ProcedureReturn
EndProcedure
CompilerIf SizeOf(Integer) = 4
Procedure _popcountmem(*mem.long,len.l)
  ; Counts the set bits in the len bytes starting at *mem (x86 build, no POPCNT).
  ;   *mem - start of the buffer
  ;   len  - number of bytes to examine
  ; Returns the total number of 1 bits; 0 when *mem is null or len <= 0.
  ;
  ; The buffer is processed one dword at a time with the SWAR bit count; the
  ; 0..3 trailing bytes are then gathered one byte at a time, so nothing at or
  ; beyond *mem + len is ever read or counted.
  ; Only eax, ecx and edx are used.
  Protected result,a,b

  If *mem = 0 Or len <= 0
    ProcedureReturn 0
  EndIf

  ; a = len rounded down to a multiple of 4 (bytes covered by the dword loop)
  ; b = len % 4                             (bytes left for the tail)
  !mov eax, [p.v_len]
  !mov edx, eax
  !and eax, 3
  !sub edx, eax
  !mov [p.v_a], edx
  !mov [p.v_b], eax

  ; ecx = byte offset of the dword being counted
  !xor ecx, ecx
  !lwhile:
  !cmp ecx, [p.v_a]
  !jge lend
  !mov eax, [p.p_mem]
  !mov eax, [eax + ecx]
  ; SWAR bit count of eax
  !mov edx, eax
  !shr edx, 1
  !and edx, 0x55555555
  !sub eax, edx
  !mov edx, eax
  !and eax, 0x33333333
  !shr edx, 2
  !and edx, 0x33333333
  !add eax, edx
  !mov edx, eax
  !shr edx, 4
  !add eax, edx
  !and eax, 0x0f0f0f0f
  !imul eax, 0x01010101
  !shr eax, 24
  !add [p.v_result], eax
  !add ecx, 4
  !jmp lwhile

  !lend:
  ; Tail: ecx = a here. Pack the b remaining bytes into eax, highest offset
  ; first; the byte order does not matter for a bit count.
  !mov edx, [p.p_mem]
  !add edx, ecx
  !mov ecx, [p.v_b]
  !xor eax, eax
  !ltail:
  !test ecx, ecx
  !jz le
  !dec ecx
  !shl eax, 8
  !or al, [edx + ecx]
  !jmp ltail

  !le:
  ; SWAR bit count of the gathered tail (eax = 0 when there is no tail)
  !mov edx, eax
  !shr edx, 1
  !and edx, 0x55555555
  !sub eax, edx
  !mov edx, eax
  !and eax, 0x33333333
  !shr edx, 2
  !and edx, 0x33333333
  !add eax, edx
  !mov edx, eax
  !shr edx, 4
  !add eax, edx
  !and eax, 0x0f0f0f0f
  !imul eax, 0x01010101
  !shr eax, 24
  !add [p.v_result], eax
  ProcedureReturn result
EndProcedure
Procedure _popcountmemSSE(*mem,len)
  ; Counts the set bits in the len bytes starting at *mem using the POPCNT
  ; instruction (x86 build).
  ;   *mem - start of the buffer
  ;   len  - number of bytes to examine
  ; Returns the total number of 1 bits; 0 when *mem is null or len <= 0.
  ;
  ; Whole dwords are counted in the main loop; the 0..3 trailing bytes are
  ; gathered one byte at a time, so nothing at or beyond *mem + len is ever
  ; read or counted.
  ; Only eax, ecx and edx are used.
  Protected result,a,b

  If *mem = 0 Or len <= 0
    ProcedureReturn 0
  EndIf

  ; a = len rounded down to a multiple of 4, b = len % 4
  !mov eax, [p.v_len]
  !mov edx, eax
  !and eax, 3
  !sub edx, eax
  !mov [p.v_a], edx
  !mov [p.v_b], eax

  ; ecx = byte offset of the dword being counted
  !xor ecx, ecx
  !lwhile1:
  !cmp ecx, [p.v_a]
  !jge lend1
  !mov eax, [p.p_mem]
  !mov eax, [eax + ecx]
  !popcnt eax, eax
  !add [p.v_result], eax
  !add ecx, 4
  !jmp lwhile1

  !lend1:
  ; Tail: ecx = a here. Pack the b remaining bytes into eax, highest offset
  ; first; the byte order does not matter for a bit count.
  !mov edx, [p.p_mem]
  !add edx, ecx
  !mov ecx, [p.v_b]
  !xor eax, eax
  !ltail1:
  !test ecx, ecx
  !jz lle
  !dec ecx
  !shl eax, 8
  !or al, [edx + ecx]
  !jmp ltail1

  !lle:
  ; eax = 0 when there is no tail, which adds nothing
  !popcnt eax, eax
  !add [p.v_result], eax
  ProcedureReturn result
EndProcedure
CompilerElse
Procedure _Popcount64(x.i)
  ; Branch-free (SWAR) count of the set bits in the 64-bit value x.
  ; The count is built in rax; the bare ProcedureReturn at the end hands back
  ; whatever rax holds.
  ; Scratch registers: rdx and r8. The 64-bit masks cannot be encoded as
  ; immediates of and/imul, so each is loaded into r8 first. r8 is used rather
  ; than r15 because r15 must be preserved across a procedure.

  ; Pairs: x = x - ((x >> 1) & $5555555555555555)
  !mov rax, [p.v_x]
  !mov rdx, rax
  !shr rdx, 1
  !mov r8, 0x5555555555555555
  !and rdx, r8
  !sub rax, rdx

  ; Nibbles: x = (x & $3333333333333333) + ((x >> 2) & $3333333333333333)
  !mov rdx, rax
  !mov r8, 0x3333333333333333
  !and rax, r8
  !shr rdx, 2
  !and rdx, r8
  !add rax, rdx

  ; Bytes: x = (x + (x >> 4)) & $0f0f0f0f0f0f0f0f
  !mov rdx, rax
  !shr rdx, 4
  !add rax, rdx
  !mov r8, 0x0f0f0f0f0f0f0f0f
  !and rax, r8

  ; Sum the eight byte counts into the top byte: (x * $0101010101010101) >> 56
  !mov r8, 0x0101010101010101
  !imul rax, r8
  !shr rax, 56
  ProcedureReturn
EndProcedure
Procedure _popcountmem(*mem,len)
  ; Counts the set bits in the len bytes starting at *mem (x64 build, no POPCNT).
  ;   *mem - start of the buffer
  ;   len  - number of bytes to examine
  ; Returns the total number of 1 bits; 0 when *mem is null or len <= 0.
  ;
  ; The buffer is processed one dword at a time with the 32-bit SWAR bit count;
  ; the 0..3 trailing bytes are then gathered one byte at a time, so nothing at
  ; or beyond *mem + len is ever read or counted.
  ; Only rax, rcx and rdx are used.
  Protected result,a,b

  If *mem = 0 Or len <= 0
    ProcedureReturn 0
  EndIf

  ; a = len rounded down to a multiple of 4 (bytes covered by the dword loop)
  ; b = len % 4                             (bytes left for the tail)
  !mov rax, [p.v_len]
  !mov rdx, rax
  !and rax, 3
  !sub rdx, rax
  !mov [p.v_a], rdx
  !mov [p.v_b], rax

  ; rcx = byte offset of the dword being counted (writing ecx clears all of rcx)
  !xor ecx, ecx
  !lwhile:
  !cmp rcx, [p.v_a]
  !jge lend
  !mov rax, [p.p_mem]
  !mov eax, [rax + rcx]
  ; SWAR bit count of eax
  !mov edx, eax
  !shr edx, 1
  !and edx, 0x55555555
  !sub eax, edx
  !mov edx, eax
  !and eax, 0x33333333
  !shr edx, 2
  !and edx, 0x33333333
  !add eax, edx
  !mov edx, eax
  !shr edx, 4
  !add eax, edx
  !and eax, 0x0f0f0f0f
  !imul eax, 0x01010101
  !shr eax, 24
  ; eax writes zero-extend into rax, so add to the full 8-byte accumulator
  !add [p.v_result], rax
  !add rcx, 4
  !jmp lwhile

  !lend:
  ; Tail: rcx = a here. Pack the b remaining bytes into eax, highest offset
  ; first; the byte order does not matter for a bit count.
  !mov rdx, [p.p_mem]
  !add rdx, rcx
  !mov rcx, [p.v_b]
  !xor eax, eax
  !ltail:
  !test rcx, rcx
  !jz le
  !dec rcx
  !shl eax, 8
  !or al, [rdx + rcx]
  !jmp ltail

  !le:
  ; SWAR bit count of the gathered tail (eax = 0 when there is no tail)
  !mov edx, eax
  !shr edx, 1
  !and edx, 0x55555555
  !sub eax, edx
  !mov edx, eax
  !and eax, 0x33333333
  !shr edx, 2
  !and edx, 0x33333333
  !add eax, edx
  !mov edx, eax
  !shr edx, 4
  !add eax, edx
  !and eax, 0x0f0f0f0f
  !imul eax, 0x01010101
  !shr eax, 24
  !add [p.v_result], rax
  ProcedureReturn result
EndProcedure
Procedure _popcountmemSSE(*mem,len)
  ; Counts the set bits in the len bytes starting at *mem using the POPCNT
  ; instruction (x64 build).
  ;   *mem - start of the buffer
  ;   len  - number of bytes to examine
  ; Returns the total number of 1 bits; 0 when *mem is null or len <= 0.
  ;
  ; Whole qwords are counted in the main loop; the 0..7 trailing bytes are
  ; gathered one byte at a time, so nothing at or beyond *mem + len is ever
  ; read or counted.
  ; Only rax, rcx and rdx are used.
  Protected result,a,b

  If *mem = 0 Or len <= 0
    ProcedureReturn 0
  EndIf

  ; a = len rounded down to a multiple of 8, b = len % 8
  !mov rax, [p.v_len]
  !mov rdx, rax
  !and rax, 7
  !sub rdx, rax
  !mov [p.v_a], rdx
  !mov [p.v_b], rax

  ; rcx = byte offset of the qword being counted (writing ecx clears all of rcx)
  !xor ecx, ecx
  !lwhile1:
  !cmp rcx, [p.v_a]
  !jge lend1
  !mov rax, [p.p_mem]
  !mov rax, [rax + rcx]
  !popcnt rax, rax
  !add [p.v_result], rax
  !add rcx, 8
  !jmp lwhile1

  !lend1:
  ; Tail: rcx = a here. Pack the b remaining bytes into rax, highest offset
  ; first; the byte order does not matter for a bit count.
  !mov rdx, [p.p_mem]
  !add rdx, rcx
  !mov rcx, [p.v_b]
  !xor eax, eax
  !ltail1:
  !test rcx, rcx
  !jz lle
  !dec rcx
  !shl rax, 8
  !or al, [rdx + rcx]
  !jmp ltail1

  !lle:
  ; rax = 0 when there is no tail, which adds nothing
  !popcnt rax, rax
  !add [p.v_result], rax
  ProcedureReturn result
EndProcedure
CompilerEndIf
Procedure _PopCountSSE4(x.i)
  ; Bit count of x using the POPCNT instruction at the native integer width.
  ; The count is left in eax/rax and handed back by the bare ProcedureReturn.
  CompilerIf SizeOf(Integer) = 4
    !mov eax, [p.v_x]
    !popcnt eax, eax
  CompilerElse
    !mov rax, [p.v_x]
    !popcnt rax, rax
  CompilerEndIf
  ProcedureReturn
EndProcedure
; Call signatures shared by the popcount implementations, so that one of them
; can be selected at run time and called through a pointer.
Prototype PopCount(x.i)
Prototype PopCountMem(*mem,len)
; Function pointers for the selected implementations; InitpopCount() assigns
; them and must be called before either is used.
Global PopCount.PopCount
Global PopCountMem.PopCountMem
Procedure InitpopCount(FallBackMode.i=64)
  ; Points the global PopCount / PopCountMem function pointers at the best
  ; implementation for this CPU.
  ;   FallBackMode - only consulted on x64 when POPCNT is unavailable:
  ;                  64 selects _Popcount64, any other value selects _Popcount32.
  ;
  ; CPUID leaf 1 reports POPCNT support in ECX bit 23. CPUID also overwrites
  ; ebx/rbx, which has to be preserved, so it is parked in a local around the
  ; instruction (a local is used instead of push/pop because locals are
  ; addressed relative to the stack pointer).
  Protected result, savedbx

  CompilerIf SizeOf(Integer) = 8
    !mov [p.v_savedbx], rbx
  CompilerElse
    !mov [p.v_savedbx], ebx
  CompilerEndIf

  !mov eax, 1
  !cpuid
  !shr ecx, 23
  !and ecx, 1
  !mov [p.v_result], ecx

  CompilerIf SizeOf(Integer) = 8
    !mov rbx, [p.v_savedbx]
  CompilerElse
    !mov ebx, [p.v_savedbx]
  CompilerEndIf

  If result
    ; Hardware POPCNT is available
    popcount = @_PopCountSSE4()
    popcountmem = @_popcountmemSSE()
  Else
    ; Software fallback
    CompilerIf SizeOf(Integer) = 8
      If FallBackMode = 64
        popcount = @_popcount64()
      Else
        popcount = @_popcount32()
      EndIf
    CompilerElse
      popcount = @_popcount32()
    CompilerEndIf
    popcountmem = @_popcountmem()
  EndIf
EndProcedure
CompilerIf #PB_Compiler_IsMainFile
  ; Demo, only compiled when this file is the main source file.
  InitpopCount() ; sets popcount to the best supported function
  ;on x64 if you want to set the mode to the 32 function range, call it with InitpopCount(#PB_Long)

  ; Single value: 16 of the 32 bits are set
  x.i = %10101010101010101010101010101010
  Debug PopCount(x) ;16

  ; Buffer of random size where every byte is 3 (two bits set), so the
  ; expected total r is 2 * mx
  mx = Random(1024,128)
  *mem = AllocateMemory(mx)
  For i = 0 To mx-1
    PokeA(*mem + i, 3)
    r + 2
  Next

  Debug r
  Debug PopCountMem(*mem,mx)
  Debug _popcountmem(*mem,mx)
CompilerEndIf