Cool.

Many thanks, wilbert and luis!
These are typical results that I get for speed testing (PB 5.31 x86):
Code:
---------------------------
PopCount speed test
---------------------------
t0 = 29 ms (11%) ; 'popcnt' asm instruction
t1 = 259 ms (100%) ; wilbert
t2 = 334 ms (128%) ; luis
t3 = 737 ms (284%) ; luis (using 'loop')
t4 = 1316 ms (508%) ; LJ
t5 = 1240 ms (478%) ; Trond (recursive)
t6 = 854 ms (329%) ; Trond
t7 = 40 ms (15%) ; idle
---------------------------
For the record, here is the code that I used for testing. Thanks again!
Code:
; PB 5.31
EnableExplicit
Procedure.l PopCount0 (k.l)
; -- hardware 'popcnt' instruction
; Returns the number of set bits in the 32-bit value k.
; NOTE(review): 'popcnt' is an optional x86 extension; nothing here checks
; that the CPU supports it - presumably the caller runs this only on
; suitable hardware. Confirm before reusing outside this benchmark.
; Load parameter k ([p.v_k] is the asm-level name of the procedure's k).
! mov eax, [p.v_k]
; Replace EAX by its own bit count.
! popcnt eax, eax
; Bare ProcedureReturn: the value left in EAX by the asm above is the result.
ProcedureReturn
EndProcedure
Procedure.l PopCount1 (k.l)
; -- wilbert
; Returns the number of set bits in the 32-bit value k.
; Each pass of the loop clears the lowest set bit of the working copy
; (x = x & (x - 1)) and bumps the counter, so the loop runs once per set bit.
; EAX = running count, starts at 0.
!xor eax, eax
; EDX = working copy of k.
!mov edx, [p.v_k]
; 'and edx, edx' leaves EDX unchanged but sets ZF when it is zero,
; so an input of 0 skips the loop and returns 0.
!and edx, edx
!jz popcount_m2_done
!popcount_m2_loop:
!inc eax
; ECX = EDX - 1 (lea does the subtraction without touching the flags).
!lea ecx, [edx - 1]
; Clear the lowest set bit; ZF tells whether any bits are left.
!and edx, ecx
!jnz popcount_m2_loop
!popcount_m2_done:
; Bare ProcedureReturn: the count accumulated in EAX is the result.
ProcedureReturn
EndProcedure
Procedure.l PopCount2 (k.l)
; -- luis
; Returns the number of set bits in the 32-bit value k.
; Always performs exactly 32 iterations: every pass shifts the top bit of
; the working copy into the carry flag and adds that carry to the counter.
; EDX = working copy of k.
!mov edx, [p.v_k]
; EAX = running count, starts at 0.
!xor eax, eax
; ECX = number of bits still to examine.
!mov ecx, 32
!popcount2:
; Move the current top bit of EDX into CF.
!shl edx, 1
; EAX = EAX + CF.
!adc eax, 0
; Count down with an explicit dec/jnz pair.
!dec ecx
!jnz popcount2
; Bare ProcedureReturn: the count accumulated in EAX is the result.
ProcedureReturn
EndProcedure
Procedure.l PopCount3 (k.l)
; -- luis (using 'loop')
; Returns the number of set bits in the 32-bit value k.
; Same shift-and-add-carry scheme as PopCount2, but the counter handling is
; done by the single 'loop' instruction instead of a dec/jnz pair.
; EDX = working copy of k.
!mov edx, [p.v_k]
; EAX = running count, starts at 0.
!xor eax, eax
; ECX = iteration counter consumed by 'loop'.
!mov ecx, 32
!popcount3:
; Move the current top bit of EDX into CF.
!shl edx, 1
; EAX = EAX + CF.
!adc eax, 0
; 'loop' decrements ECX and jumps back while ECX is not zero.
!loop popcount3
; Bare ProcedureReturn: the count accumulated in EAX is the result.
ProcedureReturn
EndProcedure
Procedure.l PopCount4 (k.l)
; -- LJ
; Returns the number of set bits in the 32-bit value k.
; Tests the bits one by one with 'bt', from bit 31 down to bit 0, and
; increments the counter for every bit that is set.
; EDX = value under test (never modified).
! mov edx, [p.v_k]
; EAX = running count, starts at 0.
! xor eax, eax
; ECX = bit index + 1; decremented before each test, so it runs 31..0.
! mov ecx, 32
! popcount_again:
! dec ecx
; Copy bit number ECX of EDX into CF.
! bt edx, ecx
; Bit clear -> skip the increment.
! jnc popcount_notset
! inc eax
! popcount_notset:
; 'or ecx, ecx' leaves ECX unchanged and sets ZF once it has reached 0.
! or ecx, ecx
! jnz popcount_again
; Bare ProcedureReturn: the count accumulated in EAX is the result.
ProcedureReturn
EndProcedure
Procedure.l PopCount5 (v.l, i = 0)
  ; -- Trond (recursive)
  ; Returns the number of set bits in the 32-bit value v.
  ;   v : value whose set bits are counted
  ;   i : index of the bit handled by this call; callers leave it at its
  ;       default of 0, the recursion advances it up to 31
  ;
  ; The bit is extracted as (v >> i) & 1 rather than (v & 1 << i) >> i.
  ; With 32-bit integers '1 << 31' is negative and '>>' is an arithmetic
  ; shift, so the old form produced -1 instead of 1 for bit 31 and every
  ; negative input was miscounted. Shifting first and masking with 1
  ; afterwards always yields 0 or 1.
  If i < 32
    ProcedureReturn ((v >> i) & 1) + PopCount5(v, i + 1)
  EndIf
  ; End of recursion: no bits left to count.
  ProcedureReturn 0
EndProcedure
Procedure.l PopCount6 (v.l)
  ; -- Trond
  ; Returns the number of set bits in the 32-bit value v by testing
  ; bits 0..31 one after the other and summing them.
  ;
  ; The bit is extracted as (v >> i) & 1 rather than (v & 1 << i) >> i.
  ; With 32-bit integers '1 << 31' is negative and '>>' is an arithmetic
  ; shift, so the old form contributed -1 instead of 1 for bit 31 and every
  ; negative input was miscounted. Shifting first and masking with 1
  ; afterwards always yields 0 or 1.
  Protected i.i, Agg.l
  For i = 0 To 31
    Agg + ((v >> i) & 1)
  Next
  ProcedureReturn Agg
EndProcedure
Procedure.l Popcount7 (x.l)
; -- idle (adapted for 32 bit)
; Returns the number of set bits in the 32-bit value x.
; Branch-free: partial counts are accumulated in parallel inside the
; register, first per 2-bit field, then per 4-bit field, then per byte,
; and finally the four byte counts are summed with one multiplication.
; Step 1: x = x - ((x >> 1) & $55555555)
;         -> every 2-bit field now holds the number of bits that were set in it
!mov eax, [p.v_x]
!mov edx, eax
!shr edx, 1
!and edx, 0x55555555
!sub eax, edx
; Step 2: x = (x & $33333333) + ((x >> 2) & $33333333)
;         -> every 4-bit field now holds the count for its four bits
!mov edx, eax ;x
!and eax, 0x33333333
!shr edx, 2
!and edx, 0x33333333
!add eax, edx
; Step 3: x = (x + (x >> 4)) & $0f0f0f0f
;         -> every byte now holds the count for its eight bits
!mov edx, eax
!shr edx, 4
!add eax, edx
!and eax, 0x0f0f0f0f
; Step 4: x = (x * $01010101) >> 24
;         -> the multiplication adds the four byte counts into the top byte,
;            the shift moves that total down to the low bits
!imul eax, 0x01010101
!shr eax, 24
; Bare ProcedureReturn: the total left in EAX is the result.
ProcedureReturn
EndProcedure
; ---- Initialisation ----
; i   : shared loop index
; x   : receives the result of the PopCount call under test
; rep : number of random test values, and iterations per timing loop
Define.i i
Define.i x
Define.i rep = 10000000
; One elapsed-time slot (in milliseconds) per PopCount variant.
Define.i t0, t1, t2, t3
Define.i t4, t5, t6, t7
; Test data: elements 1..rep are filled with random values in the range
; 0..2147483647; element 0 is allocated by Dim but never used.
Dim Rnd.l(rep)
For i = 1 To rep
  Rnd(i) = Random(2147483647)
Next
; ---- Small check whether all procedures return the same results ----
; The first 100 random values are run through every variant; PopCount0
; serves as the reference. The program stops at the first disagreement.
For i = 1 To 100
  x = PopCount0(Rnd(i))
  If x = PopCount1(Rnd(i)) And
     x = PopCount2(Rnd(i)) And
     x = PopCount3(Rnd(i)) And
     x = PopCount4(Rnd(i)) And
     x = PopCount5(Rnd(i)) And
     x = PopCount6(Rnd(i)) And
     x = PopCount7(Rnd(i))
    ; All variants agree on this value - go on with the next one.
    Continue
  EndIf
  ; At least one variant disagrees with the reference: report and quit.
  MessageRequester("Error",
                   "Different results for PopCount(" + Rnd(i) + ")")
  End
Next
; ---- Speed test ----
; Expands to one timing loop: calls PopCountProc once for each of the 'rep'
; test values and leaves the elapsed time in milliseconds in Elapsed.
Macro TimePopCount(Elapsed, PopCountProc)
  Elapsed = ElapsedMilliseconds()
  For i = 1 To rep
    x = PopCountProc(Rnd(i))
  Next
  Elapsed = ElapsedMilliseconds() - Elapsed
EndMacro

TimePopCount(t0, PopCount0)
TimePopCount(t1, PopCount1)
TimePopCount(t2, PopCount2)
TimePopCount(t3, PopCount3)
TimePopCount(t4, PopCount4)
TimePopCount(t5, PopCount5)
TimePopCount(t6, PopCount6)
TimePopCount(t7, PopCount7)

; Build the report line by line; every percentage is relative to t1,
; which is shown as the 100% reference.
Define report.s
report = "t0 = " + t0 + " ms (" + Int(100*t0/t1) + "%)" + #LF$
report + "t1 = " + t1 + " ms (100%)" + #LF$
report + "t2 = " + t2 + " ms (" + Int(100*t2/t1) + "%)" + #LF$
report + "t3 = " + t3 + " ms (" + Int(100*t3/t1) + "%)" + #LF$
report + "t4 = " + t4 + " ms (" + Int(100*t4/t1) + "%)" + #LF$
report + "t5 = " + t5 + " ms (" + Int(100*t5/t1) + "%)" + #LF$
report + "t6 = " + t6 + " ms (" + Int(100*t6/t1) + "%)" + #LF$
report + "t7 = " + t7 + " ms (" + Int(100*t7/t1) + "%)"
MessageRequester("PopCount speed test", report)