Test code for x86 (e.g. for comparison on other CPUs):
Code: Select all
; Target: 32-bit Windows, PureBasic 5.21 LTS (x86)
; Test inputs and scratch/result slots. The procedures below access them from
; inline assembly through their v_<name> symbols.
Define.d Test_Value_D = -12345.6789 ; expected cube root = -23.112042408247961097779983746659
Define.d Appr_D                     ; scratch/result slot (double precision)
Define.f Test_Value_F = -12345.6789
Define.f Appr_F                     ; scratch/result slot (single precision)
Procedure.f Cube_Root_F_H_x86() ;only for x86
  ; Single-precision cube root of the global Test_Value_F.
  ;
  ; Method: start value from Kahan's bit hack (integer bits / 3 + magic
  ; constant, see http://metamerist.com/cbrt/cbrt.htm), refined by two passes of
  ;   x = x * (x^3 + 2a) / (2x^3 + a)      with a = |Test_Value_F|
  ; The sign of the input is re-applied at the end.
  ;
  ; Input : global Test_Value_F (read via v_Test_Value_F)
  ; Output: result on the x87 stack (st0); also stored in the global Appr_F
  ; Uses  : eax, ecx, edx, xmm0-xmm4, flags
  ;
  ; +0/-0, infinities and NaN are returned unchanged. Without this guard the
  ; iteration returns a small non-zero value for 0 (each pass only halves the
  ; start value) and NaN for an infinite input.
  !mov eax,[v_Test_Value_F]
  !and eax,7FFFFFFFh              ;eax = bit pattern of |a|
  !jz Cbrt_F_H_PassThrough        ;a = +0 or -0
  !cmp eax,7F800000h
  !jae Cbrt_F_H_PassThrough       ;exponent all ones: Inf or NaN
  !mov [v_Appr_F],eax
  !movss xmm0,[v_Appr_F]          ;xmm0 = a
  !movss xmm1,xmm0
  ;start value: eax still holds the bits of |a|, no reload needed
  !xor edx,edx                    ;div uses edx:eax as dividend
  !div dword[Value3_F]
  !add eax,[BitHack_F]
  !mov [v_Appr_F],eax
  !movss xmm2,[v_Appr_F]          ;xmm2 = x (start value)
  !addss xmm1,xmm1                ;xmm1 = 2*a, constant during the iteration
  !mov ecx,2                      ;number of passes; higher = more precision (if possible)
  !@@:
  !movss xmm3,xmm2
  !mulss xmm3,xmm3
  !mulss xmm3,xmm2                ;xmm3 = x*x*x
  !movss xmm4,xmm3
  !addss xmm4,xmm4                ;xmm4 = 2*x*x*x
  !addss xmm4,xmm0                ;xmm4 = 2*x*x*x + a
  !addss xmm3,xmm1                ;xmm3 = x*x*x + 2*a
  !divss xmm3,xmm4
  !mulss xmm2,xmm3                ;x = x * (x^3 + 2a) / (2x^3 + a)
  !dec ecx
  !jnz @b
  ;restore the sign if Test_Value_F is negative
  !test byte[v_Test_Value_F+3],80h
  !jz @f
  !mulss xmm2,[Minus1_F]
  !@@:
  !movss dword[v_Appr_F],xmm2
  !jmp Cbrt_F_H_Load
  !Cbrt_F_H_PassThrough:
  !mov eax,[v_Test_Value_F]       ;result = input, sign included
  !mov [v_Appr_F],eax
  !Cbrt_F_H_Load:
  !fld dword[v_Appr_F]            ;float result is handed back in st0
  ProcedureReturn
  ;constants, placed after ProcedureReturn so they are not executed
  !Minus1_F: dd -1.0
  !BitHack_F: dd 709921077 ;magic constant for Kahan's bit hack (Float); http://metamerist.com/cbrt/cbrt.htm
  !Value3_F: dd 3
EndProcedure
Procedure.d Cube_Root_D_H_x86() ;only for x86
  ; Double-precision cube root of the global Test_Value_D.
  ;
  ; Method: start value from Kahan's bit hack applied to the high dword
  ; (high dword / 3 + magic constant, see http://metamerist.com/cbrt/cbrt.htm),
  ; refined by two passes of
  ;   x = x * (x^3 + 2a) / (2x^3 + a)      with a = |Test_Value_D|
  ; The sign of the input is re-applied at the end.
  ;
  ; Input : global Test_Value_D (read via v_Test_Value_D)
  ; Output: result on the x87 stack (st0); also stored in the global Appr_D
  ; Uses  : eax, ecx, edx, xmm0-xmm4, flags
  ;
  ; +0/-0, infinities and NaN are returned unchanged. Without this guard the
  ; iteration returns a small non-zero value for 0 (each pass only halves the
  ; start value) and NaN for an infinite input.
  !mov eax,dword[v_Test_Value_D+4] ;high dword: sign, exponent, upper mantissa
  !mov edx,dword[v_Test_Value_D]   ;low dword: lower mantissa
  !mov dword[v_Appr_D],edx
  !and eax,7FFFFFFFh               ;drop the sign
  !mov ecx,eax
  !or ecx,edx
  !jz Cbrt_D_H_PassThrough         ;all bits below the sign are 0: +0 or -0
  !cmp eax,7FF00000h
  !jae Cbrt_D_H_PassThrough        ;exponent all ones: Inf or NaN
  !mov dword[v_Appr_D+4],eax
  !movsd xmm0,[v_Appr_D]           ;xmm0 = a
  !movsd xmm1,xmm0
  ;start value: eax still holds the high dword of |a|, no reload needed
  !xor edx,edx                     ;div uses edx:eax as dividend
  !div dword[Value3_D]
  !add eax,[BitHack_D+4]           ;the magic constant lives in the high dword
  !mov dword[v_Appr_D+4],eax
  !movsd xmm2,[v_Appr_D]           ;xmm2 = x (start value)
  !addsd xmm1,xmm1                 ;xmm1 = 2*a, constant during the iteration
  !mov ecx,2                       ;number of passes; higher = more precision (if possible)
  !@@:
  !movsd xmm3,xmm2
  !mulsd xmm3,xmm3
  !mulsd xmm3,xmm2                 ;xmm3 = x*x*x
  !movsd xmm4,xmm3
  !addsd xmm4,xmm4                 ;xmm4 = 2*x*x*x
  !addsd xmm4,xmm0                 ;xmm4 = 2*x*x*x + a
  !addsd xmm3,xmm1                 ;xmm3 = x*x*x + 2*a
  !divsd xmm3,xmm4
  !mulsd xmm2,xmm3                 ;x = x * (x^3 + 2a) / (2x^3 + a)
  !dec ecx
  !jnz @b
  ;restore the sign if Test_Value_D is negative
  !test byte[v_Test_Value_D+7],80h
  !jz @f
  !mulsd xmm2,[Minus1_D]
  !@@:
  !movsd qword[v_Appr_D],xmm2
  !jmp Cbrt_D_H_Load
  !Cbrt_D_H_PassThrough:
  !mov eax,dword[v_Test_Value_D+4] ;result = input; the low dword was copied above
  !mov dword[v_Appr_D+4],eax
  !Cbrt_D_H_Load:
  !fld qword[v_Appr_D]             ;double result is handed back in st0
  ProcedureReturn
  ;constants, placed after ProcedureReturn so they are not executed
  !Minus1_D: dq -1.0
  !BitHack_D: dq 3071306043645493248 ;715094163<<32, magic constant for Kahan's bit hack (Double); http://metamerist.com/cbrt/cbrt.htm
  !Value3_D: dd 3
EndProcedure
Procedure.f Cube_Root_F_P() ;for x86 and x64
  ; Single-precision cube root of the global Test_Value_F on the x87 FPU,
  ; computed as 2^(log2(|a|) / 3) with the input's sign re-applied.
  ;
  ; Input : global Test_Value_F (read via v_Test_Value_F)
  ; Output: result left in st0
  ; Uses  : eax, x87 stack, flags
  ;
  ; +0/-0, infinities and NaN are returned unchanged. Without this guard an
  ; input of 0 makes fyl2x produce -Inf and the following subtraction
  ; (-Inf) - (-Inf) produce NaN.
  ; NOTE(review): the result is only left on the x87 stack - confirm that this
  ; matches the compiler's float return convention on x64.
  !mov eax,[v_Test_Value_F]
  !and eax,7FFFFFFFh              ;eax = bit pattern of |a|
  !jz Cbrt_F_P_PassThrough        ;a = +0 or -0
  !cmp eax,7F800000h
  !jb Cbrt_F_P_Compute            ;finite and non-zero
  !Cbrt_F_P_PassThrough:
  !fld dword[v_Test_Value_F]      ;result = input, sign included
  !jmp Cbrt_F_P_Done
  !Cbrt_F_P_Compute:
  !fld dword[Dat_F]               ;st0 = 1/3
  !fld dword[v_Test_Value_F]      ;st0 = a, st1 = 1/3
  !fabs
  !fyl2x                          ;st0 = (1/3) * log2(|a|)
  !fld st0                        ;copy the logarithm
  !frndint                        ;st0 = integer part (characteristic)
  !fsub st1,st0                   ;st1 = fractional part (mantissa)
  !fxch                           ;get the mantissa on top
  !f2xm1                          ;st0 = 2^(mantissa) - 1
  !fld1
  !faddp                          ;add 1 back
  !fscale                         ;scale it with the characteristic
  !fstp st1                       ;copy result over the characteristic and pop
  !test byte[v_Test_Value_F+3],$80
  !jz @f
  !fchs                           ;restore sign
  !@@:
  !Cbrt_F_P_Done:
  ProcedureReturn
  ;exponent 1/3; stored with dd, i.e. rounded to single precision
  !Dat_F: dd 0.33333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333 ;:-)
EndProcedure
Procedure.d Cube_Root_D_P() ;for x86 and x64
  ; Double-precision cube root of the global Test_Value_D on the x87 FPU,
  ; computed as 2^(log2(|a|) / 3) with the input's sign re-applied.
  ;
  ; Input : global Test_Value_D (read via v_Test_Value_D)
  ; Output: result left in st0
  ; Uses  : eax, x87 stack, flags
  ;
  ; +0/-0, infinities and NaN are returned unchanged. Without this guard an
  ; input of 0 makes fyl2x produce -Inf and the following subtraction
  ; (-Inf) - (-Inf) produce NaN.
  ; NOTE(review): the result is only left on the x87 stack - confirm that this
  ; matches the compiler's double return convention on x64.
  !mov eax,dword[v_Test_Value_D+4] ;high dword: sign, exponent, upper mantissa
  !and eax,7FFFFFFFh               ;drop the sign
  !cmp eax,7FF00000h
  !jae Cbrt_D_P_PassThrough        ;exponent all ones: Inf or NaN
  !or eax,dword[v_Test_Value_D]    ;merge in the low dword
  !jnz Cbrt_D_P_Compute            ;any bit set: finite and non-zero
  !Cbrt_D_P_PassThrough:
  !fld qword[v_Test_Value_D]       ;result = input, sign included
  !jmp Cbrt_D_P_Done
  !Cbrt_D_P_Compute:
  !fld qword[Dat_D]                ;st0 = 1/3
  !fld qword[v_Test_Value_D]       ;st0 = a, st1 = 1/3
  !fabs
  !fyl2x                           ;st0 = (1/3) * log2(|a|)
  !fld st0                         ;copy the logarithm
  !frndint                         ;st0 = integer part (characteristic)
  !fsub st1,st0                    ;st1 = fractional part (mantissa)
  !fxch                            ;get the mantissa on top
  !f2xm1                           ;st0 = 2^(mantissa) - 1
  !fld1
  !faddp                           ;add 1 back
  !fscale                          ;scale it with the characteristic
  !fstp st1                        ;copy result over the characteristic and pop
  !test byte[v_Test_Value_D+7],$80
  !jz @f
  !fchs                            ;restore sign
  !@@:
  !Cbrt_D_P_Done:
  ProcedureReturn
  ;exponent 1/3; stored with dq, i.e. rounded to double precision
  !Dat_D: dq 0.33333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333 ;:-)
EndProcedure
; Benchmark driver: calls each cube-root procedure #Loops times, measures the
; elapsed wall-clock time in milliseconds, and shows the last result of each
; variant together with its timing in a message box.
#Loops = 10000000 ; calls per variant

; Float, Halley iteration (Helle)
TA_F_H = ElapsedMilliseconds()
For i = 1 To #Loops
  Cbrt_F_H_x86.f = Cube_Root_F_H_x86()
Next
TE_F_H = ElapsedMilliseconds() - TA_F_H
Result$ = "Test-Value : -12345.6789" + #LFCR$ + "Float Helle:" + #LFCR$ + StrF(Cbrt_F_H_x86, 7) + " / " + Str(TE_F_H) + " ms " + #LFCR$

; Double, Halley iteration (Helle)
TA_D_H = ElapsedMilliseconds()
For i = 1 To #Loops
  Cbrt_D_H_x86.d = Cube_Root_D_H_x86()
Next
TE_D_H = ElapsedMilliseconds() - TA_D_H
Result$ + "Double Helle:" + #LFCR$ + StrD(Cbrt_D_H_x86, 15) + " / " + Str(TE_D_H) + " ms " + #LFCR$

; Float, x87 log/exp (Psychophanta)
TA_F_P = ElapsedMilliseconds()
For i = 1 To #Loops
  Cbrt_F_P.f = Cube_Root_F_P()
Next
TE_F_P = ElapsedMilliseconds() - TA_F_P
Result$ + "Float Psychophanta:" + #LFCR$ + StrF(Cbrt_F_P, 7) + " / " + Str(TE_F_P) + " ms " + #LFCR$

; Double, x87 log/exp (Psychophanta)
TA_D_P = ElapsedMilliseconds()
For i = 1 To #Loops
  Cbrt_D_P.d = Cube_Root_D_P()
Next
TE_D_P = ElapsedMilliseconds() - TA_D_P
Result$ + "Double Psychophanta:" + #LFCR$ + StrD(Cbrt_D_P, 15) + " / " + Str(TE_D_P) + " ms " + #LFCR$ + #LFCR$

; Report the CPU of the machine the test actually runs on instead of a fixed name
Result$ + "CPU: " + Trim(CPUName()) + #LFCR$
;SetClipboardText(Result$)
MessageRequester("Cube Root x86, " + Str(#Loops) + " Loops", Result$)
My results (with VirtualBox under Win7 x64):
Test-Value : -12345.6789
Float Helle:
-23.1120415 / 191 ms
Double Helle:
-23.112042408247962 / 362 ms; my Double x86 version still has too much overhead
Float Psychophanta:
-23.1120453 / 481 ms
Double Psychophanta:
-23.112042408247959 / 488 ms
CPU: Intel i7-4770K@4.0GHz