Code: Select all
;For 32-Bit-Windows and OS X
;PB 5.21 LTS (x86)
Test_Value_D.d = -12345.6789 ;Cube Root = -23,112042408247961097779983746659
Appr_D.d ;for all...
Test_Value_F.f = -12345.6789
Appr_F.f
Procedure.f CubeRoot_FM1()
!mov eax, [v_Test_Value_F]
!mov ecx, eax
!and eax, 0x7fffffff ; without sign
!and ecx, 0x80000000 ; sign only
;now find a good approximation for the start-value; http://metamerist.com/cbrt/cbrt.htm - Kahan´s bit hack
!movd xmm0, eax
!movq xmm1, xmm0
!mov edx, 0x55555555
!mul edx
!add edx, 0x2a508935
!movd xmm2, edx
!addss xmm1, xmm1 ; xmm1=2*a=constant
!movq xmm3, xmm2
!mulss xmm3, xmm3
!mulss xmm3, xmm2 ; xmm3=x*x*x
!movq xmm4, xmm3
!addss xmm4, xmm4 ; 2*x*x*x
!addss xmm4, xmm0 ; xmm0=a
!rcpss xmm4, xmm4
!addss xmm3, xmm1
!mulss xmm3, xmm4
!mulss xmm2, xmm3
!movq xmm3, xmm2
!mulss xmm3, xmm3
!mulss xmm3, xmm2 ; xmm3=x*x*x
!movq xmm4, xmm3
!addss xmm4, xmm4 ; 2*x*x*x
!addss xmm4, xmm0 ; xmm0=a
!addss xmm3, xmm1
!divss xmm3, xmm4
!mulss xmm2, xmm3
!movd xmm3, ecx
!por xmm2, xmm3 ; restore sign
!movd [v_Appr_F], xmm2
!fld dword [v_Appr_F]
ProcedureReturn
EndProcedure
Procedure.f CubeRoot_FM2()
; based on both Kahan's bit hack and this routine http://www.musicdsp.org/showone.php?id=206
!mov eax, [v_Test_Value_F]
!mov ecx, eax
!and eax, 0x7fffffff ; without sign
!and ecx, 0x80000000 ; sign only
;now find a good approximation for the start-value; http://metamerist.com/cbrt/cbrt.htm - Kahan´s bit hack
!movd xmm0, eax
!movq xmm1, xmm0
!mov edx, 0x55555555
!mul edx
!add edx, 0x2a508935
!movd xmm0, edx
!movq xmm2, xmm0 ; x2: z
!mulss xmm2, xmm0 ; x2: z*z
!movq xmm3, xmm2 ; x3: z*z
!addss xmm2, xmm2 ; x2: 2*z*z
!addss xmm2, xmm3 ; x2: 3*z*z
!rcpss xmm2, xmm2 ; x2: ~ 1/(3*z*z)
!mulss xmm3, xmm0 ; x3: z*z*z
!subss xmm3, xmm1 ; x3: z*z*z-x
!mulss xmm3, xmm2 ; x3: (z*z*z-x) / (3*z*z)
!subss xmm0, xmm3 ; x0: z'
!movq xmm2, xmm0 ; x2: z
!mulss xmm2, xmm0 ; x2: z*z
!movq xmm3, xmm2 ; x3: z*z
!addss xmm2, xmm2 ; x2: 2*z*z
!addss xmm2, xmm3 ; x2: 3*z*z
!rcpss xmm2, xmm2 ; x2: ~ 1/(3*z*z)
!mulss xmm3, xmm0 ; x3: z*z*z
!subss xmm3, xmm1 ; x3: z*z*z-x
!mulss xmm3, xmm2 ; x3: (z*z*z-x) / (3*z*z)
!subss xmm0, xmm3 ; x0: z'
!movd xmm1, ecx
!por xmm0, xmm1 ; restore sign
!movd [v_Appr_F], xmm0
!fld dword [v_Appr_F]
ProcedureReturn
EndProcedure
Procedure.d CubeRoot_DM1()
!movsd xmm0, [v_Test_Value_D]
!pcmpeqb xmm4, xmm4 ; create mask 0x7fffffffffffffff
!psrlq xmm4, 1
!movsd xmm1, xmm0
!pand xmm1, xmm4 ; without sign
!pandn xmm4, xmm0 ; sign only
;now find a good approximation for the start-value; http://metamerist.com/cbrt/cbrt.htm - Kahan´s bit hack
!pshufd xmm0, xmm1, 1
!movq xmm2, [CubeRootConst]
!pmuludq xmm0, xmm2
!paddd xmm0, xmm2
!movsd xmm2, xmm0
!mulsd xmm2, xmm2
!mulsd xmm2, xmm0 ; xmm2=x*x*x
!movsd xmm3, xmm2
!addsd xmm3, xmm3 ; 2*x*x*x
!addsd xmm3, xmm1 ; xmm1=a
!addsd xmm2, xmm1
!addsd xmm2, xmm1
!divsd xmm2, xmm3
!mulsd xmm0, xmm2
!movsd xmm2, xmm0
!mulsd xmm2, xmm2
!mulsd xmm2, xmm0 ; xmm2=x*x*x
!movsd xmm3, xmm2
!addsd xmm3, xmm3 ; 2*x*x*x
!addsd xmm3, xmm1 ; xmm1=a
!addsd xmm2, xmm1
!addsd xmm2, xmm1
!divsd xmm2, xmm3
!mulsd xmm0, xmm2
!por xmm0, xmm4
!movsd [v_Appr_D], xmm0
!fld qword [v_Appr_D]
ProcedureReturn
!CubeRootConst: dd 0x55555555, 0x2a9f7893
EndProcedure
CompilerIf #PB_Compiler_Processor = #PB_Processor_x86
Procedure.f Cube_Root_F_H_x86() ;only for x86
!mov eax,[v_Test_Value_F]
!and eax,7FFFFFFFh ;without sign
;now find a good approximation for the start-value; http://metamerist.com/cbrt/cbrt.htm - Kahan´s bit hack
!mov [v_Appr_F],eax
!movss xmm0,[v_Appr_F]
!movss xmm1,xmm0
!lea edx,[v_Appr_F]
!mov eax,[edx]
!xor edx,edx
!div dword[Value3_F]
!add eax,[BitHack_F]
!mov [v_Appr_F],eax
!movss xmm2,[v_Appr_F]
!addss xmm1,xmm1 ;xmm1=2*a=constant
!mov ecx,2 ;higher=more precision (if possible)
!@@b:
!movss xmm3,xmm2
!mulss xmm3,xmm3
!mulss xmm3,xmm2 ;xmm3=x*x*x
!movss xmm4,xmm3
!addss xmm4,xmm4 ;2*x*x*x
!addss xmm4,xmm0 ;xmm0=a
!addss xmm3,xmm1
!divss xmm3,xmm4
!mulss xmm2,xmm3
!dec ecx
!jnz @@b
;set sign (if Test_Value negativ)
!test byte[v_Test_Value_F+3],80h
!jz @@f
!mulss xmm2,[Minus1_F] ;restore sign
!@@f:
!movss [v_Appr_F],xmm2
!fld dword[v_Appr_F]
ProcedureReturn
!Minus1_F: dd -1.0
!BitHack_F: dd 709921077 ;for Kahan´s bit hack (Float); http://metamerist.com/cbrt/cbrt.htm
!Value3_F: dd 3
EndProcedure
Procedure.d Cube_Root_D_H_x86() ;only for x86
!mov eax,dword[v_Test_Value_D+4]
!mov edx,dword[v_Test_Value_D]
!mov dword[v_Appr_D],edx
!and eax,7FFFFFFFh ;without sign
;now find a good approximation for the start-value; http://metamerist.com/cbrt/cbrt.htm - Kahan´s bit hack
!mov dword[v_Appr_D+4],eax
!movsd xmm0,[v_Appr_D]
!movsd xmm1,xmm0
!lea edx,[v_Appr_D]
!mov eax,[edx+4]
!xor edx,edx
!div dword[Value3_D]
!add eax,[BitHack_D+4]
!mov dword[v_Appr_D+4],eax
!movsd xmm2,[v_Appr_D]
!addsd xmm1,xmm1 ;xmm1=2*a=constant
!mov ecx,2 ;higher=more precision (if possible)
!@@b1:
!movsd xmm3,xmm2
!mulsd xmm3,xmm3
!mulsd xmm3,xmm2 ;xmm3=x*x*x
!movsd xmm4,xmm3
!addsd xmm4,xmm4 ;2*x*x*x
!addsd xmm4,xmm0 ;xmm0=a
!addsd xmm3,xmm1
!divsd xmm3,xmm4
!mulsd xmm2,xmm3
!dec ecx
!jnz @@b1
;set sign (if Test_Value negativ)
!test byte[v_Test_Value_D+7],80h
!jz @@f1
!mulsd xmm2,[Minus1_D] ;restore sign
!@@f1:
!movsd [v_Appr_D],xmm2
!fld qword[v_Appr_D]
ProcedureReturn
!Minus1_D: dq -1.0
!BitHack_D: dq 3071306043645493248 ;715094163<<32, for Kahan´s bit hack (Double); http://metamerist.com/cbrt/cbrt.htm
!Value3_D: dd 3
EndProcedure
CompilerEndIf
Procedure.f Cube_Root_F_P() ;for x86 and x64
!fld dword[Dat_F]
!fld dword[v_Test_Value_F]
!fabs
!fyl2x ;->log2(Src1)*exponent
!fld st0 ;copy the logarithm
!frndint ;keep only the characteristic
!fsub st1,st0 ;keeps only the mantissa
!fxch ;get the mantissa on top
!f2xm1 ;->2^(mantissa)-1
!fld1
!faddp ;add 1 back
!fscale ;scale it with the characteristic
!fstp st1 ;copy result over and "pop" it
!test byte[v_Test_Value_F+3],$80
!jz @@f2
!fchs ;restore sign
!@@f2:
ProcedureReturn
!Dat_F: dd 0.33333333333333333333 ;:-)
EndProcedure
Procedure.d Cube_Root_D_P() ;for x86 and x64
!fld qword[Dat_D]
!fld qword[v_Test_Value_D]
!fabs
!fyl2x ;->log2(Src1)*exponent
!fld st0 ;copy the logarithm
!frndint ;keep only the characteristic
!fsub st1,st0 ;keeps only the mantissa
!fxch ;get the mantissa on top
!f2xm1 ;->2^(mantissa)-1
!fld1
!faddp ;add 1 back
!fscale ;scale it with the characteristic
!fstp st1 ;copy result over and "pop" it
!test byte[v_Test_Value_D+7],$80
!jz @@f3
!fchs ;restore sign
!@@f3:
ProcedureReturn
!Dat_D: dq 0.33333333333333333333 ;:-)
EndProcedure
Result$ = "Test-Value : -12345.6789" + #LFCR$
TA_FM1 = ElapsedMilliseconds()
For i = 1 To 10000000
Cbrt_FM1.f = CubeRoot_FM1()
Next
TE_FM1 = ElapsedMilliseconds() - TA_FM1
Result$ + "Float M1:" + #LFCR$ + StrF(Cbrt_FM1, 7) + " / " + Str(TE_FM1) + " ms " + #LFCR$
TA_FM2 = ElapsedMilliseconds()
For i = 1 To 10000000
Cbrt_FM2.f = CubeRoot_FM2()
Next
TE_FM2 = ElapsedMilliseconds() - TA_FM2
Result$ + "Float M2:" + #LFCR$ + StrF(Cbrt_FM2, 7) + " / " + Str(TE_FM2) + " ms " + #LFCR$
TA_DM1 = ElapsedMilliseconds()
For i = 1 To 10000000
Cbrt_DM1.d = CubeRoot_DM1()
Next
TE_DM1 = ElapsedMilliseconds() - TA_DM1
Result$ + "Double M1:" + #LFCR$ + StrD(Cbrt_DM1, 15) + " / " + Str(TE_DM1) + " ms " + #LFCR$
CompilerIf #PB_Compiler_Processor = #PB_Processor_x86
TA_F_H = ElapsedMilliseconds()
For i = 1 To 10000000
Cbrt_F_H_x86.f = Cube_Root_F_H_x86()
Next
TE_F_H = ElapsedMilliseconds() - TA_F_H
Result$ + "Float Helle:" + #LFCR$ + StrF(Cbrt_F_H_x86, 7) + " / " + Str(TE_F_H) + " ms " + #LFCR$
TA_D_H = ElapsedMilliseconds()
For i = 1 To 10000000
Cbrt_D_H_x86.d = Cube_Root_D_H_x86()
Next
TE_D_H = ElapsedMilliseconds() - TA_D_H
Result$ + "Double Helle:" + #LFCR$ + StrD(Cbrt_D_H_x86, 15) + " / " + Str(TE_D_H) + " ms " + #LFCR$
CompilerEndIf
TA_F_P = ElapsedMilliseconds()
For i = 1 To 10000000
Cbrt_F_P.f = Cube_Root_F_P()
Next
TE_F_P = ElapsedMilliseconds() - TA_F_P
Result$ + "Float Psychophanta:" + #LFCR$ + StrF(Cbrt_F_P, 7) + " / " + Str(TE_F_P) + " ms " + #LFCR$
TA_D_P = ElapsedMilliseconds()
For i = 1 To 10000000
Cbrt_D_P.d = Cube_Root_D_P()
Next
TE_D_P = ElapsedMilliseconds() - TA_D_P
Result$ + "Double Psychophanta:" + #LFCR$ + StrD(Cbrt_D_P, 15) + " / " + Str(TE_D_P) + " ms " + #LFCR$ + #LFCR$
;Result$ + "CPU: Intel i7-4770K@4.0GHz" + #LFCR$
CompilerIf #PB_OS_MacOS
ImportC ""
sysctlbyname.l(s.s,*buffer,*length,*null,*null2)
EndImport
*buffer = AllocateMemory(128)
length.l=MemorySize(*buffer)
err.l=sysctlbyname("machdep.cpu.brand_string", *buffer, @length, 0, 0)
If (Not err)
Result$ + PeekS(*buffer) + #LFCR$
EndIf
CompilerEndIf
;SetClipboardText(Result$)
MessageRequester("Cube Root x86, 10000000 Loops", Result$)