My code for this with use of FFT:
Code: Select all
;- Fast Unsigned FFT-Integer-Multiplication
;- see https://en.wikipedia.org/wiki/Butterfly_diagram (radix-2 decimation-in-time FFT algorithm)
;- FFT-Multplication in PureBasic see https://www.purebasic.fr/german/viewtopic.php?f=8&t=29137
;- CPU with AVX, Windows 64-Bit, PureBasic 5.73 LTS (x64), Jan 19, 2021
;- Ascii and Unicode
;- "Helle" Klaus Helbing
EnableExplicit
Global.q Asc_Uni, Basis, Event, FFT, Potenz, ZeitA, ZeitE, i, j ;i,j=universal counter/pointer
Global Basis$, Factor1$, Factor2$, Info$, Result$
Structure Mersenne_Values
MExp.q
MExp$ ;Info
MBeginEnd$ ;info
MName$ ;Info
MDigits$ ;Info
EndStructure
Global Dim Mersenne.Mersenne_Values(27) ;M25-M51, Test
Mersenne(00)\MExp = 21701 : Mersenne(00)\MExp$ = "(2^21701)-1" : Mersenne(00)\MBeginEnd$ = "44867916611904333479...57410828353511882751" : Mersenne(00)\MName$ = "M25, " : Mersenne(00)\MDigits$ = ", 6533" ;M25
Mersenne(01)\MExp = 23209 : Mersenne(01)\MExp$ = "(2^23209)-1" : Mersenne(01)\MBeginEnd$ = "40287411577898877818...36743355523779264511" : Mersenne(01)\MName$ = "M26, " : Mersenne(01)\MDigits$ = ", 6987" ;M26
Mersenne(02)\MExp = 44497 : Mersenne(02)\MExp$ = "(2^44497)-1" : Mersenne(02)\MBeginEnd$ = "85450982430363380319...44867686961011228671" : Mersenne(02)\MName$ = "M27, " : Mersenne(02)\MDigits$ = ", 13395" ;M27
Mersenne(03)\MExp = 86243 : Mersenne(03)\MExp$ = "(2^86243)-1" : Mersenne(03)\MBeginEnd$ = "53692799550275632152...99857021709433438207" : Mersenne(03)\MName$ = "M28, " : Mersenne(03)\MDigits$ = ", 25962" ;M28
Mersenne(04)\MExp = 110503 : Mersenne(04)\MExp$ = "(2^110503)-1" : Mersenne(04)\MBeginEnd$ = "52192831334175505976...69951621083465515007" : Mersenne(04)\MName$ = "M29, " : Mersenne(04)\MDigits$ = ", 33265" ;M29
Mersenne(05)\MExp = 132049 : Mersenne(05)\MExp$ = "(2^132049)-1" : Mersenne(05)\MBeginEnd$ = "51274027626932072381...52138578455730061311" : Mersenne(05)\MName$ = "M30, " : Mersenne(05)\MDigits$ = ", 39751" ;M30
Mersenne(06)\MExp = 216091 : Mersenne(06)\MExp$ = "(2^216091)-1" : Mersenne(06)\MBeginEnd$ = "74609310306466134368...91336204103815528447" : Mersenne(06)\MName$ = "M31, " : Mersenne(06)\MDigits$ = ", 65050" ;M31
Mersenne(07)\MExp = 756839 : Mersenne(07)\MExp$ = "(2^756839)-1" : Mersenne(07)\MBeginEnd$ = "17413590682008709732...02603793328544677887" : Mersenne(07)\MName$ = "M32, " : Mersenne(07)\MDigits$ = ", 227832" ;M32
Mersenne(08)\MExp = 859433 : Mersenne(08)\MExp$ = "(2^859433)-1" : Mersenne(08)\MBeginEnd$ = "12949812560420764966...02414267243500142591" : Mersenne(08)\MName$ = "M33, " : Mersenne(08)\MDigits$ = ", 258716" ;M33
Mersenne(09)\MExp = 1257787 : Mersenne(09)\MExp$ = "(2^1257787)-1" : Mersenne(09)\MBeginEnd$ = "41224577362142867472...31257188976089366527" : Mersenne(09)\MName$ = "M34, " : Mersenne(09)\MDigits$ = ", 378632" ;M34
Mersenne(10)\MExp = 1398269 : Mersenne(10)\MExp$ = "(2^1398269)-1" : Mersenne(10)\MBeginEnd$ = "81471756441257307514...85532025868451315711" : Mersenne(10)\MName$ = "M35, " : Mersenne(10)\MDigits$ = ", 420921" ;M35
Mersenne(11)\MExp = 2976221 : Mersenne(11)\MExp$ = "(2^2976221)-1" : Mersenne(11)\MBeginEnd$ = "62334007624857864988...76506256743729201151" : Mersenne(11)\MName$ = "M36, " : Mersenne(11)\MDigits$ = ", 895832" ;M36
Mersenne(12)\MExp = 3021377 : Mersenne(12)\MExp$ = "(2^3021377)-1" : Mersenne(12)\MBeginEnd$ = "12741168303009336743...25422631973024694271" : Mersenne(12)\MName$ = "M37, " : Mersenne(12)\MDigits$ = ", 909526" ;M37
Mersenne(13)\MExp = 6972593 : Mersenne(13)\MExp$ = "(2^6972593)-1" : Mersenne(13)\MBeginEnd$ = "43707574412708137883...35366526142924193791" : Mersenne(13)\MName$ = "M38, " : Mersenne(13)\MDigits$ = ", 2098960" ;M38
Mersenne(14)\MExp = 13466917 : Mersenne(14)\MExp$ = "(2^13466917)-1" : Mersenne(14)\MBeginEnd$ = "92494773800670132224...30073855470256259071" : Mersenne(14)\MName$ = "M39, " : Mersenne(14)\MDigits$ = ", 4053946" ;M39
Mersenne(15)\MExp = 20996011 : Mersenne(15)\MExp$ = "(2^20996011)-1" : Mersenne(15)\MBeginEnd$ = "12597689545033010502...94714065762855682047" : Mersenne(15)\MName$ = "M40, " : Mersenne(15)\MDigits$ = ", 6320430" ;M40
Mersenne(16)\MExp = 24036583 : Mersenne(16)\MExp$ = "(2^24036583)-1" : Mersenne(16)\MBeginEnd$ = "29941042940415717208...67436921882733969407" : Mersenne(16)\MName$ = "M41, " : Mersenne(16)\MDigits$ = ", 7235733" ;M41
Mersenne(17)\MExp = 25964951 : Mersenne(17)\MExp$ = "(2^25964951)-1" : Mersenne(17)\MBeginEnd$ = "12216463006127794810...98933257280577077247" : Mersenne(17)\MName$ = "M42, " : Mersenne(17)\MDigits$ = ", 7816230" ;M42
Mersenne(18)\MExp = 30402457 : Mersenne(18)\MExp$ = "(2^30402457)-1" : Mersenne(18)\MBeginEnd$ = "31541647561884608093...11134297411652943871" : Mersenne(18)\MName$ = "M43, " : Mersenne(18)\MDigits$ = ", 9152052" ;M43
Mersenne(19)\MExp = 32582657 : Mersenne(19)\MExp$ = "(2^32582657)-1" : Mersenne(19)\MBeginEnd$ = "12457502601536945540...11752880154053967871" : Mersenne(19)\MName$ = "M44, " : Mersenne(19)\MDigits$ = ", 9808358" ;M44
Mersenne(20)\MExp = 37156667 : Mersenne(20)\MExp$ = "(2^37156667)-1" : Mersenne(20)\MBeginEnd$ = "20225440689097733553...21340265022308220927" : Mersenne(20)\MName$ = "M45, " : Mersenne(20)\MDigits$ = ", 11185272" ;M45
Mersenne(21)\MExp = 42643801 : Mersenne(21)\MExp$ = "(2^42643801)-1" : Mersenne(21)\MBeginEnd$ = "16987351645274162247...84101954765562314751" : Mersenne(21)\MName$ = "M46, " : Mersenne(21)\MDigits$ = ", 12837064" ;M46
Mersenne(22)\MExp = 43112609 : Mersenne(22)\MExp$ = "(2^43112609)-1" : Mersenne(22)\MBeginEnd$ = "31647026933025592314...80022181166697152511" : Mersenne(22)\MName$ = "M47, " : Mersenne(22)\MDigits$ = ", 12978189" ;M47
Mersenne(23)\MExp = 57885161 : Mersenne(23)\MExp$ = "(2^57885161)-1" : Mersenne(23)\MBeginEnd$ = "58188726623224644217...46141988071724285951" : Mersenne(23)\MName$ = "M48, " : Mersenne(23)\MDigits$ = ", 17425170" ;M48
Mersenne(24)\MExp = 74207281 : Mersenne(24)\MExp$ = "(2^74207281)-1" : Mersenne(24)\MBeginEnd$ = "30037641808460618205...87010073391086436351" : Mersenne(24)\MName$ = "M49, " : Mersenne(24)\MDigits$ = ", 22338618" ;M49
Mersenne(25)\MExp = 77232917 : Mersenne(25)\MExp$ = "(2^77232917)-1" : Mersenne(25)\MBeginEnd$ = "46733318335923109998...82730618069762179071" : Mersenne(25)\MName$ = "M50, " : Mersenne(25)\MDigits$ = ", 23249425" ;M50
Mersenne(26)\MExp = 82589933 : Mersenne(26)\MExp$ = "(2^82589933)-1" : Mersenne(26)\MBeginEnd$ = "14889444574204132554...37951210325217902591" : Mersenne(26)\MName$ = "M51, " : Mersenne(26)\MDigits$ = ", 24862048" ;M51
Mersenne(27)\MExp = 101066557 : Mersenne(27)\MExp$ = "(2^101066557)-1" : Mersenne(27)\MBeginEnd$ = "16424172901367078204...54693492600704335871" : Mersenne(27)\MName$ = "Next Test, " : Mersenne(27)\MDigits$ = ", 30424066" ;Next Test
Declare.s FFT_Mul_AVX(Para1, Para2)
Declare Calc()
Declare RangeCharDecimals(Para1)
Declare Comparing()
CompilerIf #PB_Compiler_OS <> #PB_OS_Windows
MessageRequester("Error !", "This is not a Windows-OS !")
End
CompilerEndIf
CompilerIf #PB_Compiler_Processor <> #PB_Processor_x64
MessageRequester("Error !", "This is not a 64-Bit-Windows !")
End
CompilerEndIf
!mov eax,1
!cpuid
!test ecx,10000000h ;Bit28=$10000000
!jnz .IsAVX
MessageRequester("Error !", "No AVX-CPU !")
End
!.IsAVX:
OpenWindow(0, 0, 0, 800, 800, "Helles AVX-FFT-Calculator (Multiplication)", #PB_Window_ScreenCentered | #PB_Window_MinimizeGadget | #PB_Window_Tool)
CompilerIf #PB_Compiler_Unicode
Asc_Uni = 2
CompilerElse
Asc_Uni = 1
CompilerEndIf
LoadFont(0, "Trebuchet MS Fett", 9)
LoadFont(1, "Arial", 8)
Repeat
TextGadget(1, 20, 25, 300, 15, "Select Calculation:")
OptionGadget(2, 20, 50, 400, 15, "Calculation Mersenne Prime (M25...M51)")
OptionGadget(3, 20, 70, 400, 15, "Calculation Multiplication Two Unsigned Integer Factors")
OptionGadget(4, 20, 90, 400, 15, "Calculation Unsigned Integer Power")
OptionGadget(5, 20, 110, 400, 15, "Calculation Factorial")
OptionGadget(6, 20, 130, 400, 15, "Calculation Fermat")
ButtonGadget(7, 300, 200, 200, 30, "S t a r t")
SetGadgetState(2, 1) : FFT = 2 ;Hint Mersenne
For i = 1 To 7
SetGadgetFont(i, FontID(0))
Next
Repeat
Event = WaitWindowEvent()
If Event = #PB_Event_CloseWindow
CloseWindow(0)
End
EndIf
If Event = #PB_Event_Gadget
Select EventGadget()
Case 7
Break
Default
FFT = EventGadget()
EndSelect
EndIf
ForEver
For i = 1 To 7
FreeGadget(i)
Next
;=========================================================================
Select FFT
Case 2 ;Mersenne
;19.01.2021 https://www.mersenne.org/:
;All exponents below 54.171.239 have been tested And verified. --> M47 is sure (name)
;All exponents below 101.066.557 have been tested at least once.
Info$ = "Calculation Mersenne Prime "
TextGadget(0, 10, 0, 780, 15, Info$ + "Mxx")
TextGadget(1, 20, 20, 300, 15, "Select Mersenne Prime:")
j = 0
For i = 2 To 28
OptionGadget(i, 20, 40 + (20 * j), 780, 15, "Mersenne " + Mersenne(j)\MName$ + Mersenne(j)\MExp$ + Mersenne(j)\MDigits$ + " Decimal Digits, " + Mersenne(j)\MBeginEnd$)
j + 1
Next
OptionGadget(29, 20, 40 + (20 * j), 780, 15, "M52 " + Mersenne(j)\MName$ + Mersenne(j)\MExp$ + Mersenne(j)\MDigits$ + " Decimal Digits, " + Mersenne(j)\MBeginEnd$)
SetGadgetState(28, 1) ;M51
ButtonGadget(30, 300, 650, 200, 30, "S t a r t C a l c u l a t i o n")
For i = 0 To 30
SetGadgetFont(i, FontID(0))
Next
Basis$ = "2"
Result$ = "2"
j = 28 ;Voreinstellung Mersenne M51, falls nichts ausgewählt wird
Repeat
Event = WaitWindowEvent()
If Event = #PB_Event_CloseWindow
CloseWindow(0)
End
EndIf
If Event = #PB_Event_Gadget
i = EventGadget()
Select i
Case 2 To 29
j = i
Case 30
If j = 29
Info$ = "Calculation M52 Next Test="
Else
Info$ + "M" + Str(j + 23) + "="
EndIf
Break
EndSelect
EndIf
ForEver
For i = 1 To 30
FreeGadget(i)
Next
SetGadgetText(0, Info$ + Mersenne(j - 2)\MExp$ + Mersenne(j - 2)\MDigits$ + " Decimal Digits, " + Mersenne(j - 2)\MBeginEnd$)
Potenz = Mersenne(j - 2)\MExp
Calc()
i = @Result$ + StringByteLength(Result$) - Asc_Uni
PokeB(i, PeekB(i) - 1) ;-1 for Mersenne, last digit is 1 or 7
;=========================================================================
Case 3 ;Multiplication
TextGadget(0, 10, 0, 780, 15, "Calculation Multiplication")
TextGadget(1, 50, 20, 300, 20, "Set Factor1 (e.g. String with Ctrl+V)")
TextGadget(2, 450, 20, 300, 20, "Set Factor2 (e.g. String with Ctrl+V)")
ButtonGadget(3, 300, 760, 200, 30, "S t a r t C a l c u l a t i o n")
ButtonGadget(4, 100, 730, 200, 30, "C h e c k F a c t o r 1")
ButtonGadget(5, 500, 730, 200, 30, "C h e c k F a c t o r 2")
For i = 0 To 5
SetGadgetFont(i, FontID(0))
Next
EditorGadget(6, 10, 40, 385, 680)
EditorGadget(7, 405, 40, 385, 680)
SetActiveGadget(6)
Repeat
Event = WaitWindowEvent()
If Event = #PB_Event_CloseWindow
CloseWindow(0)
End
EndIf
If Event = #PB_Event_Gadget
Factor1$ = GetGadgetText(6)
Factor2$ = GetGadgetText(7)
Select EventGadget()
Case 3
Break ;Test Null in FFT_Mul_AVX
Case 4
If PeekB(@Factor1$)
RangeCharDecimals(@Factor1$) ;Suche nach Chars, die keine Dezimal-Ziffern sind
Else
MessageRequester("Error Factor1 !", "Length is Null !")
EndIf
Case 5
If PeekB(@Factor2$)
RangeCharDecimals(@Factor2$) ;Suche nach Chars, die keine Dezimal-Ziffern sind
Else
MessageRequester("Error Factor2 !", "Length is Null !")
EndIf
EndSelect
EndIf
ForEver
For i = 1 To 7
FreeGadget(i)
Next
ZeitA = ElapsedMilliseconds()
Result$ = FFT_Mul_AVX(@Factor1$, @Factor2$)
ZeitE = ElapsedMilliseconds() - ZeitA
;=========================================================================
Case 4 ;Potenzen
TextGadget(0, 10, 0, 780, 15, "Calculation X^Y")
TextGadget(1, 50, 20, 300, 20, "Set Base X for X^Y")
TextGadget(2, 400, 20, 300, 20, "Set Exponent Y for X^Y")
StringGadget(3, 50, 40, 300, 20, "123456", #PB_String_Numeric) ;Example
StringGadget(4, 400, 40, 300, 20, "567890", #PB_String_Numeric) ;Example
ButtonGadget(5, 300, 560, 200, 30, "Start Calculation")
SetActiveGadget(2)
For i = 0 To 5 ;das StringGadget einfach mal drin lassen
SetGadgetFont(i, FontID(0))
Next
Repeat
Repeat
Event = WaitWindowEvent()
If Event = #PB_Event_CloseWindow
CloseWindow(0)
End
EndIf
Until EventGadget() = 5
Potenz = Val(GetGadgetText(4)) ;Exponent
If (Len(GetGadgetText(4)) > 17) Or (Potenz > 1E16) ;willkürlich gewählte Werte, sichere Seite
MessageRequester("Error!", "Exponent too big! Please make a new input!")
Else
Break
EndIf
ForEver
Basis$ = GetGadgetText(3)
Basis = 0
If Len(Basis$) < 2
Basis = Len(Basis$)
EndIf
For i = 1 To 5
FreeGadget(i)
Next
SetGadgetText(0, "Calculation " + Basis$ + "^" + Str(Potenz))
If Len(Basis$) > 0 Or Basis > 0
If Potenz > 0
Result$ = Basis$
Calc()
Else
Result$ = "1"
EndIf
Else
Result$ = "0" ;0^Y
EndIf
;=========================================================================
Case 5 ;Factorial
Info$ = "Calculation Factorial "
TextGadget(0, 10, 0, 780, 15, Info$)
TextGadget(1, 50, 20, 300, 20, "Input Value:")
StringGadget(2, 50, 40, 300, 20, "6789", #PB_String_Numeric) ;Example
ButtonGadget(3, 300, 560, 200, 30, "S t a r t C a l c u l a t i o n")
SetActiveGadget(2)
For i = 0 To 3 ;das StringGadget einfach mal drin lassen
SetGadgetFont(i, FontID(0))
Next
Repeat
Repeat
Event = WaitWindowEvent()
If Event = #PB_Event_CloseWindow
CloseWindow(0)
End
EndIf
Until EventGadget() = 3
Potenz = Val(GetGadgetText(2)) ;hier reicht der Wertebereich von Val() wohl aus ;-) . Den Begriff "Potenz" erweitere ich etwas...
;besser doch Test
If (Len(GetGadgetText(2)) > 7) Or (Potenz > 1E6) ;willkürlich gewählte Werte, sichere Seite
MessageRequester("Error!", "Value too big! Please make a new input!")
Else
Break
EndIf
ForEver
Info$ + Str(Potenz) + "! "
SetGadgetText(0, Info$)
Result$ = "1"
For i = 1 To 3
FreeGadget(i)
Next
EditorGadget(1, 10, 20, 780, 740, #PB_Editor_ReadOnly | #PB_Editor_WordWrap)
ZeitA = ElapsedMilliseconds()
j = 1000
For i = 1 To Potenz
Factor1$ = Str(i)
Result$ = FFT_Mul_AVX(@Result$, @Factor1$)
If i = j
SetGadgetText(1, "Progress-Info: " + Factor1$ + "!" + #LF$ + Result$)
While WaitWindowEvent(0) : Wend ;Time for displaying
j + 1000
EndIf
Next
ZeitE = ElapsedMilliseconds() - ZeitA
FreeGadget(1)
;=========================================================================
Case 6 ;Fermat
;Fn=(2^(2^n))+1 n=0,1,2,3... F0=3, F1=5, F2=17, F3=257, F4=65.537, F5=4.294.967.297, F6=18.446.744.073.709.551.617, F7=340.282.366.920.938.463.463.374.607.431.768.211.457 ...
Info$ = "Calculation Fermat "
TextGadget(0, 10, 0, 780, 15, Info$)
TextGadget(1, 50, 20, 300, 20, "Input Value (0...30):")
StringGadget(2, 50, 40, 300, 20, "7", #PB_String_Numeric) ;Example
ButtonGadget(3, 300, 560, 200, 30, "S t a r t C a l c u l a t i o n")
SetActiveGadget(2)
For i = 0 To 3 ;das StringGadget einfach mal drin lassen
SetGadgetFont(i, FontID(0))
Next
Repeat
Repeat
Event = WaitWindowEvent()
If Event = #PB_Event_CloseWindow
CloseWindow(0)
End
EndIf
Until EventGadget() = 3
Potenz = Val(GetGadgetText(2)) ;hier reicht der Wertebereich von Val() wohl aus ;-) . Den Begriff "Potenz" erweitere ich etwas...
;besser doch Test
If Potenz > 30 ;willkürlich gewählte Werte, sichere Seite
MessageRequester("Error!", "Value too big! Please make a new input!")
Else
Break
EndIf
ForEver
Info$ + "F" + Str(Potenz)
SetGadgetText(0, Info$)
For i = 1 To 3
FreeGadget(i)
Next
EditorGadget(1, 10, 20, 780, 740, #PB_Editor_ReadOnly | #PB_Editor_WordWrap)
Basis$ = "2"
If Potenz
Result$ = Basis$
Else
Result$ = "0"
EndIf
Calc()
Potenz = Val(Result$)
Result$ = Basis$
FFT = 0 ;wegen "Weiter"
Calc()
i = @Result$ + StringByteLength(Result$) - Asc_Uni
PokeB(i, PeekB(i) + 1) ;+1 for Fermat, last digit is 7
FreeGadget(1)
;=========================================================================
EndSelect
;=========================================================================
EditorGadget(1, 10, 20, 780, 730, #PB_Editor_ReadOnly | #PB_Editor_WordWrap)
If Asc(Mid(Result$, 1, 1)) < 58 ;also Ziffer (hoffentlich :-) !) und nicht z.B.Fehlermeldung
SetGadgetText(1, "Digits: " + Str(Len(Result$)) + #LF$ + "Time: " + Str(ZeitE) + "ms" + #LFCR$ + Result$)
Else
SetGadgetText(1, Result$ + "??? Mistake!?")
EndIf
ButtonGadget(2, 15, 760, 250, 30, "Copy to Clipboard")
SetGadgetFont(2, FontID(0))
ButtonGadget(3, 275, 760, 250, 30, "Comparison with File")
SetGadgetFont(3, FontID(0))
ButtonGadget(4, 535, 760, 250, 30, "Back to Selection")
SetGadgetFont(4, FontID(0))
Repeat
Event = WaitWindowEvent()
If Event = #PB_Event_CloseWindow
Break 2
EndIf
If Event = #PB_Event_Gadget
Select EventGadget()
Case 2
SetClipboardText(Result$)
Case 3
For i = 1 To 4
HideGadget(i, 1)
Next
Comparing()
For i = 1 To 4
HideGadget(i, 0)
Next
Case 4
Break
EndSelect
EndIf
ForEver
For i = 0 To 4
FreeGadget(i)
Next
Until WaitWindowEvent() = #PB_Event_CloseWindow
CloseWindow(0)
End
Procedure.s FFT_Mul_AVX(PFac1, PFac2)
EnableASM
Protected.q BufferCosSin, BufferCosSinA, BufferDez, BufferDezA, BufferFX, BufferFXA, BufferFaktor1, BufferFaktor1A
Protected.q BufferFaktor2, BufferFaktor2A, BufferProdukt, BufferProduktA, BufferReverse, BufferReverseA, LG, N
Protected Ausgabe$
;Register-, Status- und Control- Speicher-Reservierung
BufferFX = AllocateMemory(512 + 64)
If BufferFX
BufferFXA = BufferFX + (64 - (BufferFX & $3F)) ;64-er Alignment, klotzen, nicht kleckern!
Else
Ausgabe$ = "-1"
ProcedureReturn Ausgabe$
EndIf
PUSH BufferFXA
!pop [BufferFXA]
;Save Registers
!mov rax,[BufferFXA]
!fxsave64 [rax] ;sichert erstmal komplett FPU (mit MXCSR) und XMM (0-15). Lass ich mal so, obwohl FNSAVE auch reichen würde. Dann aber so keine Register-Sicherung
!mov [rax+464],rdi ;sichert callee-save registers, ohne RBX und RBP!
!mov [rax+472],rsi
!mov [rax+480],r12
!mov [rax+488],r13
!mov [rax+496],r14
!mov [rax+504],r15
;übergebende Parameter
!mov r8,[p.v_PFac1]
!mov [PF1],r8
!mov r9,[p.v_PFac2]
!mov [PF2],r9
!mov rax, -16
!mov rdx, -16
!pxor xmm1,xmm1 ;xmm1 null setzen, ist praktisch das Zero-Byte bzw. 2 Zero-Bytes für Unicode
;Länge der Strings ermitteln
CompilerIf #PB_Compiler_Unicode
!@@:
!add rax,16
!vpcmpistri xmm1,dqword[r8+rax],00001001b ;Bit0 = 1 und Bit1 = 0, also String-Chars werden als unsigned Words interpretiert, Bit2 = 0 und Bit3 = 1, also Test auf equal each
!jnz @b
!shr rax,1 ;Pointer in String muss hier halbiert werden
!add rax,rcx
!pxor xmm1,xmm1 ;xmm1 null setzen, ist praktisch das Zero-Byte bzw. 2 Zero-Bytes für Unicode
!@@:
!add rdx,16
!vpcmpistri xmm1,dqword[r9+rdx],00001001b ;Bit0 = 1 und Bit1 = 0, also String-Chars werden als unsigned Words interpretiert, Bit2 = 0 und Bit3 = 1, also Test auf equal each
!jnz @b
!shr rdx,1 ;Pointer in String muss hier halbiert werden
!add rdx,rcx
CompilerElse
!@@:
!add rax,16
!vpcmpistri xmm1,dqword[r8+rax],00001000b ;Bit0 = 0 und Bit1 = 0, also String-Chars werden als unsigned Bytes interpretiert, Bit2 = 0 und Bit3 = 1, also Test auf equal each
!jnz @b
!add rax,rcx
!pxor xmm1,xmm1 ;xmm1 null setzen, ist praktisch das Zero-Byte bzw. 2 Zero-Bytes für Unicode
!@@:
!add rdx,16
!vpcmpistri xmm1,dqword[r9+rdx],00001000b ;Bit0 = 0 und Bit1 = 0, also String-Chars werden als unsigned Bytes interpretiert, Bit2 = 0 und Bit3 = 1, also Test auf equal each
!jnz @b
!add rdx,rcx
CompilerEndIf
!mov [L1],rax
!mov [L2],rdx
!cmp rax,rdx ;L1 soll der größere Faktor sein
!jae @f
!mov [L1],rdx
!mov [L2],rax
!mov [PF1],r9
!mov [PF2],r8
!@@:
!add rax,rdx
!push rax
POP LG ;Gesamtlänge L1+L2 für "Verstringung" und Speicher-Management
;LenF = L1
!mov rdx,[L1] ;L1 ist größter Faktor
!mov [LenF],rdx ;LenF erstmal setzen, könnte ja schon 2-er Potenz sein
!bsr rcx,[L1] ;höchstes gesetztes Bit
!bsf rdx,[L2] ;Test auf Länge Null
!jz @f
!mov [Exponent],rcx
!bsf rdx,[L1] ;niedrigstes gesetztes Bit
!jmp .Faktoren_OK ;local label
!@@:
Ausgabe$ = "Error! Size of a factor is null! " ;wegen Kompatibilität zu "normaler" Multiplikation gelassen
ProcedureReturn Ausgabe$
!.Faktoren_OK:
!cmp rcx,rdx
!je @f ;also nur 1 Bit (identisch) gesetzt, ist 2-er Potenz
!inc rcx
!mov rax,1
!shl rax,cl
!mov [LenF],rax ;LenF ist jetzt 2-er Potenz
!add [Exponent],1
!@@:
!add [Exponent],1
!mov rcx,5 ;Produkt-Länge muss mindestens 32 sein, sonst an anderen Stellen Klimmzüge ("Verstringung")
!sub rcx,[Exponent]
!js @f ;Exponent ist > 5 (Hauptfall)
!jz @f ;Exponent ist = 5
!shl [LenF],cl
!add [Exponent],rcx
!@@:
;zur Sicherheit im MXCSR Bit 13 und 14 auf Null setzen (round to nearest)
;MXCSR[6] : Denormals are zeros - 0
;MXCSR[7:12] : Exception masks all 1's (all exceptions masked)
;MXCSR[13:14] : Rounding control - 0 (round to nearest)
;MXCSR[15] : Flush to zero for masked underflow - 0 (off)
;R+ bit 14 Round Positive
;R- bit 13 Round Negative
;RZ bits 13 and 14 Round to Zero
;RN bits 13 and 14 are 0 Round to Nearest
;load MXCSR
!lea rcx,[MXCSR]
!lea rdx,[MXCSR_Sicher]
!vstmxcsr [rcx] ;load Control-and Status-Register in Memory
!mov eax,[rcx]
!mov [rdx],eax ;trotz fxsave64 (s.o.) hier zur Sicherheit
;MXCSR_Sicher = MXCSR
!and dword[rcx],11111111111111111001111111111111b ;zur Sicherheit Bit 13 und 14 auf Null setzen (round to nearest)
!vldmxcsr [rcx] ;store MXCSR
;N = LenF << 1
!mov rax,[LenF]
!shl rax,1
!mov [N],rax
!push rax ;wird dann "PB-N"
!vcvtsi2sd xmm1,xmm2,rax ;Quad-N in Double-ND konvertieren für spätere Division
!vmovsd [ND],xmm1
;Pointer_Produkt.q = N - 1
!dec rax
!mov [Pointer_Produkt],rax
POP N ;wird u.a.für AllocateMemory benötigt!
;Speicher-Reservierung
BufferCosSin = AllocateMemory((2 * N * 8) + 64)
If BufferCosSin
BufferCosSinA = BufferCosSin + (64 - (BufferCosSin & $3F)) ;64-er Alignment, klotzen, nicht kleckern!
Else
Ausgabe$ = "-1"
ProcedureReturn Ausgabe$
EndIf
PUSH BufferCosSinA
!pop [BufferCosSinA]
BufferFaktor1 = AllocateMemory((2 * N * 8) + 64)
If BufferFaktor1
BufferFaktor1A = BufferFaktor1 + (64 - (BufferFaktor1 & $3F)) ;64-er Alignment
Else
Ausgabe$ = "-1"
ProcedureReturn Ausgabe$
EndIf
PUSH BufferFaktor1A
!pop [BufferFaktor1A]
BufferFaktor2 = AllocateMemory((2 * N * 8) + 64)
If BufferFaktor2
BufferFaktor2A = BufferFaktor2 + (64 - (BufferFaktor2 & $3F)) ;64-er Alignment
Else
Ausgabe$ = "-1"
ProcedureReturn Ausgabe$
EndIf
PUSH BufferFaktor2A
!pop [BufferFaktor2A]
;========================================================
;Faktoren bearbeiten
!bsf rax,[N] ;Da N hier nie Null sein kann tuts BSF
!mov [Exponent],rax
!mov rdx,32
!sub rdx,rax
!vmovq xmm9,rdx ;32 - Exponent
!mov rsi,[N]
!mov r8,[PF1]
!mov r10,[PF2]
!mov r11,[BufferCosSinA] ;Temp-Memory
!mov r9,r11
!add r9,rsi
!add r9,rsi
!mov r14,[L1]
!mov rcx,[v_Asc_Uni]
!dec rcx
!shl r14,cl
!sub r14,[v_Asc_Uni]
!mov r15,[L2]
!shl r15,cl
!sub r15,[v_Asc_Uni]
!mov rcx,2
!sub rcx,[v_Asc_Uni]
!shr rsi,cl
!sub rsi,[v_Asc_Uni]
;------------------
!mov rcx,3
!@@:
!mov [r11+rcx*4],ecx
!dec rcx
!jns @b
!vmovdqa xmm0,[r11] ;Start-Values 0,1,2,3
!vbroadcastss xmm1,[V1] ;AVX Missbrauch von Float, was solls... kostet aber Speed
!vbroadcastss xmm2,[V2]
!vbroadcastss xmm3,[V3]
!vbroadcastss xmm4,[V4]
!vbroadcastss xmm11,[VVier]
!mov rdi,[BufferFaktor1A] ;Temp-Memory
!xor r12,r12
;------------------------------------
!.Loop_Rev_Fak:
!vpsrld xmm5,xmm0,1 ;>> 1
!vpand xmm6,xmm5,xmm1 ;& V1
!vpand xmm7,xmm0,xmm1 ;& V1
!vpslld xmm8,xmm7,1 ;<< 1
!vpor xmm10,xmm6,xmm8 ;|
!vpsrld xmm5,xmm10,2 ;>> 2
!vpand xmm6,xmm5,xmm2 ;& V2
!vpand xmm7,xmm10,xmm2 ;& V2
!vpslld xmm8,xmm7,2 ;<< 2
!vpor xmm10,xmm6,xmm8 ;|
!vpsrld xmm5,xmm10,4 ;>> 4
!vpand xmm6,xmm5,xmm3 ;& V3
!vpand xmm7,xmm10,xmm3 ;& V3
!vpslld xmm8,xmm7,4 ;<< 4
!vpor xmm10,xmm6,xmm8 ;|
!vpsrld xmm5,xmm10,8 ;>> 8
!vpand xmm6,xmm5,xmm4 ;& V4
!vpand xmm7,xmm10,xmm4 ;& V4
!vpslld xmm8,xmm7,8 ;<< 8
!vpor xmm10,xmm6,xmm8 ;|
!vpsrld xmm5,xmm10,16 ;>> 16
!vpslld xmm8,xmm10,16 ;<< 16
!vpor xmm10,xmm5,xmm8 ;|
!vpsrld xmm5,xmm10,xmm9 ;>> (32 - Exponent)
!vmovdqa [rdi],xmm5
!xor r13,r13
!.Next_Byte4:
!mov r12d,[rdi+r13*4] ;4 reverse Positionen
!xor rax,rax
!mov rdx,r14
!mov rcx,[v_Asc_Uni]
!dec rcx
!shr rdx,cl
!cmp [L1],rdx
!jb @f
!movzx eax,byte[r8+r14] ;next Byte Faktor1$
!and eax,0fh ;for String (=sub 30h)
!@@:
!mov [r9+r12*2],eax
!xor rax,rax
!mov rdx,r15
!mov rcx,[v_Asc_Uni]
!dec rcx
!shr rdx,cl
!cmp [L2],rdx
!jb @f
!movzx eax,byte[r10+r15] ;next Byte Faktor2$
!and eax,0fh
!@@:
!mov [r11+r12*2],eax
!sub rsi,[v_Asc_Uni]
!sub r14,[v_Asc_Uni]
!sub r15,[v_Asc_Uni]
!inc r13
!cmp r13,4
!jne .Next_Byte4
!vpaddd xmm0,xmm0,xmm11
!or rsi,rsi
!jns .Loop_Rev_Fak
;========================================================
;Integer in Double konvertieren, verdoppeln und abspeichern
!xor rdx,rdx
!xor r8,r8
!mov rcx,[N]
!shr rcx,3
!mov r10,[BufferFaktor1A]
!mov r11,[BufferCosSinA]
!mov r12,[BufferFaktor2A]
!mov r9,r11
!mov rsi,[N]
!add r9,rsi
!add r9,rsi
!@@:
;Factor1
!vcvtdq2pd ymm0,[r9+r8] ;4 Ziffern (als DWords) von Faktor1 in 4 Doubles konvertieren
!vmovsd [r10+rdx],xmm0
!vmovsd [r10+rdx+16],xmm0 ;+8 ist der I-Anteil, bleibt hier Null!
!vunpckhpd ymm1,ymm0,ymm0 ;AVX
!vmovsd [r10+rdx+32],xmm1
!vmovsd [r10+rdx+48],xmm1
!vperm2f128 ymm2,ymm0,ymm0,1
!vmovsd [r10+rdx+64],xmm2
!vmovsd [r10+rdx+80],xmm2
!vunpckhpd ymm1,ymm2,ymm0
!vmovsd [r10+rdx+96],xmm1
!vmovsd [r10+rdx+112],xmm1
;Factor2
!vcvtdq2pd ymm0,[r11+r8]
!vmovsd [r12+rdx],xmm0
!vmovsd [r12+rdx+16],xmm0
!vunpckhpd ymm1,ymm0,ymm0
!vmovsd [r12+rdx+32],xmm1
!vmovsd [r12+rdx+48],xmm1
!vperm2f128 ymm2,ymm0,ymm0,1
!vmovsd [r12+rdx+64],xmm2
!vmovsd [r12+rdx+80],xmm2
!vunpckhpd ymm1,ymm2,ymm0
!vmovsd [r12+rdx+96],xmm1
!vmovsd [r12+rdx+112],xmm1
!add r8,16
!add rdx,128
!sub rcx,1
!jnz @b
;========================================================
;Winkel erst hier, weil BufferCosSinA oben "missbraucht" wird
!vcvtsi2sd xmm0,xmm0,qword[N]
!vmovsd xmm1,[Pi2]
!vdivsd xmm7,xmm1,xmm0 ;Schrittweite Konstante
!vmovq xmm6,qword[Signum] ;Konstante
!mov rcx,[N]
!shr rcx,3
!xor r8,r8 ;Schritt-Nr. Konstante
!mov r9,[N]
!shr r9,2 ;N/4 für Adressierung unten
!mov r10,[N]
!shr r10,1 ;N/2
!mov r11,[BufferCosSinA]
!inc rcx
!@@:
!vcvtsi2sd xmm15,xmm14,r8
!vmulsd xmm0,xmm7,xmm15
!vmovddup xmm0,xmm0 ;XMM0: 1=x^1 2=x^1 duplicate bits 0-63 in bits 64-127
!vinsertf128 ymm1,ymm0,xmm0,1b ;YMM1: 1=x^1 2=x^1 3=x^1 4=x^1 YMM1(0-127)=XMM0, YMM1(128-255)=XMM0
!vmulpd ymm2,ymm1,ymm1 ;YMM2: 1=x^2 2=x^2 3=x^2 4=x^2
!vmulsd xmm3,xmm2,xmm2 ;YMM3: 1=x^4 2=x^2 3=0 4=0
!vmulpd ymm4,ymm2,ymm2 ;YMM4: 1=x^4 2=x^4 3=x^4 4=x^4
!vmulpd ymm2,ymm4,ymm4 ;YMM2: 1=x^8 2=x^8 3=x^8 4=x^8 for the next 4 terms
!vmovdqa xmm8,xmm3 ;save for Cos
!vmulpd xmm9,xmm8,xmm4
!vperm2f128 ymm10,ymm9,ymm8,100000b
!vmulpd xmm14,xmm3,xmm1 ;YMM14: 1=x^5 2=x^3 3=0 4=0
!vmulpd xmm1,xmm14,xmm4 ;YMM1: 1=x^9 2=x^7 3=0 4=0
!vperm2f128 ymm5,ymm1,ymm14,100000b ;YMM5: 1=x^9 2=x^7 3=x^5 4=x^3 YMM5(0-127)=YMM1(0-127), YMM5(128-255)=YMM14(0-127)
!vmulpd ymm1,ymm5,yword[RezFakSin] ;multiply the 4 values in YMM5 with RezFak -1/3! ... 1/9!, result in YMM1
!vmulpd ymm11,ymm10,yword[RezFakCos]
;next 4 terms, without loop
!vmulpd ymm14,ymm5,ymm2 ;YMM14: 1=x^17 2=x^15 3=x^13 4=x^11
!vmulpd ymm10,ymm10,ymm2
!vmulpd ymm15,ymm14,yword[RezFakSin+32]
!vaddpd ymm1,ymm15,ymm1
!vmulpd ymm15,ymm10,yword[RezFakCos+32]
!vaddpd ymm11,ymm15,ymm11
;next 4 terms
!vmulpd ymm5,ymm14,ymm2 ;YMM5: 1=x^25 2=x^23 3=x^21 4=x^19
!vmulpd ymm10,ymm10,ymm2
!vmulpd ymm15,ymm5,yword[RezFakSin+64]
!vaddpd ymm1,ymm15,ymm1
!vmulpd ymm15,ymm10,yword[RezFakCos+64]
!vaddpd ymm11,ymm15,ymm11
;result
!vhaddpd ymm2,ymm1,ymm1 ;YMM2: 1=1+2 of YMM1, 3=3+4 of YMM1
!vhaddpd ymm12,ymm11,ymm11
!vextractf128 xmm1,ymm2,1b ;XMM1: 3+4 of YMM2
!vextractf128 xmm11,ymm12,1b
!vaddsd xmm3,xmm2,xmm1 ;XMM3: 1=sum of iterations
!vaddsd xmm13,xmm12,xmm11
!vaddsd xmm12,xmm0,xmm3 ;Sine XMM12: 1=sum of iterations plus start-value (1.term=x)
!vaddsd xmm14,xmm13,[Eins] ;Cosine
;------------------------------------
;store values 0-180°
!mov rax,r8
!shl rax,1
!vmovsd qword[r11+rax*8+8],xmm12 ;Sin 0-45°
!vorpd xmm1,xmm12,xmm6 ;neg Sin
!vmovsd qword[r11+rax*8],xmm14 ;Cos 0-45°
!mov rax,r9
!sub rax,r8
!shl rax,1
!vmovsd qword[r11+rax*8],xmm12 ;Cos 45-90°
!mov rdx,r9
!add rdx,r8
!shl rdx,1
!vmovsd qword[r11+rdx*8],xmm1 ;Cos 90-135°
!mov r13,r10
!sub r13,r8
!shl r13,1
!vorpd xmm2,xmm14,xmm6
!vmovsd qword[r11+r13*8],xmm2 ;Cos 135-180°
!vmovsd qword[r11+rax*8+8],xmm14 ;Sin 45-90°
!vmovsd qword[r11+rdx*8+8],xmm14 ;Sin 90-135°
!vmovsd qword[r11+r13*8+8],xmm12 ;Sin 135-180°
!inc r8
!dec rcx
!jnz @b
;========================================================
;jetzt die beiden Faktoren transformieren
!mov rsi,[N]
!mov r8,[BufferCosSinA]
!mov rcx,1 ;Exponent_FFT
!mov r12,2 ;Pointer0
!mov r9,2 ;Pointer2
!mov r10,[BufferFaktor1A]
!mov rdi,[BufferFaktor2A]
!.OuterLoopF:
!inc rcx ;Exponent_FFT
!mov rax,rsi ;N
!shr rax,cl
!jz .EndF
!mov r14,rax
!.InnerLoopF:
!xor r11,r11 ;Pointer1
!mov r15,r12 ;Pointer0
;------------------------------------
!@@:
!mov rax,r14
!mul r11 ;Pointer1
!shl rax,3 ;EW*8, Pointer in CosSin
!mov rdx,r9 ;Pointer2
!mov r13,rdx
!shl rdx,4 ;Pointer2*8
!sub r13,r12 ;[v_Pointer0]
!shl r13,4 ;(Pointer2 - Pointer0) * 8
!vperm2f128 ymm0,ymm9,yword[r8+rax],100010b ;2x CosSinA in ymm0, ymm9 is Dummy
!vmovapd xmm7,dqword[rdi+rdx] ;BufferFaktor2A complete (R and I)
!vperm2f128 ymm2,ymm7,yword[r10+rdx],10b
!vmulpd ymm4,ymm2,ymm0
!vshufpd ymm1,ymm0,ymm0,101b ;change Sin and Cos
!vhsubpd ymm3,ymm4,ymm4 ;WR
!vmulpd ymm4,ymm2,ymm1
!vhaddpd ymm5,ymm4,ymm4 ;WI
!vshufpd ymm15,ymm3,ymm5,0b ;WI in ymm15-high
!vmovapd xmm1,dqword[r10+r13]
!vmovapd xmm7,dqword[rdi+r13]
!vperm2f128 ymm2,ymm7,ymm1,10b
!vaddpd ymm14,ymm15,ymm2
!vsubpd ymm13,ymm2,ymm15
!vmovapd [r10+r13],xmm14
!vperm2f128 ymm2,ymm7,ymm14,11b
!vmovapd [rdi+r13],xmm2
!vmovapd [r10+rdx],xmm13
!vperm2f128 ymm2,ymm7,ymm13,11b
!vmovapd [rdi+rdx],xmm2
!add r11,2 ;Pointer1
!inc r9 ;Pointer2
!dec r15
!jnz @b
!add r9,r12 ;Pointer0
!cmp r9,rsi ;N
!jb .InnerLoopF
!shl r12,1
!mov r9,r12
!jmp .OuterLoopF
!.EndF:
;========================================================
;Und jetzt die beiden transformierten Vektoren miteinander multiplizieren
!mov r8,[BufferFaktor1A] ;nimmt auch das Produkt auf
!mov r9,[BufferFaktor2A]
!xor r11,r11 ;Pointer
!mov rcx,[N]
!shr rcx,1
!vmovdqu ymm5,[Null1] ;Null,Signum,Null,Signum
!@@:
!vmovapd ymm0,[r8+r11]
!vmovapd ymm3,[r9+r11]
!vmulpd ymm1,ymm0,ymm3 ;ymm1:R*R,I*I,R*R,I*I
!vshufpd ymm2,ymm3,ymm3,101b ;change R and I
!vxorpd ymm4,ymm1,ymm5 ;Vorzeichen ändern, damit nicht sub sondern add für beide Terme verwendet werden kann
!vmulpd ymm1,ymm0,ymm2 ;R*I, R*I
!vhaddpd ymm6,ymm4,ymm1
!vmovapd [r8+r11],ymm6
!add r11,32
!dec rcx
!jnz @b
;========================================================
;Rück-Transformation
;Produkt bearbeiten
BufferReverse = AllocateMemory(32 + 64)
If BufferReverse
BufferReverseA = BufferReverse + (64 - (BufferReverse & $3F)) ;64-er Alignment
Else
Ausgabe$ = "-1"
ProcedureReturn Ausgabe$
EndIf
PUSH BufferReverseA
!pop [BufferReverseA]
!mov rsi,[N] ;für Abbruch Schleife Loop_Rev_Prod; letzte Pos.steht ja fest (wie erste)
!shl rsi,1
!sub rsi,2
!mov r8,[BufferFaktor1A] ;Ergebnis von Multiplikation oben
!mov r9,[BufferFaktor2A] ;hier neue Reihenfolge rein
!mov rdx,32
!sub rdx,[Exponent]
!vmovq xmm9,rdx
;----------------------------------------------
!mov rcx,3
!@@:
!mov [r9+rcx*4],ecx
!dec rcx
!jns @b
!vmovdqa xmm0,[r9] ;Start-Values 0,1,2,3
!vbroadcastss xmm1,[V1] ;AVX Missbrauch von Float, was solls... kostet aber Speed
!vbroadcastss xmm2,[V2]
!vbroadcastss xmm3,[V3]
!vbroadcastss xmm4,[V4]
!vbroadcastss xmm11,[VVier]
!mov rdi,[BufferReverseA]
!xor r12,r12
;------------------------------------
!.Loop_Rev_Prod:
!vpsrld xmm5,xmm0,1 ;>> 1
!vpand xmm6,xmm5,xmm1 ;& V1
!vpand xmm7,xmm0,xmm1 ;& V1
!vpslld xmm8,xmm7,1 ;<< 1
!vpor xmm10,xmm6,xmm8 ;|
!vpsrld xmm5,xmm10,2 ;>> 2
!vpand xmm6,xmm5,xmm2 ;& V2
!vpand xmm7,xmm10,xmm2 ;& V2
!vpslld xmm8,xmm7,2 ;<< 2
!vpor xmm10,xmm6,xmm8 ;|
!vpsrld xmm5,xmm10,4 ;>> 4
!vpand xmm6,xmm5,xmm3 ;& V3
!vpand xmm7,xmm10,xmm3 ;& V3
!vpslld xmm8,xmm7,4 ;<< 4
!vpor xmm10,xmm6,xmm8 ;|
!vpsrld xmm5,xmm10,8 ;>> 8
!vpand xmm6,xmm5,xmm4 ;& V4
!vpand xmm7,xmm10,xmm4 ;& V4
!vpslld xmm8,xmm7,8 ;<< 8
!vpor xmm10,xmm6,xmm8 ;|
!vpsrld xmm5,xmm10,16 ;>> 16
!vpslld xmm8,xmm10,16 ;<< 16
!vpor xmm10,xmm5,xmm8 ;|
!vpsrld xmm5,xmm10,xmm9 ;>> (32 - Exponent)
!vmovdqa [rdi],xmm5
!xor rcx,rcx
!@@:
!mov r12d,[rdi+rcx*4]
!shl r12,1
!vmovapd xmm12,[r8] ;16 Bytes (R.d und I.d)
!vmovapd [r9+r12*8],xmm12
!add r8,16
!inc rcx
!cmp rcx,4
!jne @b
!vpaddd xmm0,xmm0,xmm11
!cmp r12,rsi ;Position letzter Wert?
!jne .Loop_Rev_Prod
;========================================================
!mov r8,[BufferFaktor2A]
!mov rcx,[N]
!shr rcx,1
!@@:
!vmovdqa xmm0,[r8]
!vaddpd xmm1,xmm0,[r8+16]
!vmovsd [r8],xmm1
!vunpckhpd xmm2,xmm1,xmm1
!vmovsd [r8+8],xmm2
!vsubpd xmm1,xmm0,[r8+16]
!vmovsd [r8+16],xmm1
!vunpckhpd xmm2,xmm1,xmm1
!vmovsd [r8+24],xmm2
!add r8,32
!dec rcx
!jnz @b
;------------------------------------
!mov rsi,[N]
!mov r8,[BufferCosSinA]
!mov rcx,1 ;Exponent_FFT
!mov r12,2 ;Pointer0
!mov r9,2 ;Pointer2
!mov r10,[BufferFaktor2A]
!.OuterLoopP:
!inc rcx ;Exponent_FFT
!mov rax,rsi ;N
!shr rax,cl
!jz .EndP
!mov r14,rax
!shl r14,3 ;EW*8
!.InnerLoopP:
!xor r11,r11 ;Pointer1
!mov r15,r12
;------------------------------------
!@@:
!mov rax,r14
!mul r11 ;Pointer1 oder imul rax,r11 ;dann wäre rdx frei
!mov rdx,r9 ;Pointer2
!mov r13,rdx
!shl rdx,4
!sub r13,r12 ;[v_Pointer0]
!shl r13,4
!vmovapd xmm7,dqword[r10+r13]
!vmovapd xmm0,dqword[r10+rdx] ;BufferFaktor2A
!vmovapd xmm1,dqword[r8+rax] ;CosSinA
!vshufpd xmm2,xmm0,xmm0,1b ;R und I vertauschen
!vmulpd xmm3,xmm2,xmm1
!vmulpd xmm4,xmm0,xmm1
!vxorpd xmm5,xmm3,[Null1] ;Signum, xmm3 negieren um vhaddpd verwenden zu können (sonst sub) wie xmm4
!vhaddpd xmm6,xmm4,xmm5 ;WI in xmm15-high, WR in Low
!vaddpd xmm14,xmm7,xmm6 ;dqword[r10+r13]
!vsubpd xmm13,xmm7,xmm6
!vmovapd [r10+r13],xmm14
!vmovapd [r10+rdx],xmm13
!add r11,2 ;Pointer1
!inc r9 ;Pointer2
!dec r15
!jnz @b
;---------------------------------------
!add r9,r12 ;Pointer0
!cmp r9,rsi ;N
!jb .InnerLoopP
!shl r12,1
!mov r9,r12
!jmp .OuterLoopP
!.EndP:
;========================================================
FreeMemory(BufferReverse)
FreeMemory(BufferCosSin)
FreeMemory(BufferFaktor1)
BufferProdukt = AllocateMemory(N + 64)
If BufferProdukt
BufferProduktA = BufferProdukt + (64 - (BufferProdukt & $3F)) ;64-er Alignment
Else
Ausgabe$ = "-1"
ProcedureReturn Ausgabe$
EndIf
PUSH BufferProduktA
!pop [BufferProduktA]
BufferDez = AllocateMemory(16 + 64)
If BufferDez
BufferDezA = BufferDez + (64 - (BufferDez & $3F)) ;64-er Alignment
Else
Ausgabe$ = "-1"
ProcedureReturn Ausgabe$
EndIf
PUSH BufferDezA
!pop [BufferDezA]
;========================================================
;Ergebnis für Ausgabe aufbereiten
;Konvertierung Double in Integer, Rundung gemäß Rounding Control Bits im MXCSR-Register, oben auf "nearest" gesetzt
;da BufferFaktor2A noch den I-Anteil enthält, wird hier "komprimiert", d.h. in die I-Bereiche werden die Produkte reinkopiert
;die Double-Werte in Long-Integer konvertieren, Division ist notwendig für höhere Genauigkeit; nicht später shiften!
!mov r15,[N]
!mov r14,[BufferFaktor2A]
!mov r8,r14
!add r14,16 ;der 1.Wert bleibt natürlich
!add r8,8
!@@:
!mov rax,[r14]
!mov [r8],rax
!add r14,16
!add r8,8
!dec r15
!jnz @b
;die Double-Werte in Long-Integer konvertieren, Division ist notwendig für Genauigkeit; nicht später shiften!
!mov r15,[N]
!mov r14,[BufferFaktor2A]
!shr r15,2
!mov rcx,r14
!@@:
!vmovapd ymm0,[r14]
!vbroadcastsd ymm1,[ND]
!vdivpd ymm2,ymm0,ymm1
!vcvtpd2dq xmm3,ymm2
!vmovapd [rcx],xmm3
!add r14,32
!add rcx,16
!dec r15
!jnz @b
;Hex2Dez und "Verstringung"
;Pointer_Produkt = N - 1
;For i = 0 To N - 1 ;vom Produkt alle Werte durch
!mov r15,[N]
!mov r14,[BufferFaktor2A]
!mov rdx,[BufferProduktA]
!mov rdi,r15
!dec rdi
!lea r10,[Dezi]
!mov r11,[BufferDezA]
!.DeziLoop:
!mov eax,[r14] ;eax=Integer-Wert
;Hex2Dez
;Ziffer = 0
!xor r12b,r12b
;Pointer1 = 0
!xor r8,r8
;Pointer2 = 0
!xor r9,r9
;While PeekQ(?Dezi + Pointer1) <> 0 oder Zähler wie jetzt
!mov rsi,10 ;Anzahl Dezi-Werte ohne Null, 9 reicht für M49, ansonsten auch Dezi anpassen an Faktoren(Produkt)-Länge!
!.OuterLoopDez1:
;While (QuadWert - PeekQ(?Dezi + Pointer1)) >= 0
!mov r13d,eax
!.InnerLoopDez1:
!sub r13d,[r10+r8]
!js .InnerLoopDez1End
;Ziffer + 1
!inc r12b
;QuadWert - PeekQ(?Dezi + Pointer1)
;!sub rax,[r10+r8]
!mov eax,r13d ;AL ist unten letzte Ziffer
;Wend (QuadWert - PeekQ(?Dezi + Pointer1)) >= 0
!jmp .InnerLoopDez1
!.InnerLoopDez1End:
;PokeB(BufferDezA + Pointer2, Ziffer)
!mov [r11+r9],r12b
;Pointer1 + 8
!add r8,8
;Pointer2 + 1
!inc r9
;Ziffer = 0
!xor r12b,r12b
!dec rsi
;Wend PeekQ(?Dezi + Pointer1) <> 0 oder Zähler wie jetzt
!jnz .OuterLoopDez1
;PokeB(BufferDezA + Pointer2, QuadWert1) ;letzte Dezimal-Ziffer abspeichern
!mov [r11+r9],al ;AL ist letzte Ziffer von oben
;Pointer_Produkt1 = Pointer_Produkt
!mov rsi,rdi
!mov rax,10 ;Anzahl Dezi-Werte ohne Null, 9 reicht für M49, ansonsten auch Dezi anpassen an Faktoren(Produkt)-Länge!
;!mov rdx,[BufferProduktA]
;For k = 0 To 15
!@@:
;Ziffer = PeekB(BufferProduktA + Pointer_Produkt1)
!mov r12b,[rdx+rsi]
;Ziffer + PeekB(BufferDezA + Pointer2)
!add r12b,[r11+r9]
;PokeB(BufferProduktA + Pointer_Produkt1, Ziffer)
!mov [rdx+rsi],r12b
;If Ziffer > 9 ;Übertrag
!cmp r12b,9 ;Übertrag?
!jbe .ZifferOK
;Ziffer - 10
!sub r12b,10
;PokeB(BufferProduktA + Pointer_Produkt1, Ziffer)
!mov [rdx+rsi],r12b
;Ziffer = PeekB(BufferProduktA + Pointer_Produkt1 - 1) + 1
!mov r12b,[rdx+rsi-1]
!inc r12b
;PokeB(BufferProduktA + Pointer_Produkt1 - 1, Ziffer)
!mov [rdx+rsi-1],r12b
;EndIf
!.ZifferOK:
;Pointer_Produkt1 - 1
!dec rsi
!js .Reicht
;Pointer2 - 1
!dec r9
!dec rax
;Next
!jnz @b
!.Reicht:
;Pointer_Produkt - 1
!dec rdi
!add r14,4
!dec r15
;Next
!jnz .DeziLoop
;------------------------
;Ergebnis (Produkt) als ASCII-Ziffern abspeichern
!vbroadcastsd ymm0,[AsciiNull] ;AVX Missbrauch von Float, was solls... kostet aber Speed
!mov r9,[BufferProduktA]
!xor r10,r10
!mov rcx,[N]
!shr rcx,4 ;/16, XMM-Length
!@@:
!vpor xmm1,xmm0,[r9+r10]
!vmovdqa [r9+r10],xmm1
!add r10,16
!dec rcx
!jnz @b
!mov byte[r9+r10],0 ;Zero-Byte für String, zur Sicherheit, evtl.word
FreeMemory(BufferFaktor2)
FreeMemory(BufferDez)
Ausgabe$ = LTrim(PeekS(BufferProduktA + N - LG, LG, #PB_Ascii), "0") ;evtl. führende Null(en) entfernen
If Ausgabe$ = "" ;mindestens einer der Faktoren war Null, Kompatibilität zu "normaler" Multiplikation
Ausgabe$ = "0"
EndIf
FreeMemory(BufferProdukt)
;Restore Registers
!mov rax,[BufferFXA]
!fxrstor64 [rax]
!mov rdi,[rax+464] ;restore callee-save registers
!mov rsi,[rax+472]
!mov r12,[rax+480]
!mov r13,[rax+488]
!mov r14,[rax+496]
!mov r15,[rax+504]
FreeMemory(BufferFX)
!lea rdx,[MXCSR_Sicher]
!vldmxcsr [rdx] ;schreibt den gesicherten Wert zurück ins MXCSR-Register
!vzeroupper
DisableASM
ProcedureReturn Ausgabe$
;========================================================
DataSection
!RezFakSin: ;for Sine
!dq 2.755731922398589065e-6 ; 1/9! 4.Iteration
!dq -1.984126984126984127e-4 ;-1/7! 3.Iteration
!dq 8.333333333333333333e-3 ; 1/5! 2.Iteration
!dq -1.666666666666666667e-1 ;-1/3! 1.Iteration
!dq 2.811457254345520763e-15 ; 1/17! 8.Iteration
!dq -7.647163731819816476e-13 ;-1/15! 7.Iteration
!dq 1.605904383682161460e-10 ; 1/13! 6.Iteration
!dq -2.505210838544171878e-8 ;-1/11! 5.Iteration
!dq 6.446950284384473396e-26 ; 1/25! 12.Iteration
!dq -3.868170170630684038e-23 ;-1/23! 11.Iteration
!dq 1.957294106339126123e-20 ; 1/21! 10.Iteration
!dq -8.220635246624329717e-18 ;-1/19! 9.Iteration
!RezFakCos: ;for Cosine
!dq 2.480158730158730159e-5 ; 1/8! 4.Iteration
!dq -1.388888888888888889e-3 ;-1/6! 3.Iteration
!dq 4.166666666666666667e-2 ; 1/4! 2.Iteration
!dq -5.000000000000000000e-1 ;-1/2! 1.Iteration
!dq 4.779477332387385297e-14 ; 1/16! 8.Iteration
!dq -1.147074559772972471e-11 ;-1/14! 7.Iteration
!dq 2.087675698786809898e-9 ; 1/12! 6.Iteration
!dq -2.755731922398589065e-7 ;-1/10! 5.Iteration
!dq 1.611737571096118349e-24 ; 1/24! 12.Iteration
!dq -8.896791392450573287e-22 ;-1/22! 11.Iteration
!dq 4.110317623312164858e-19 ; 1/20! 10.Iteration
!dq -1.561920696858622646e-16 ;-1/18! 9.Iteration
!Eins:
!dq 1.0
!Pi2:
!dq 6.283185307179586476925
!Null1:
!dq 0h
!Signum:
!dq 8000000000000000h ;for neg Sin/Cos
!dq 0h
!dq 8000000000000000h
!AsciiNull:
!dq 3030303030303030h ;for String
!Dezi:
!dq 10000000000
!dq 1000000000
!dq 100000000
!dq 10000000
!dq 1000000
!dq 100000
!dq 10000
!dq 1000
!dq 100
!dq 10
!dq 0 ;blanke Sicherheit
!BufferCosSinA dq ?
!BufferDezA dq ?
!BufferFaktor1A dq ?
!BufferFaktor2A dq ?
!BufferFXA dq ?
!BufferProduktA dq ?
!BufferReverseA dq ?
!Exponent dq ?
!L1 dq ?
!L2 dq ?
!LenF dq ?
!N dq ?
!ND dq ?
!PF1 dq ?
!PF2 dq ?
!Pointer_Produkt dq ?
!MXCSR dd ?
!MXCSR_Sicher dd ?
!V1 dd 55555555h
!V2 dd 33333333h
!V3 dd 0F0F0F0Fh
!V4 dd 00FF00FFh
!VVier dd 4
EndDataSection
EndProcedure
Procedure Calc() ;Aufruf durch Fermat, Mersenne und Potenzen
Protected.q Bit0Pot, BitL, BitM, BufferPot, BufferPotA, CalcPot, Exp1, Exp2, GID2, GID3, LenPot, LVGZ, Stellen, SumExp1, SumExp1Old
Protected Potenz$
ZeitA = ElapsedMilliseconds()
Stellen = Potenz * Len(Result$) + 1
Stellen = Stellen + (64 - (Stellen & $3F))
If Potenz
!bsr rax,[v_Potenz]
!mov [p.v_BitM],rax ;most significant set bit
!bsf rax,[v_Potenz]
!mov [p.v_BitL],rax ;least significant set bit
Else
BitM = 0
BitL = 0
EndIf
CalcPot = Potenz ;CalcPot wird geshiftet!
BufferPot = AllocateMemory((Stellen * 2 * Asc_Uni) + 64)
If BufferPot
BufferPotA = BufferPot + (64 - (BufferPot & $3F)) ;64-er Align
Else
MessageRequester("Error!", "Out of memory!", #MB_ICONERROR)
End
EndIf
GID2 = ListIconGadget(2, 10, 40, 780, 730, "S i n g l e S t e p s :", 375, #PB_ListIcon_GridLines | #PB_ListIcon_AlwaysShowSelection)
SetGadgetFont(2, FontID(1))
AddGadgetColumn(2, 1, "I n t e r m e d i a t e :", 375)
PokeS(BufferPotA + Stellen * Asc_Uni, Result$) ;hier noch Result$=Basis$, zum besseren Verständnis
PokeS(BufferPotA, "1") ;falls Bit0 von Potenz nicht gesetzt, X^0=1
Bit0Pot = 0
!shr qword[p.v_CalcPot],1
!jnc @f
PokeS(BufferPotA, Basis$)
Bit0Pot = 1 ;Bit0 von Potenz ist gesetzt
!@@:
Potenz$ = Bin(Potenz)
LenPot = Len(Potenz$)
GID3 = StringGadget(3, 10, 20, 780, 20, "Exponent: " + Potenz$, #ES_CENTER)
SetGadgetFont(3, FontID(0))
SetActiveGadget(3)
SendMessage_(GID3, #EM_SETSEL, LenPot - 1 + 10, LenPot + 10) ;10="Exponent: "
TextGadget(4, 200, 775, 400, 20, "P l e a s e P a t i e n c e , C a l c u l a t i o n i s r u n n i n g !", #PB_Text_Center)
SetGadgetColor(4, #PB_Gadget_BackColor, $00FFFF)
SetGadgetFont(4, FontID(0))
Exp1 = 1
Exp2 = 2
LVGZ = 0 ;Zeile im ListViewGadget (Info)
SumExp1 = 0
For i = 0 To BitM - 1 ;-1=Bit0 aus Zähler raus
AddGadgetItem (2, -1, "Calculating " + Basis$ + "^" + Str(Exp2) + " (" + Basis$ + "^" + Str(Exp1) + " * " + Basis$ + "^" + Str(Exp1) + ")" + Chr(10) + "")
SetGadgetItemColor(2, LVGZ - 1, #PB_Gadget_FrontColor, 0, #PB_All)
SetGadgetItemColor(2, LVGZ, #PB_Gadget_FrontColor, $FF0000, 0) ;links
SendMessage_(GID3, #EM_SETSEL, LenPot - 1 + 9 - i, LenPot + 9 - i)
While WaitWindowEvent(0) : Wend ;Time for displaying
SendMessage_(GID2, #LVM_ENSUREVISIBLE, LVGZ, #True) ;automatisch nach unten scrollen
Result$ = FFT_Mul_AVX(BufferPotA + Stellen * Asc_Uni, BufferPotA + Stellen * Asc_Uni) ;Potenzen berechnen
If Result$ = "-1"
MessageRequester("Error!", "Out of memory!", #MB_ICONERROR)
End
EndIf
LVGZ + 1
PokeS(BufferPotA + Stellen * Asc_Uni, Result$)
!shr qword[p.v_CalcPot],1
!jnc @f
;!popcnt rax,[v_Potenz] ;for CPU with POPCNT
;!cmp rax,1
;!je @f ;Exponent ist 2-er Potenz
If BitL <> BitM ;Exponent ist keine 2-er Potenz
SumExp1Old = SumExp1
SumExp1 + Exp1
AddGadgetItem (2, -1, "" + Chr(10) + "Calculating " + Basis$ + "^" + Str(SumExp1 * 2 + Bit0Pot) + " (" + Basis$ + "^" + Str(Exp2) + " * " + Basis$ + "^" + Str(SumExp1Old * 2 + Bit0Pot) + ")")
SetGadgetItemColor(2, LVGZ - 1, #PB_Gadget_FrontColor, 0, 0)
SetGadgetItemColor(2, LVGZ, #PB_Gadget_FrontColor, $FF0000, 1) ;rechts
SendMessage_(GID3, #EM_SETSEL, LenPot - 1 + 9 - i, LenPot + 10)
While WaitWindowEvent(0) : Wend ;Time for displaying
SendMessage_(GID2, #LVM_ENSUREVISIBLE, LVGZ, #True) ;automatisch nach unten scrollen
Result$ = FFT_Mul_AVX(BufferPotA, BufferPotA + Stellen * Asc_Uni) ;(Zwischen-) Ergebnis berechnen
If Result$ = "-1"
MessageRequester("Error!", "Out of memory!", #MB_ICONERROR)
End
EndIf
LVGZ + 1
If i < BitM
PokeS(BufferPotA, Result$)
EndIf
EndIf
!@@:
Exp1 << 1
Exp2 << 1
Next
ZeitE = ElapsedMilliseconds() - ZeitA
SetGadgetItemColor(2, LVGZ - 1, #PB_Gadget_FrontColor, 0, #PB_All) ;blau weg
SendMessage_(GID3, #EM_SETSEL, LenPot + 10, LenPot + 10) ;blau weg
If FFT <> 6 ;Fermat
FreeGadget(4)
ButtonGadget(4, 200, 775, 400, 20, "C a l c u l a t i o n F i n i s h e d , S h o w R e s u l t")
SetGadgetFont(4, FontID(0))
EndIf
TextGadget(5, 175, 775, 20, 20, Chr(62), #PB_Text_Center)
SetGadgetColor(5, #PB_Gadget_BackColor, $00FF00)
SetGadgetFont(5, FontID(0))
TextGadget(6, 605, 775, 20, 20, Chr(60), #PB_Text_Center)
SetGadgetColor(6, #PB_Gadget_BackColor, $00FF00)
SetGadgetFont(6, FontID(0))
SetActiveGadget(4) ;entfernt Caret oben bei Exponent
If FFT <> 6
Repeat
Event = WaitWindowEvent()
If Event = #PB_Event_CloseWindow
CloseWindow(0)
End
EndIf
If Event = #PB_Event_Gadget
Select EventGadget()
Case 4
Break
EndSelect
EndIf
ForEver
EndIf
FreeMemory(BufferPot)
For i = 2 To 6
FreeGadget(i)
Next
EndProcedure
Procedure RangeCharDecimals(StrPointer)
Protected.q RangeChar
Protected Char$, CharRange$
CharRange$ = Chr(1) + Chr(47) + Chr(58) + Chr(255) ;für Suche nach Chars, die keine Dezimal-Ziffer sind. Chr(0) geht nicht!
!mov rax,[p.v_CharRange$]
!movdqu xmm1,[rax]
!mov rdx,[p.v_StrPointer]
!mov rax, -16
CompilerIf #PB_Compiler_Unicode ;Unicode
!@@:
!add rax,16
!pcmpistri xmm1,dqword[rdx+rax],00000101b ;set ecx, Bit0 = 1 und Bit1 = 0, also String-Chars werden als unsigned Words interpretiert, Bit2 = 1 und Bit3 = 0, also Test auf Ranges
!ja @b
!shr rax,1 ;Pointer in String muss hier halbiert werden (Word)
!add rax,rcx
!shl rcx,1 ;weil sonst nur 8 (Words), unten wird aber mit 16 verglichen
CompilerElse ;kein Unicode
!@@:
!add rax,16
!pcmpistri xmm1,dqword[rdx+rax],00000100b ;set ecx, Bit0 = 0 und Bit1 = 0, also String-Chars werden als unsigned Bytes interpretiert, Bit2 = 1 und Bit3 = 0, also Test auf Ranges
!ja @b
!add rax,rcx
CompilerEndIf
!cmp rcx,16 ;rcx=16 oder 8, wenn nichts gefunden! 8 schon erweitert
!je @f
!inc rax
!mov [p.v_RangeChar],rax
Char$ = Mid(PeekS(StrPointer, -1), RangeChar, 1)
MessageRequester("String: Error!", "Digit at position " + Str(RangeChar) + " is not a decimal digit! (ASCII=" + Asc(Char$) + "), Char=" + Char$, #MB_ICONERROR)
ProcedureReturn
!@@:
MessageRequester("String: O.K.!", "All digits are decimal digits!")
EndProcedure
Procedure Comparing()
Protected.q BufferFile, BufferRes, FileByte, LenFile, LenRes
Protected CompareFile$, IsCon$
TextGadget(101, 30, 25, 200, 15, "Select File for Comparison:")
SetGadgetFont(101, FontID(0))
ExplorerTreeGadget(100, 25, 45, 750, 730, "*.*", #PB_Explorer_NoDriveRequester)
Repeat
Event = WaitWindowEvent()
If Event = #PB_Event_CloseWindow
End
EndIf
Until EventType() = #PB_EventType_LeftDoubleClick And GetGadgetState(100) = #PB_Explorer_File
CompareFile$ = GetGadgetText(100) ;kompletter File-Pfad
If ReadFile(1, CompareFile$)
LenRes = Len(Result$)
BufferRes = AllocateMemory(LenRes)
PokeS(BufferRes, Result$, LenRes, #PB_Ascii)
LenFile = Lof(1)
BufferFile = AllocateMemory(LenFile) ;ohne weiteren Test
ReadData(1, BufferFile, LenFile)
j = 0
For i = 0 To LenFile - 1
FileByte = PeekB(BufferFile + i)
If FileByte > 47 And FileByte < 58
PokeB(BufferFile + j, FileByte)
j + 1
EndIf
Next
IsCon$ = ""
If CompareMemory(BufferRes, BufferFile, LenRes) = 0
IsCon$ = "No "
EndIf
MessageRequester("Comparison", IsCon$ + "Conformity!")
CloseFile(1)
FreeMemory(BufferRes)
FreeMemory(BufferFile)
Else
MessageRequester("Error!", "Can not read the comparison file!")
EndIf
FreeGadget(100)
FreeGadget(101)
EndProcedure
It´s not perfect (e.g. error-handling) but have fun and a happy and healthy new year!
Helle
Edit Jan 19, 2021: +Fermat, Optimizations