original asm nicked from wilbert
The macros should make it a bit easier to adapt existing asm to the C backend without too much gnashing of teeth.
Code: Select all
CompilerIf #PB_Compiler_Backend = #PB_Backend_C
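;Usage (the order matters, because GCC's operand sections are positional):
;BeginAsm()
;!"Intel (fasm-like) syntax asm, one quoted line per instruction, each ending like this;"
;AsmOutput(var)
;AsmInput(var)
;AsmClobbers() "rax","rdx","memory"
;EndAsm()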
; AsmInput(var): emits the input-operand section of the GCC extended-asm
; statement opened by BeginAsm(). The variable is bound through its C-backend
; name v_<var> (built with the macro concatenation operator #) using the "r"
; (general register) constraint, and is referenced in the asm text as %[var].
; Only one colon is emitted, so this has to come after AsmOutput() for the
; operand to land in the input section.
Macro AsmInput(var)
!:[var] "r" (v_#var)
EndMacro
; AsmInput2(var0,var1): two-operand form of AsmInput(). Emits one input
; section containing both variables, each bound as v_<name> with the "r"
; constraint and addressable in the asm text as %[var0] / %[var1].
Macro AsmInput2(var0,var1)
!:[var0] "r" (v_#var0),[var1] "r" (v_#var1)
EndMacro
; AsmInput3(var0,var1,var2): three-operand form of AsmInput(). Emits one input
; section containing all three variables, each bound as v_<name> with the "r"
; constraint and addressable in the asm text as %[var0] / %[var1] / %[var2].
Macro AsmInput3(var0,var1,var2)
!:[var0] "r" (v_#var0),[var1] "r" (v_#var1),[var2] "r" (v_#var2)
EndMacro
; AsmOutput(var): ends the asm template and emits the output-operand section.
; First appends ".att_syntax;" to the template, undoing the ".intel_syntax"
; directive that BeginAsm() put at its start (presumably because the rest of
; the compiler-generated assembly is AT&T syntax - confirm for your toolchain).
; Then binds v_<var> as a write-only register output ("=r"), addressable in the
; asm text as %[var].
; NOTE(review): because the syntax switch-back lives here, an asm block written
; without AsmOutput() would not restore AT&T syntax.
Macro AsmOutput(var)
!".att_syntax;"
!:[var] "=r" (v_#var)
EndMacro
; BeginAsm(): opens a GCC extended-asm statement (__asm__ __volatile__) and
; starts its template with ".intel_syntax noprefix;" so the quoted instruction
; lines that follow can be written in Intel operand order without % register
; prefixes. The statement is left open; it is closed by EndAsm().
Macro BeginAsm()
!__asm__ __volatile__ (".intel_syntax noprefix;"
EndMacro
; AsmClobbers(): emits only the colon that opens the clobber section. The
; caller writes the quoted, comma-separated clobber names on the same source
; line, right after the macro call (see the example on the Macro line below).
Macro AsmClobbers() ; "eax","edx" ...
!:
EndMacro
; EndAsm(): emits ");" to close the asm statement opened by BeginAsm().
Macro EndAsm()
!);
EndMacro
Procedure.q FastHash64(*Buffer, Len, Seed.q=0)
  ; FastHash64 algorithm by Zilong Tan ported by wilbert
  ;
  ; C-backend variant: the hash is computed by one GCC extended-asm statement
  ; assembled from the BeginAsm/AsmOutput/AsmInput3/AsmClobbers/EndAsm macros.
  ; x86-64 only (uses rax..r11).
  ;
  ;   *Buffer : address of the data to hash
  ;   Len     : number of bytes to hash; counted down with sub/jc, i.e. treated
  ;             as an unsigned value
  ;   Seed    : optional 64-bit seed (default 0)
  ;   Returns : the 64-bit hash
  ;
  ; Register roles inside the asm block:
  ;   rax = h (running hash)           rdx = read position
  ;   rcx = bytes remaining            r8  = v (current input word)
  ;   r9  = scratch for mix()          r10 = mix multiplier, r11 = m
  ;
  ; The trailing 1..7 bytes are gathered with one 4-, one 2- and one 1-byte
  ; load as needed, so nothing past *Buffer+Len is read.
  ;
  ; Changes from the original port:
  ;  * The result variable is declared as vres.q. Previously an unused res.q was
  ;    declared while the asm output and ProcedureReturn used the undeclared
  ;    vres, which only worked through implicit declaration and fails under
  ;    EnableExplicit.
  ;  * "memory" is listed as a clobber because the block reads the buffer
  ;    through a pointer held in a register; without it the C compiler is not
  ;    told that earlier stores to the buffer must be completed first. "cc" is
  ;    listed because the flags are modified.
  ;
  ; NOTE(review): .l0-.l5 are ordinary assembler symbols; if the C compiler
  ; ever emits this asm block more than once (e.g. by inlining the procedure)
  ; they would be defined twice. GCC's %= template token exists for that case.
  ;
  ; buff is an integer copy of *Buffer: the operand macros reference variables
  ; by their v_ name, which a pointer variable presumably does not have.
  Protected vres.q, buff = *Buffer
  BeginAsm()
  !"mov r10, 0x2127599bf4325c37;"
  !"mov r11, 0x880355f21e6d1965;"
  !"mov rdx, %[buff];"
  !"mov rcx, %[len];"
  !"mov rax, rcx;" //; h = seed ^ (len * m);
  !"imul rax, r11;"
  !"xor rax, %[seed];"
  !"sub rcx, 8;"
  !"jc .l1;" //; fewer than 8 bytes: skip the 8-byte loop
  !".l0:;"
  !"mov r8, [rdx];" //; v = *pos++;
  !"add rdx, 8;"
  !"mov r9, r8;" //; mix(v)
  !"shr r9, 23;"
  !"xor r8, r9;"
  !"imul r8, r10;"
  !"mov r9, r8;"
  !"shr r9, 47;"
  !"xor r8, r9;"
  !"xor rax, r8;" //; h ^= mix(v);
  !"imul rax, r11;" //; h *= m;
  !"sub rcx, 8;"
  !"jnc .l0;"
  !".l1:;"
  !"add rcx, 8;" //; rcx = remaining bytes (0..7)
  !"jz .l5;"
  !"xor r8, r8;" //; v = 0
  !"test rcx, 4;"
  !"jz .l2;"
  !"mov r8d, [rdx];" //; take 4 bytes
  !"add rdx, 4;"
  !"ror r8, 32;"
  !".l2:;"
  !"test rcx, 2;"
  !"jz .l3;"
  !"movzx r9d, word ptr [rdx];" //; take 2 bytes
  !"add rdx, 2;"
  !"xor r8, r9;"
  !"ror r8, 16;"
  !".l3:;"
  !"test rcx, 1;"
  !"jz .l4;"
  !"movzx r9d, byte ptr [rdx];" //; take 1 byte
  !"xor r8, r9;"
  !"ror r8, 8;"
  !".l4:;"
  !"and rcx, 7;" //; rotate the gathered bytes back down to bit 0
  !"shl rcx, 3;"
  !"rol r8, cl;"
  !"mov r9, r8;" //; mix(v)
  !"shr r9, 23;"
  !"xor r8, r9;"
  !"imul r8, r10;"
  !"mov r9, r8;"
  !"shr r9, 47;"
  !"xor r8, r9;"
  !"xor rax, r8;" //; h ^= mix(v);
  !"imul rax, r11;" //; h *= m;
  !".l5:;"
  !"mov r9, rax;" //; mix(h)
  !"shr r9, 23;"
  !"xor rax, r9;"
  !"imul rax, r10;"
  !"mov r9, rax;"
  !"shr r9, 47;"
  !"xor rax, r9;"
  !"mov %[vres],rax;"
  AsmOutput(vres)
  AsmInput3(buff,len,seed)
  AsmClobbers() "r8","r9","r10","r11","rax","rcx","rdx","cc","memory"
  EndAsm()
  ProcedureReturn vres ; return mix(h);
EndProcedure
CompilerElse
Procedure.q FastHash64(*Buffer, Len, Seed.q=0)
  ; FastHash64 algorithm by Zilong Tan ported by wilbert
  ;
  ; Native-assembler variant (x86-64 only). The result is left in rax and
  ; handed back by the bare ProcedureReturn at the end.
  ;
  ;   *Buffer : address of the data to hash
  ;   Len     : number of bytes to hash (counted down with sub/jc, i.e. unsigned)
  ;   Seed    : optional 64-bit seed (default 0)
  ;
  ; Register roles:
  ;   rax = h (running hash)           rdx = read position
  ;   rcx = bytes remaining            r8  = v (current input word)
  ;   r9  = scratch for mix()          r10 = mix multiplier, r11 = m

  ; --- setup -----------------------------------------------------------
  !mov r10, 0x2127599bf4325c37
  !mov r11, 0x880355f21e6d1965
  !mov rdx, [p.p_Buffer]
  !mov rcx, [p.v_Len]
  !mov rax, rcx                 ; h = seed ^ (len * m)
  !imul rax, r11
  !xor rax, [p.v_Seed]
  !sub rcx, 8
  !jc .fh_tail                  ; fewer than 8 bytes in total

  ; --- whole 8-byte words ----------------------------------------------
  !.fh_loop8:
  !mov r8, [rdx]                ; v = *pos++
  !add rdx, 8
  ; mix(v)
  !mov r9, r8
  !shr r9, 23
  !xor r8, r9
  !imul r8, r10
  !mov r9, r8
  !shr r9, 47
  !xor r8, r9
  !xor rax, r8                  ; h ^= mix(v)
  !imul rax, r11                ; h *= m
  !sub rcx, 8
  !jnc .fh_loop8

  ; --- trailing 0..7 bytes ---------------------------------------------
  !.fh_tail:
  !add rcx, 8                   ; undo the last subtraction: rcx = leftover count
  !jz .fh_final
  !xor r8, r8
  !test rcx, 4
  !jz .fh_tail2
  ; 4 leftover bytes
  !mov r8d, [rdx]
  !add rdx, 4
  !ror r8, 32
  !.fh_tail2:
  !test rcx, 2
  !jz .fh_tail1
  ; 2 leftover bytes
  !movzx r9d, word [rdx]
  !add rdx, 2
  !xor r8, r9
  !ror r8, 16
  !.fh_tail1:
  !test rcx, 1
  !jz .fh_tail_mix
  ; 1 leftover byte
  !movzx r9d, byte [rdx]
  !xor r8, r9
  !ror r8, 8
  !.fh_tail_mix:
  ; rotate left by 8 * leftover count so the first leftover byte sits at bit 0
  !and rcx, 7
  !shl rcx, 3
  !rol r8, cl
  ; mix(v)
  !mov r9, r8
  !shr r9, 23
  !xor r8, r9
  !imul r8, r10
  !mov r9, r8
  !shr r9, 47
  !xor r8, r9
  !xor rax, r8                  ; h ^= mix(v)
  !imul rax, r11                ; h *= m

  ; --- finalisation: mix(h) --------------------------------------------
  !.fh_final:
  !mov r9, rax
  !shr r9, 23
  !xor rax, r9
  !imul rax, r10
  !mov r9, rax
  !shr r9, 47
  !xor rax, r9
  ProcedureReturn ; rax already holds mix(h)
EndProcedure
CompilerEndIf
CompilerIf #PB_Compiler_IsMainFile
  ; Demo, compiled only when this file is the main source file: hashes the
  ; in-memory bytes of a test string (StringByteLength, not the character
  ; count) and prints the result to the debug output.
  Global s.s = "the quick brown fox"
  Global *ptr = @s
  Global len = StringByteLength(s)
  Debug FastHash64(*ptr, len)
  ; Output reported by the author for each backend:
  ;   3168172151239496691   C backend
  ;   3168172151239496691   asm backend
CompilerEndIf