I was thinking about something like this (requires SSE2):
Code: Select all
; One 128-bit SSE register image: four 32-bit lanes (16 bytes in total).
; The union overlays both arrays on the same storage, so the lanes can be
; read or written either as floats or as longs.
Structure RegSSE
  StructureUnion
    f.f[4]
    l.l[4]
  EndStructureUnion
EndStructure
; Converts the four packed floats at *RegAligned to four 32-bit signed longs,
; in place. CVTPS2DQ rounds according to the current MXCSR rounding mode
; (it does not truncate).
; *RegAligned must point to 16-byte aligned memory: the aligned SSE moves used
; here fault on a misaligned address.
; Uses only rax/eax and xmm0 as scratch registers.
Procedure Float4ToLong4(*RegAligned.RegSSE)
  CompilerIf #PB_Compiler_Processor = #PB_Processor_x64
    ; 64-bit build: pointer parameter is fetched into rax
    !mov rax, [p.p_RegAligned]
    !movaps xmm0, [rax]
    !cvtps2dq xmm0, xmm0
    !movdqa [rax], xmm0
  CompilerElse
    ; 32-bit build: same sequence with a 32-bit pointer in eax
    !mov eax, [p.p_RegAligned]
    !movaps xmm0, [eax]
    !cvtps2dq xmm0, xmm0
    !movdqa [eax], xmm0
  CompilerEndIf
EndProcedure
; Converts the four packed 32-bit signed longs at *RegAligned to four
; single-precision floats, in place (CVTDQ2PS).
; *RegAligned must point to 16-byte aligned memory: the aligned SSE moves used
; here fault on a misaligned address.
; Uses only rax/eax and xmm0 as scratch registers.
Procedure Long4ToFloat4(*RegAligned.RegSSE)
  CompilerIf #PB_Compiler_Processor = #PB_Processor_x64
    ; 64-bit build: pointer parameter is fetched into rax
    !mov rax, [p.p_RegAligned]
    !movdqa xmm0, [rax]
    !cvtdq2ps xmm0, xmm0
    !movdqa [rax], xmm0
  CompilerElse
    ; 32-bit build: same sequence with a 32-bit pointer in eax
    !mov eax, [p.p_RegAligned]
    !movdqa xmm0, [eax]
    !cvtdq2ps xmm0, xmm0
    !movdqa [eax], xmm0
  CompilerEndIf
EndProcedure
; Demo for the aligned variant: convert three floats to longs in place.
; The allocation is not assumed to be 16-byte aligned, so 32 bytes are requested
; (16 payload + up to 15 bytes of slack) and the address is rounded up to the
; next multiple of 16.
*Mem = AllocateMemory(32)
If *Mem
  *MemAligned = (*Mem + 15) & -16
  *Reg.RegSSE = *MemAligned
  *Reg\f[0] = 25.5
  *Reg\f[1] = 125
  *Reg\f[2] = 98
  Float4ToLong4(*Reg)
  Debug *Reg\l[0]
  Debug *Reg\l[1]
  Debug *Reg\l[2]
  ; Release the pointer returned by AllocateMemory(), not the aligned alias.
  FreeMemory(*Mem)
Else
  ; Allocation failed: do not touch the (null) buffer.
  Debug "AllocateMemory failed"
EndIf
But if you are not going to call it millions of times, using unaligned memory like you did is probably easier:
Code: Select all
; Four 32-bit lanes (16 bytes) that share storage between two views:
; f[] accesses them as floats, l[] accesses the same bytes as longs.
Structure RegSSE
  StructureUnion
    f.f[4]
    l.l[4]
  EndStructureUnion
EndStructure
; Converts the four packed floats at *Reg to four 32-bit signed longs, in place.
; CVTPS2DQ rounds according to the current MXCSR rounding mode (it does not
; truncate).
; *Reg may have any alignment: data is moved with the unaligned MOVDQU form.
; Uses only rax/eax and xmm0 as scratch registers.
Procedure Float4ToLong4(*Reg.RegSSE)
  CompilerIf #PB_Compiler_Processor = #PB_Processor_x64
    ; 64-bit build: pointer parameter is fetched into rax
    !mov rax, [p.p_Reg]
    !movdqu xmm0, [rax]
    !cvtps2dq xmm0, xmm0
    !movdqu [rax], xmm0
  CompilerElse
    ; 32-bit build: same sequence with a 32-bit pointer in eax
    !mov eax, [p.p_Reg]
    !movdqu xmm0, [eax]
    !cvtps2dq xmm0, xmm0
    !movdqu [eax], xmm0
  CompilerEndIf
EndProcedure
; Converts the four packed 32-bit signed longs at *Reg to four single-precision
; floats, in place (CVTDQ2PS).
; *Reg may have any alignment: data is moved with the unaligned MOVDQU form.
; Uses only rax/eax and xmm0 as scratch registers.
Procedure Long4ToFloat4(*Reg.RegSSE)
  CompilerIf #PB_Compiler_Processor = #PB_Processor_x64
    ; 64-bit build: pointer parameter is fetched into rax
    !mov rax, [p.p_Reg]
    !movdqu xmm0, [rax]
    !cvtdq2ps xmm0, xmm0
    !movdqu [rax], xmm0
  CompilerElse
    ; 32-bit build: same sequence with a 32-bit pointer in eax
    !mov eax, [p.p_Reg]
    !movdqu xmm0, [eax]
    !cvtdq2ps xmm0, xmm0
    !movdqu [eax], xmm0
  CompilerEndIf
EndProcedure
; Demo for the unaligned variant: an ordinary structured variable is converted
; in place and the first three lanes are printed as longs.
Define Reg.RegSSE
Define lane.i

Reg\f[0] = 25.5
Reg\f[1] = 125
Reg\f[2] = 98

Float4ToLong4(@Reg)

For lane = 0 To 2
  Debug Reg\l[lane]
Next
If you prefer truncation (rounding toward zero) when converting to long, you can use CVTTPS2DQ instead of CVTPS2DQ.