[PureBasic] Base91 encode/decode

Share your advanced PureBasic knowledge/code with the community.
xakep
User
User
Posts: 40
Joined: Fri Mar 25, 2016 2:02 pm
Location: Europe

[PureBasic] Base91 encode/decode

Post by xakep »

"basE91 is an advanced method for encoding binary data as ASCII characters.
It is similar to UUencode or base64, but is more efficient.
The overhead produced by basE91 depends on the input data.
It amounts at most to 23% (versus 33% for base64) and can range down to 14%, which typically occurs on 0-byte blocks."

I'm tired of using base64, so i converted this from javascript .. and i made it faster.
More info: http://base91.sourceforge.net

Code: Select all

; BasE91 encode/decode
; Coded by: xakep
; Based on http://base91.sourceforge.net and https://github.com/pkalogiros/base91.js
; Version 0.2, thanks to wilbert@purebasic.fr

EnableExplicit

Structure B91
  s.s[1]
  asc.a
EndStructure

Global Dim Base91_Alphabet.B91(91)
Global Base91_Initiated.b

Procedure.b Base91_Init()
  Define i.l, bAlph.s, sNow.s
  
  bAlph = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!#$%&()*+,./:;<=>?@[]^_`{|}~" + #DOUBLEQUOTE$
  
  i = 1
  Repeat
    sNow = Mid(bAlph, i, 1)

    Base91_Alphabet(i)\s = sNow
    Base91_Alphabet(i)\asc = Asc(sNow)
    
    i = i + 1
  Until i > 91
  
  Base91_Initiated = #True
EndProcedure

Procedure.l FindInB91(Array Arr.B91(1), sToFind.s)
  Define i.l, zFound.b
  
  i = 1
  Repeat
    If Arr(i)\s = sToFind
      zFound = #True
      Break
    EndIf
    i = i + 1
  Until i > 91
  
  If zFound = #True
    ProcedureReturn i -1
  EndIf
  
EndProcedure

Procedure.s Base91_encode(*Mem, memLen.l)
  Define i.l, b.l, n.l, v.l, sRet.s, x.l, *Encoded, eLen.l, iC.l
  
  If *Mem = 0 Or memLen = 0
    ProcedureReturn
  EndIf
  
  If Base91_Initiated = #False
    Base91_Init()
  EndIf
  
  *Encoded = AllocateMemory(memLen * 1.5)
  
  If *Encoded
    Repeat
      x = PeekA(*Mem + i)
      
      b = b | x << n
      n = n + 8
      
      If n > 13
        v = b & 8191
        
        If v > 88
          b = b >> 13
          n = n - 13
        Else
          v = b & 16383
          b = b >> 14
          n = n - 14
        EndIf
        
        PokeA(*Encoded + iC, Base91_Alphabet(v % 91 + 1)\asc)
        iC = iC + 1
        PokeA(*Encoded + iC, Base91_Alphabet((v / 91 | 0 ) + 1)\asc)
        iC = iC + 1
      EndIf
  
      i = i + 1
    Until i >= memLen
    
     If n
       PokeA(*Encoded + iC, Base91_Alphabet(b % 91 + 1)\asc)
       iC = iC + 1
       If n > 7 Or b > 90 
         PokeA(*Encoded + iC, Base91_Alphabet(b / 91 | 0 + 1)\asc)
         iC = iC + 1
       EndIf
     EndIf
     
     If iC > 0
       sRet = PeekS(*Encoded, iC, #PB_Ascii)
     EndIf
     FreeMemory(*Encoded)
     
     ProcedureReturn sRet
   EndIf
EndProcedure

Procedure.l Base91_decode(*Mem, memLen.l)
  Define i.l, p.l, v.l, s1.s, b.l, n.l, mCount.l
  
  If *Mem = 0 Or memLen = 0
    ProcedureReturn
  EndIf
  
  If Base91_Initiated = #False
    Base91_Init()
  EndIf
  
  v = -1
  
  Repeat
    s1 = PeekS(*Mem + i, 1, #PB_Ascii)

    If s1
      p = FindInB91(Base91_Alphabet(), s1)
      
      If p > -1
        If v = -1
          v = p
        Else
          v = v + p * 91
          b =  b | v << n
          
          If (v & 8191) > 88
            n = n + 13
          Else
            n = n + 14
          EndIf
          
          Repeat
            PokeA(*Mem + mCount, b & $ff)
            mCount = mCount + 1
            b = b >> 8
            n = n - 8
          Until n <= 7
          
          v = -1
        EndIf
      EndIf
    Else
      mCount = 0
      v = -1
      Break
    EndIf
    
    i = i + 1
  Until i >= memLen
  
  If v > -1
    PokeA(*Mem + mCount, (b | v << n) & $ff)
    mCount = mCount + 1
  EndIf
  
  ProcedureReturn mCount 
EndProcedure


Procedure.b encodeFile(FileIn.s, FileOut.s)
  Define *dIn, dLen.l, encoded.s, bRet.b
  
  If OpenFile(0, FileIn, #PB_File_SharedRead)
    dLen = Lof(0)
    If dLen
      *dIn = AllocateMemory(dLen + SizeOf(Character))
      If *dIn
        ReadData(0, *dIn, dLen)
      EndIf
    EndIf
    CloseFile(0)
  EndIf
  
  If dLen
    encoded = Base91_encode(*dIn, dLen)
    FreeMemory(*dIn)
    
    If encoded
      If OpenFile(1, FileOut, #PB_File_SharedWrite | #PB_File_SharedRead | #PB_Ascii)
        If WriteString(1, encoded, #PB_Ascii)
          encoded = ""
          bRet = #True
        EndIf
        CloseFile(1)
      EndIf
    EndIf
  EndIf
  
  ProcedureReturn bRet
EndProcedure

Procedure.b decodeFile(FileIn.s, FileOut.s)
  Define *dIn, dLen.l, lDecoded.l, bRet.b
  
  If OpenFile(0, FileIn, #PB_File_SharedRead)
    dLen = Lof(0)
    If dLen
      *dIn = AllocateMemory(dLen + SizeOf(Character))
      If *dIn
        ReadData(0, *dIn, dLen)
      EndIf
    EndIf
    CloseFile(0)
  EndIf
  
  If dLen
    lDecoded = Base91_decode(*dIn, dLen)
    If lDecoded
      If OpenFile(1, FileOut, #PB_File_SharedWrite | #PB_File_SharedRead | #PB_Unicode)
        If WriteData(1, *dIn, lDecoded)
          FreeMemory(*dIn) : *dIn = 0
          bRet = #True
        EndIf
        
        CloseFile(1)
      EndIf
    EndIf
    If *dIn > 0
      FreeMemory(*dIn)
    EndIf
  EndIf
  
  ProcedureReturn bRet
EndProcedure

Procedure.s encodeString(sIn.s)
  Define *dIn, dLen.l, dRet.s
  
  dLen = Len(sIn)
  
  If dLen = 0
    ProcedureReturn
  EndIf
  
  *dIn = AllocateMemory(dLen + SizeOf(Character))
  
  If *dIn
    If PokeS(*dIn, sIn, dLen, #PB_Ascii)
      dRet = Base91_encode(*dIn, dLen)
    EndIf
    FreeMemory(*dIn)
  EndIf
  
  ProcedureReturn dRet
EndProcedure

Procedure.s decodeString(sIn.s)
  Define *dIn, dLen.l, decLen.l, sRet.s
  dLen = Len(sIn)
  
  If dLen = 0
    ProcedureReturn
  EndIf
  
  *dIn = AllocateMemory(dLen + SizeOf(Character))
  
  If *dIn
    If PokeS(*dIn, sIn, dLen, #PB_Ascii)
      decLen = Base91_decode(*dIn, dLen)
    EndIf
    
    If decLen
      sRet = PeekS(*dIn, decLen, #PB_Ascii)
    EndIf
    
    FreeMemory(*dIn)
  EndIf
  
  ProcedureReturn sRet
EndProcedure
Encoding/decoding is pretty fast, if you know how to improve it then please share.
Last edited by xakep on Wed Aug 15, 2018 4:26 pm, edited 5 times in total.
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3870
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

Re: [PureBasic] Base91 encode/decode

Post by wilbert »

xakep wrote:Decoding of big files is pretty fast, but encoding is very slow, not sure how to make it more fast.
The main issue is that you are combining strings.
When encoding, you should allocate a buffer, fill that and return it instead of combining strings with + .
If that's not enough, using asm will make it even more fast.
Windows (x64)
Raspberry Pi OS (Arm64)
xakep
User
User
Posts: 40
Joined: Fri Mar 25, 2016 2:02 pm
Location: Europe

Re: [PureBasic] Base91 encode/decode

Post by xakep »

wilbert wrote:
xakep wrote:Decoding of big files is pretty fast, but encoding is very slow, not sure how to make it more fast.
The main issue is that you are combining strings.
When encoding, you should allocate a buffer, fill that and return it instead of combining strings with + .
If that's not enough, using asm will make it even more fast.
Yes, that's right, now encoding is also fast:

Code: Select all

; BasE91 encode/decode
; Coded by: xakep
; Based on http://base91.sourceforge.net/ and https://github.com/pkalogiros/base91.js
; Version 0.2, thanks to wilbert@purebasic.fr

EnableExplicit

Structure B91
  s.s[1]
  asc.a
EndStructure

Global Dim Base91_Alphabet.B91(91)
Global Base91_Initiated.b

Procedure.b Base91_Init()
  Define i.l, bAlph.s, sNow.s
  
  bAlph = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!#$%&()*+,./:;<=>?@[]^_`{|}~" + #DOUBLEQUOTE$
  
  i = 1
  Repeat
    sNow = Mid(bAlph, i, 1)

    Base91_Alphabet(i)\s = sNow
    Base91_Alphabet(i)\asc = Asc(sNow)
    
    i = i + 1
  Until i > 91
  
  Base91_Initiated = #True
EndProcedure

Procedure.l FindInB91(Array Arr.B91(1), sToFind.s)
  Define i.l, zFound.b
  
  i = 1
  Repeat
    If Arr(i)\s = sToFind
      zFound = #True
      Break
    EndIf
    i = i + 1
  Until i > 91
  
  If zFound = #True
    ProcedureReturn i -1
  EndIf
  
EndProcedure

Procedure.s Base91_encode(*Mem, memLen.l)
  Define i.l, b.l, n.l, v.l, sRet.s, x.l, *Encoded, eLen.l, iC.l
  
  If *Mem = 0 Or memLen = 0
    ProcedureReturn
  EndIf
  
  If Base91_Initiated = #False
    Base91_Init()
  EndIf
  
  *Encoded = AllocateMemory(memLen * 1.5)
  
  If *Encoded
    Repeat
      x = PeekA(*Mem + i)
      
      b = b | x << n
      n = n + 8
      
      If n > 13
        v = b & 8191
        
        If v > 88
          b = b >> 13
          n = n - 13
        Else
          v = b & 16383
          b = b >> 14
          n = n - 14
        EndIf
        
        PokeA(*Encoded + iC, Base91_Alphabet(v % 91 + 1)\asc)
        iC = iC + 1
        PokeA(*Encoded + iC, Base91_Alphabet((v / 91 | 0 ) + 1)\asc)
        iC = iC + 1
      EndIf
  
      i = i + 1
    Until i >= memLen
    
     If n
       PokeA(*Encoded + iC, Base91_Alphabet(b % 91 + 1)\asc)
       iC = iC + 1
       If n > 7 Or b > 90 
         PokeA(*Encoded + iC, Base91_Alphabet(b / 91 | 0 + 1)\asc)
         iC = iC + 1
       EndIf
     EndIf
     
     If iC > 0
       sRet = PeekS(*Encoded, iC, #PB_Ascii)
     EndIf
     FreeMemory(*Encoded)
     
     ProcedureReturn sRet
   EndIf
EndProcedure

Procedure.l Base91_decode(*Mem, memLen.l)
  Define i.l, p.l, v.l, s1.s, b.l, n.l, mCount.l
  
  If *Mem = 0 Or memLen = 0
    ProcedureReturn
  EndIf
  
  If Base91_Initiated = #False
    Base91_Init()
  EndIf
  
  v = -1
  
  Repeat
    s1 = PeekS(*Mem + i, 1, #PB_Ascii)

    If s1
      p = FindInB91(Base91_Alphabet(), s1)
      
      If p > -1
        If v = -1
          v = p
        Else
          v = v + p * 91
          b =  b | v << n
          
          If (v & 8191) > 88
            n = n + 13
          Else
            n = n + 14
          EndIf
          
          Repeat
            PokeA(*Mem + mCount, b & $ff)
            mCount = mCount + 1
            b = b >> 8
            n = n - 8
          Until n <= 7
          
          v = -1
        EndIf
      EndIf
    Else
      mCount = 0
      v = -1
      Break
    EndIf
    
    i = i + 1
  Until i >= memLen
  
  If v > -1
    PokeA(*Mem + mCount, (b | v << n) & $ff)
    mCount = mCount + 1
  EndIf
  
  ProcedureReturn mCount 
EndProcedure


Procedure.b encodeFile(FileIn.s, FileOut.s)
  Define *dIn, dLen.l, encoded.s, bRet.b
  
  If OpenFile(0, FileIn, #PB_File_SharedRead)
    dLen = Lof(0)
    If dLen
      *dIn = AllocateMemory(dLen + SizeOf(Character))
      If *dIn
        ReadData(0, *dIn, dLen)
      EndIf
    EndIf
    CloseFile(0)
  EndIf
  
  If dLen
    encoded = Base91_encode(*dIn, dLen)
    FreeMemory(*dIn)
    
    If encoded
      If OpenFile(1, FileOut, #PB_File_SharedWrite | #PB_File_SharedRead | #PB_Ascii)
        If WriteString(1, encoded, #PB_Ascii)
          encoded = ""
          bRet = #True
        EndIf
        CloseFile(1)
      EndIf
    EndIf
  EndIf
  
  ProcedureReturn bRet
EndProcedure

Procedure.b decodeFile(FileIn.s, FileOut.s)
  Define *dIn, dLen.l, lDecoded.l, bRet.b
  
  If OpenFile(0, FileIn, #PB_File_SharedRead)
    dLen = Lof(0)
    If dLen
      *dIn = AllocateMemory(dLen + SizeOf(Character))
      If *dIn
        ReadData(0, *dIn, dLen)
      EndIf
    EndIf
    CloseFile(0)
  EndIf
  
  If dLen
    lDecoded = Base91_decode(*dIn, dLen)
    If lDecoded
      If OpenFile(1, FileOut, #PB_File_SharedWrite | #PB_File_SharedRead | #PB_Unicode)
        If WriteData(1, *dIn, lDecoded)
          FreeMemory(*dIn) : *dIn = 0
          bRet = #True
        EndIf
        
        CloseFile(1)
      EndIf
    EndIf
    If *dIn > 0
      FreeMemory(*dIn)
    EndIf
  EndIf
  
  ProcedureReturn bRet
EndProcedure

Procedure.s encodeString(sIn.s)
  Define *dIn, dLen.l, dRet.s
  
  dLen = Len(sIn)
  
  If dLen = 0
    ProcedureReturn
  EndIf
  
  *dIn = AllocateMemory(dLen + SizeOf(Character))
  
  If *dIn
    If PokeS(*dIn, sIn, dLen, #PB_Ascii)
      dRet = Base91_encode(*dIn, dLen)
    EndIf
    FreeMemory(*dIn)
  EndIf
  
  ProcedureReturn dRet
EndProcedure

Procedure.s decodeString(sIn.s)
  Define *dIn, dLen.l, decLen.l, sRet.s
  dLen = Len(sIn)
  
  If dLen = 0
    ProcedureReturn
  EndIf
  
  *dIn = AllocateMemory(dLen + SizeOf(Character))
  
  If *dIn
    If PokeS(*dIn, sIn, dLen, #PB_Ascii)
      decLen = Base91_decode(*dIn, dLen)
    EndIf
    
    If decLen
      sRet = PeekS(*dIn, decLen, #PB_Ascii)
    EndIf
    
    FreeMemory(*dIn)
  EndIf
  
  ProcedureReturn sRet
EndProcedure
I suppose string + string is slow because behind-the-scene code is like: GetMemSize + (re)allocatememory + pokes (or maybe asc(s) + pokeA) .. :D
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3870
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

Re: [PureBasic] Base91 encode/decode

Post by wilbert »

xakep wrote:I suppose string + string is slow because behind-the-scene code is like: GetMemSize + (re)allocatememory + pokes (or maybe asc(s) + pokeA) .. :D
An important reason is that PB doesn't store the length of a string so every time something is added, it has to scan the length of both strings before combining them.
So the longer the string gets, the more time it will take to add something to it.
Windows (x64)
Raspberry Pi OS (Arm64)
User avatar
Kwai chang caine
Always Here
Always Here
Posts: 5353
Joined: Sun Nov 05, 2006 11:42 pm
Location: Lyon - France

Re: [PureBasic] Base91 encode/decode

Post by Kwai chang caine »

Thirst post works very well on W10 x86 / v5.62 x86
Thanks for sharing 8)
ImageThe happiness is a road...
Not a destination
Post Reply