It keeps key/value pairs always sorted on the keys you use and is fast at matching prefixes.
It's not nearly as fast as a map but has its own strengths.
The default configuration treats A-Z and a-z as equal. If you don't want that, you can alter the procedure that
generates the compare table or change
#CB_USE_COMPARETABLE = #False
While it's not directly a port, the C source on this page has helped a lot in trying to understand how a crit-bit tree works.
https://github.com/ennorehling/critbit/ ... /critbit.c
While working on it, Idle tried it and gave me useful feedback. Nevertheless, it could still need some further testing.
So if you encounter any problems or have additional thoughts, feel free to tell.
Code: Select all
;- Module CritBitTree
; Author : Wilbert
; Update : Aug 25, 2020
DeclareModule CritBitTree
;- Public constants
; Enumeration values for CBKey and CBKeyPtr procedures
Enumeration
#CritBit_DeleteKey ; Delete Key; return true when deleted, false when not found
#CritBit_GetPointer ; Return external node pointer (can be used to check if key exists)
#CritBit_GetValue ; Return value
#CritBit_SetValue ; Set value; return node pointer (creates key if needed)
#CritBit_AddValue ; Add to value; return node pointer (creates key if needed)
EndEnumeration
;- Public structures
; External crit-bit node structure
Structure CritBitNodeE
Value.i
KeyLen.u
Key.c[0]
EndStructure
; Crit-bit tree structure
Structure CritBitTree
*Root
Size.i
EndStructure
; Structure for CBStats procedure
Structure CritBitStats
Count.i
TotalKeyLength.i
LongestKeyLength.i
AllocatedMemory.i
MaxDepth.i
EndStructure
;- Public procedure declarations
Declare CBClear (*CB.CritBitTree)
Declare.i CBKey (*CB.CritBitTree, Key.s, Operation = #CritBit_SetValue, Value.i = 0)
Declare.i CBKeyPtr (*CB.CritBitTree, *KeyString, Operation = #CritBit_SetValue, Value.i = 0)
Declare.i CBSize (*CB.CritBitTree)
Declare.s CBFirstKey (*CB.CritBitTree, Prefix.s = "")
Declare.i CBGetKeys (*CB.CritBitTree, Array Keys.s(1), Prefix.s = "", MaxResults = -1, Offset = 0, Ascending = #True)
Declare.s CBLastKey (*CB.CritBitTree, Prefix.s = "")
Declare CBStats (*CB.CritBitTree, *Stats.CritBitStats, Prefix.s = "")
Declare CBWalk (*CB.CritBitTree, *Callback, Prefix.s = "", *UserData = #Null, Ascending = #True)
Declare.s CBPreviousKey (*CB.CritBitTree, Key.s)
Declare.s CBNextKey (*CB.CritBitTree, Key.s)
Declare.s CBLongestPrefix (*CB.CritBitTree)
Declare.s CBLongestCommonPrefix (*CB.CritBitTree)
EndDeclareModule
Module CritBitTree
DisableDebugger
EnableExplicit
;- Private configuration constants
#CB_USE_COMPARETABLE = #True
#CB_THREADED_CLEAR = #False
;- Other private constants (do not change)
#EXTERNAL_NODE = 1
#USHIFT = Bool(SizeOf(Character) = 2)
;- Private structures
Structure CBString
u.c[0]
EndStructure
Structure CritBitNode
*Child[2]
Unit.u
Mask.u
EndStructure
Structure NodePtr
*Node.CritBitNode
EndStructure
;- Create and init compare table
Global Dim _CT.u(65535)
Procedure Init_CT()
Protected i.i
For i = 1 To 65535
_CT(i) = i
Next
For i = 'A' To 'Z'
_CT(i) = i ! $20
Next
EndProcedure
Init_CT()
;- Versions of CT and CmpKey based on #CB_USE_COMPARETABLE
CompilerIf #CB_USE_COMPARETABLE = #True
Macro CT(n) : _CT(n) : EndMacro
Procedure.i CmpKey(*Key1.CBString, *Key2.CBString, KeyLen)
While KeyLen
KeyLen - 1
If CT(*Key1\u[KeyLen]) <> CT(*Key2\u[KeyLen])
ProcedureReturn #False
EndIf
Wend
ProcedureReturn #True
EndProcedure
CompilerElse
Macro CT(n) : (n) : EndMacro
Macro CmpKey(Key1, Key2, KeyLen)
CompareMemory(Key1, Key2, KeyLen << #USHIFT)
EndMacro
CompilerEndIf
;- Procedures
; >> [Private] Create external node <<
Procedure.i CBCreateExternalNode(*Key, KeyLen.u, Value.i = 0)
Protected *ExternalNode.CritBitNodeE
; zero fill when allocate is important when structure is extended !
*ExternalNode = AllocateMemory(KeyLen << #USHIFT + SizeOf(CritBitNodeE))
*ExternalNode\Value = Value
*ExternalNode\KeyLen = KeyLen
CopyMemory(*Key, @*ExternalNode\Key, KeyLen << #USHIFT)
ProcedureReturn *ExternalNode | #EXTERNAL_NODE
EndProcedure
; >> [Private] Recursive helper function for CBClear <<
Procedure CBClear_r(*Node.CritBitNode)
If *Node & #EXTERNAL_NODE
*Node & -2
Else
CBClear_r(*Node\Child[0])
CBClear_r(*Node\Child[1])
EndIf
FreeMemory(*Node)
EndProcedure
; >< [Public] CBClear ><------------;
; ;
; Clears the entire crit-bit tree ;
; ----------------------------------;
Procedure CBClear(*CB.CritBitTree)
If *CB And *CB\Root
CompilerIf #CB_THREADED_CLEAR
CreateThread(@CBClear_r(), *CB\Root)
CompilerElse
CBClear_r(*CB\Root)
CompilerEndIf
*CB\Root = #Null
*CB\Size = 0
EndIf
EndProcedure
; >< [Public] CBKey ><------------------------------------;
; ;
; Perform basic key-value operations on a crit-bit tree ;
; --------------------------------------------------------;
Procedure.i CBKey(*CB.CritBitTree, Key.s, Operation = #CritBit_SetValue, Value.i = 0)
ProcedureReturn CBKeyPtr(*CB, @Key, Operation, Value)
EndProcedure
; >< [Public] CBKeyPtr ><---------------------------------;
; ;
; Perform basic key-value operations on a crit-bit tree ;
; Uses a pointer to a key string ;
; --------------------------------------------------------;
Procedure.i CBKeyPtr(*CB.CritBitTree, *KeyString, Operation = #CritBit_SetValue, Value.i = 0)
Protected.CritBitNodeE *ExternalNode
Protected.CritBitNode *Node
Protected.CBString *Key
Protected.NodePtr *PtrIter, *PtrPrev
Protected.i KeyLen, Branch, Unit, _Unit, Mask, uVal.c
If *CB And *KeyString
; Get the length of the key
*Key = *KeyString
KeyLen = MemoryStringLength(*Key)
; Check if *Root has already been set
If *CB\Root = #Null
If Operation >= #CritBit_SetValue
*CB\Root = CBCreateExternalNode(*Key, KeyLen, Value)
*CB\Size = 1
EndIf
ProcedureReturn *CB\Root & -2
Else
; Iterate over internal nodes
_Unit = -1
*PtrPrev = #Null
*PtrIter = @*CB\Root
While *PtrIter\Node & #EXTERNAL_NODE = #False
*PtrPrev = *PtrIter
If *PtrIter\Node\Unit < KeyLen
If *PtrIter\Node\Unit <> _Unit; new unit ?
uVal = CT(*Key\u[*PtrIter\Node\Unit])
_Unit = *PtrIter\Node\Unit
EndIf
*PtrIter = @*PtrIter\Node\Child[((uVal | *PtrIter\Node\Mask) + 1) >> 16]
Else
*PtrIter = @*PtrIter\Node\Child[0]
EndIf
Wend
*ExternalNode = *PtrIter\Node & -2
If Operation < #CritBit_SetValue
If *ExternalNode\KeyLen = KeyLen And CmpKey(*Key, @*ExternalNode\Key, KeyLen)
; Key found
Select Operation
Case #CritBit_GetValue
ProcedureReturn *ExternalNode\Value
Case #CritBit_GetPointer
ProcedureReturn *ExternalNode
Case #CritBit_DeleteKey
FreeMemory(*ExternalNode)
If *PtrPrev
*Node = *PtrPrev\Node
*PtrPrev\Node = *Node\Child[Bool(*Node\Child[0] = *PtrIter\Node)]
FreeMemory(*Node)
Else
*CB\Root = #Null
EndIf
*CB\Size - 1
ProcedureReturn #True
EndSelect
Else
ProcedureReturn #False
EndIf
Else
; #CritBit_SetValue / #CritBit_AddValue
Unit = 0
While Unit < KeyLen And Unit < *ExternalNode\KeyLen And
CT(*Key\u[Unit]) = CT(*ExternalNode\Key[Unit])
Unit + 1
Wend
If Unit = KeyLen
If Unit = *ExternalNode\KeyLen
; End reached of both Key and ExternalKey (Key found)
If Operation = #CritBit_SetValue
*ExternalNode\Value = Value
Else
; #CritBit_AddValue
*ExternalNode\Value + Value
EndIf
ProcedureReturn *ExternalNode
EndIf
; End reached of Key but not of ExternalKey
Mask = CT(*ExternalNode\Key[Unit])
ElseIf Unit = *ExternalNode\KeyLen
; End reached of ExternalKey but not of Key
Mask = CT(*Key\u[Unit])
Else
; Neither Key nor ExternalKey have reached the end
Mask = CT(*Key\u[Unit]) ! CT(*ExternalNode\Key[Unit])
EndIf
; Find insert position
Mask | Mask >> 1
Mask | Mask >> 2
Mask | Mask >> 4
Mask | Mask >> 8
Mask = (Mask & ~(Mask >> 1)) ! $ffff
If *PtrPrev And (Unit < *PtrPrev\Node\Unit Or (Unit = *PtrPrev\Node\Unit And Mask < *PtrPrev\Node\Mask))
_Unit = -1
*PtrIter = @*CB\Root
While *PtrIter\Node\Unit < Unit Or (*PtrIter\Node\Unit = Unit And *PtrIter\Node\Mask < Mask)
If *PtrIter\Node\Unit <> _Unit; new unit ?
uVal = CT(*Key\u[*PtrIter\Node\Unit])
_Unit = *PtrIter\Node\Unit
EndIf
*PtrIter = @*PtrIter\Node\Child[((uVal | *PtrIter\Node\Mask) + 1) >> 16]
Wend
EndIf
; Create and insert node
Branch = ((CT(*Key\u[Unit]) | Mask) + 1) >> 16
*Node = AllocateMemory(SizeOf(CritBitNode), #PB_Memory_NoClear)
*Node\Child[Branch] = CBCreateExternalNode(*Key, KeyLen, Value)
*Node\Child[Branch ! 1] = *PtrIter\Node
*Node\Unit = Unit
*Node\Mask = Mask
*PtrIter\Node = *Node
*CB\Size + 1
EndIf
EndIf
EndIf
ProcedureReturn *ExternalNode
EndProcedure
; >> [Private] Find the top node for a given prefix <<
Procedure.i CBFindTop(*CB.CritBitTree, *Prefix.CBString)
Protected.CritBitNodeE *ExternalNode
Protected.CritBitNode *Node, *Top
Protected.i PrefixLen, _Unit, uVal.c
If *CB And *CB\Root
PrefixLen = MemoryStringLength(*Prefix)
If PrefixLen = 0
ProcedureReturn *CB\Root
Else
_Unit = -1
*Node = *CB\Root
*Top = *Node
While *Node & #EXTERNAL_NODE = #False
If *Node\Unit < PrefixLen
If *Node\Unit <> _Unit; new unit ?
uVal = CT(*Prefix\u[*Node\Unit])
_Unit = *Node\Unit
EndIf
*Node = *Node\Child[((uVal | *Node\Mask) + 1) >> 16]
*Top = *Node
Else
*Node = *Node\Child[0]
EndIf
Wend
*ExternalNode = *Node & -2
If PrefixLen <= *ExternalNode\KeyLen And CmpKey(@*ExternalNode\Key, *Prefix, PrefixLen)
ProcedureReturn *Top
EndIf
EndIf
EndIf
ProcedureReturn #Null
EndProcedure
; >> [Private] Recursive helper function for CBStats <<
Procedure CBStats_r(*Node.CritBitNode, *Stats.CritBitStats, Depth = 1)
Protected.CritBitNodeE *ExternalNode
If *Node & #EXTERNAL_NODE
*ExternalNode = *Node & -2
*Stats\Count + 1
*Stats\TotalKeyLength + *ExternalNode\KeyLen
If *ExternalNode\KeyLen > *Stats\LongestKeyLength
*Stats\LongestKeyLength = *ExternalNode\KeyLen
EndIf
If Depth > *Stats\MaxDepth
*Stats\MaxDepth = Depth
EndIf
Else
CBStats_r(*Node\Child[0], *Stats, Depth + 1)
CBStats_r(*Node\Child[1], *Stats, Depth + 1)
EndIf
EndProcedure
; >< [Public] CBStats ><-------------------------------------------;
; ;
; Fills the *Stats structure with information for a given prefix ;
; -----------------------------------------------------------------;
Procedure CBStats(*CB.CritBitTree, *Stats.CritBitStats, Prefix.s = "")
Protected.CritBitNode *Top
*Stats\Count = 0
*Stats\TotalKeyLength = 0
*Stats\LongestKeyLength = 0
*Stats\AllocatedMemory = 0
*Stats\MaxDepth = 0
*Top = CBFindTop(*CB, @Prefix)
If *Top
CBStats_r(*Top, *Stats)
*Stats\AllocatedMemory = (*Stats\Count - 1) * SizeOf(CritBitNode) +
*Stats\Count * SizeOf(CritBitNodeE) +
*Stats\TotalKeyLength << #USHIFT
EndIf
EndProcedure
; >> [Private] Follow child nodes with index 0 <<
Procedure.i CBFollow0(*Node.CritBitNode)
While *Node & #EXTERNAL_NODE = #False
*Node = *Node\Child[0]
Wend
ProcedureReturn *Node & -2
EndProcedure
; >> [Private] Follow child nodes with index 1 <<
Procedure.i CBFollow1(*Node.CritBitNode)
While *Node & #EXTERNAL_NODE = #False
*Node = *Node\Child[1]
Wend
ProcedureReturn *Node & -2
EndProcedure
; >< [Public] CBFirstKey ><-----------------------;
; ;
; Returns the first key matching a given prefix ;
; ------------------------------------------------;
Procedure.s CBFirstKey(*CB.CritBitTree, Prefix.s = "")
Protected.CritBitNodeE *ExternalNode
Protected.CritBitNode *Node
*Node = CBFindTop(*CB, @Prefix)
If *Node
*ExternalNode = CBFollow0(*Node)
ProcedureReturn PeekS(@*ExternalNode\Key, *ExternalNode\KeyLen)
EndIf
ProcedureReturn ""
EndProcedure
; >< [Public] CBLastKey ><-----------------------;
; ;
; Returns the last key matching a given prefix ;
; -----------------------------------------------;
Procedure.s CBLastKey(*CB.CritBitTree, Prefix.s = "")
Protected.CritBitNodeE *ExternalNode
Protected.CritBitNode *Node
*Node = CBFindTop(*CB, @Prefix)
If *Node
*ExternalNode = CBFollow1(*Node)
ProcedureReturn PeekS(@*ExternalNode\Key, *ExternalNode\KeyLen)
EndIf
ProcedureReturn ""
EndProcedure
; >< [Public] CBPreviousKey ><--------------;
; ;
; If Key exists, returns the previous key ;
; If not, returns an empty string ;
; ------------------------------------------;
Procedure.s CBPreviousKey(*CB.CritBitTree, Key.s)
Protected.CritBitNodeE *ExternalNode
Protected.CritBitNode *Node, *Prev
Protected.CBString *Key
Protected.i KeyLen, Branch, _Unit, uVal.c
If *CB And *CB\Root
_Unit = -1
*Key = @Key
KeyLen = MemoryStringLength(@Key)
*Node = *CB\Root
While *Node & #EXTERNAL_NODE = #False
Branch = 0
If *Node\Unit < KeyLen
If *Node\Unit <> _Unit; new unit ?
uVal = CT(*Key\u[*Node\Unit])
_Unit = *Node\Unit
EndIf
Branch = ((uVal | *Node\Mask) + 1) >> 16
EndIf
If Branch = 1
*Prev = *Node\Child[0]
EndIf
*Node = *Node\Child[Branch]
Wend
*ExternalNode = *Node & -2
If *Prev And *ExternalNode\KeyLen = KeyLen And CmpKey(*Key, @*ExternalNode\Key, KeyLen)
*ExternalNode = CBFollow1(*Prev)
ProcedureReturn PeekS(@*ExternalNode\Key, *ExternalNode\KeyLen)
EndIf
EndIf
ProcedureReturn ""
EndProcedure
; >< [Public] CBNextKey ><--------------;
; ;
; If Key exists, returns the next key ;
; If not, returns an empty string ;
; --------------------------------------;
Procedure.s CBNextKey(*CB.CritBitTree, Key.s)
Protected.CritBitNodeE *ExternalNode
Protected.CritBitNode *Node, *Next
Protected.CBString *Key
Protected.i KeyLen, Branch, _Unit, uVal.c
If *CB And *CB\Root
_Unit = -1
*Key = @Key
KeyLen = MemoryStringLength(@Key)
*Node = *CB\Root
While *Node & #EXTERNAL_NODE = #False
Branch = 0
If *Node\Unit < KeyLen
If *Node\Unit <> _Unit; new unit ?
uVal = CT(*Key\u[*Node\Unit])
_Unit = *Node\Unit
EndIf
Branch = ((uVal | *Node\Mask) + 1) >> 16
EndIf
If Branch = 0
*Next = *Node\Child[1]
EndIf
*Node = *Node\Child[Branch]
Wend
*ExternalNode = *Node & -2
If *Next And *ExternalNode\KeyLen = KeyLen And CmpKey(*Key, @*ExternalNode\Key, KeyLen)
*ExternalNode = CBFollow0(*Next)
ProcedureReturn PeekS(@*ExternalNode\Key, *ExternalNode\KeyLen)
EndIf
EndIf
ProcedureReturn ""
EndProcedure
; >> [Private] Recursive helper function for longest prefix <<
Procedure CBLongestPrefix_r(*Node.CritBitNode, *LPLen.Integer, *LPNode.Integer)
If *Node\Child[0] & *Node\Child[1] & #EXTERNAL_NODE
If *Node\Unit > *LPLen\i
*LPLen\i = *Node\Unit
*LPNode\i = *Node\Child[0] & -2
EndIf
Else
If *Node\Child[0] & #EXTERNAL_NODE = #False
CBLongestPrefix_r(*Node\Child[0], *LPLen, *LPNode)
EndIf
If *Node\Child[1] & #EXTERNAL_NODE = #False
CBLongestPrefix_r(*Node\Child[1], *LPLen, *LPNode)
EndIf
EndIf
EndProcedure
; >< [Public] CBLongestPrefix ><-------------------------------;
; ;
; Returns the longest prefix two or more keys have in common ;
; If there are multiple results, it returns the first one ;
; -------------------------------------------------------------;
Procedure.s CBLongestPrefix(*CB.CritBitTree)
Protected.CritBitNodeE *ExternalNode
Protected.i LPLen
If *CB And *CB\Root And *CB\Root & #EXTERNAL_NODE = #False
CBLongestPrefix_r(*CB\Root, @LPLen, @*ExternalNode)
If LPLen
ProcedureReturn PeekS(@*ExternalNode\Key, LPLen)
EndIf
EndIf
ProcedureReturn ""
EndProcedure
; >< [Public] CBLongestCommonPrefix ><-----------------;
; ;
; Returns the longest prefix all keys have in common ;
; -----------------------------------------------------;
Procedure.s CBLongestCommonPrefix(*CB.CritBitTree)
Protected.CritBitNodeE *ExternalNode
Protected.CritBitNode *Node
If *CB And *CB\Root
*Node = *CB\Root
If *Node & #EXTERNAL_NODE = #False And *Node\Unit
*ExternalNode = CBFollow0(*Node)
ProcedureReturn PeekS(@*ExternalNode\Key, *Node\Unit)
EndIf
EndIf
ProcedureReturn ""
EndProcedure
; >> [Private] Walk function prototype <<
Prototype.i WalkFn(Key.s, Value.i, *UserData)
; >> [Private] Recursive helper function for CBWalk <<
Procedure.i CBWalk_r(*Node.CritBitNode, *Callback.WalkFn, *UserData, Ascending)
Protected.CritBitNodeE *ExternalNode
Protected.i RetVal
If *Node & #EXTERNAL_NODE
*ExternalNode = *Node & -2
ProcedureReturn *Callback(PeekS(@*ExternalNode\Key, *ExternalNode\KeyLen), *ExternalNode\Value, *UserData)
Else
RetVal = CBWalk_r(*Node\Child[Ascending ! 1], *Callback, *UserData, Ascending)
If RetVal
ProcedureReturn RetVal
Else
ProcedureReturn CBWalk_r(*Node\Child[Ascending], *Callback, *UserData, Ascending)
EndIf
EndIf
EndProcedure
; >< [Public] CBWalk ><-------------------------------------------------------;
; ;
; Iterates over all keys matching a given prefix using a callback function ;
; The callback function has to look like WalkFn(Key.s, Value.i, *UserData) ;
; The callback function can be stopped by returning true ;
; ----------------------------------------------------------------------------;
Procedure CBWalk(*CB.CritBitTree, *Callback, Prefix.s = "", *UserData = #Null, Ascending = #True)
Protected.CritBitNode *Top
*Top = CBFindTop(*CB, @Prefix)
If *Top
Ascending & 1
ProcedureReturn CBWalk_r(*Top, *Callback, *UserData, Ascending)
EndIf
EndProcedure
; >< [Public] CBGetKeys ><---------------------------------------;
; ;
; Fills the Keys() array with all keys matching a given prefix ;
; It is possible to supply a maximum number for the results ;
; and an offset from the results where to start ;
; ---------------------------------------------------------------;
Procedure.i CBGetKeys(*CB.CritBitTree, Array Keys.s(1), Prefix.s = "", MaxResults = -1, Offset = 0, Ascending = #True)
Protected.CritBitNodeE *ExternalNode
Protected.CritBitStats Stats
Protected.CritBitNode *Node
Protected.i i, n
*Node = CBFindTop(*CB, @Prefix)
If *Node = #Null
ProcedureReturn 0
EndIf
; Count the number of keys matching the given prefix
CBStats_r(*Node, @Stats)
; Calculate the amount of Keys given MaxResults and Offset
If MaxResults < 0 Or MaxResults > Stats\Count - Offset
MaxResults = Stats\Count - Offset
EndIf
If MaxResults <= 0
ProcedureReturn -1
EndIf
; Redim the Keys array
ReDim Keys(MaxResults - 1)
; Create a node stack to iterate non recursively
Protected Dim *NodeStack.CritBitNode(Stats\MaxDepth)
*NodeStack(0) = *Node
; Process all keys
Ascending & 1
While i < MaxResults
If *NodeStack(n) & #EXTERNAL_NODE
*ExternalNode = *NodeStack(n) & -2
If Offset > 0
Offset - 1
Else
Keys(i) = PeekS(@*ExternalNode\Key, *ExternalNode\KeyLen)
i + 1
EndIf
n - 1
If n < 0
Break
EndIf
Else
*NodeStack(n + 1) = *NodeStack(n)\Child[Ascending ! 1]
*NodeStack(n) = *NodeStack(n)\Child[Ascending]
n + 1
EndIf
Wend
; Return the amount of keys
ProcedureReturn i
EndProcedure
; >< [Public] CBSize ><-----------------------------------;
; ;
; Returns the number of keys the crit-bit tree contains ;
; --------------------------------------------------------;
Procedure.i CBSize(*CB.CritBitTree)
If *CB
ProcedureReturn *CB\Size
EndIf
EndProcedure
EndModule