Test code loads mozilla GB dictionary from marco's git, not sure if my cursory pass of the file is 100% but it should show it working
Update 0.95
changed structure to array based to facilitate load, save and catch and to keep the memory size the same as x86 on x64
while it will dynamically grow it will incur excessive page faults, so you need to allocate enough slots
Update 0.96
Mashed it into scintiila autocomplete
*note it's still a little buggy
Return enter selection space bar to cancel
Tested on xp with debugger off works better, linux x64 with debugger seems fine
Update 0.96.1
Code: Select all
;note if your rebuilding this between versions please make sure you deleted the file en-GB_trie.tri
DeclareModule TRIE
; Nibbled TRIE, for unicode strings; Access time O(m*2)
; Array based can be saved loaded and caught from memory with compression
; Note node value -1 is reserved for traversal checks
; Author: Idle
; Version: 0.96.1
; Pb 5.71 lts
; Created: 24/01/2020
; Licence MIT
#TRIE_MAX = 1024 ;Max key length in bytes for enumeration of Trie
Structure trie_node
index.l[16]
value.l
EndStructure
Structure Trie
*vt
size.l
avail.l
position.l
datasize.l
nodecount.l
count.l
sb.a[#TRIE_MAX]
*root.Trie_node
EndStructure
Prototype Trie_CB(*Input,len)
Declare Trie_New(size=512)
Declare Trie_Free(*this.Trie)
Declare Trie_Set(*this.Trie,*key,len,value.l=1,mode=#PB_UTF8)
Declare Trie_Get(*this.Trie,*key,len,mode=#PB_UTF8)
Declare Trie_iEnum(*this.Trie,*node.Trie_Node,depth,*pfn.Trie_CB)
Declare Trie_Enum(*this.Trie,*key,len,*pfn.Trie_CB,ReturnMatch=0,mode=#PB_UTF8)
Declare Trie_Clear(*this.Trie)
Interface ITrie
Free()
Set(*key,StringByteLength,value.l=1,mode=#PB_UTF8)
Get(*key,StringBytelength,mode=#PB_UTF8)
Enum(*key,StringBytelength,*pfn.Trie_CB,ReturnMatch=0,mode=#PB_UTF8)
Clear()
Load(file.s)
Save(File.s,CompressLevel=4)
Catch(Address)
Stats(item)
EndInterface
EndDeclareModule
Module TRIE
CompilerIf #PB_Compiler_OS = #PB_OS_Windows
ImportC "zlib.lib"
compress2(*dest,*destlen,*source,sourcelen,level)
uncompress(*dest,*destlen,*source,sourcelen)
EndImport
CompilerElse
ImportC "-lz"
compress2(*dest,*destlen,*source,sourcelen,level)
uncompress(*dest,*destlen,*source,sourcelen)
EndImport
CompilerEndIf
EnableExplicit
;Thanks Wilbert for a sane version of PeekN and PokeN
;Peek Nibble in order High Low
Macro PeekNHL(in,Index,out)
*Mem.Ascii = in
*Mem + Index >> 1
If Index & 1
out = *Mem\a & $f
Else
out = *Mem\a >> 4
EndIf
EndMacro
Macro PokeNHL(in, Number, Index)
*Mem.Ascii = in
*Mem + Index >> 1
If Index & 1
*Mem\a = (*Mem\a & $f0) | (Number & $f)
Else
*Mem\a = (*Mem\a & $0f) | (Number << 4)
EndIf
EndMacro
Procedure Trie_Stats(*this.TRIE,item=1)
Select item
Case 1
ProcedureReturn *this\datasize
Case 2
ProcedureReturn MemorySize(*this\root)
Case 3
ProcedureReturn MemorySize(*this\root) / *this\datasize
Case 4
ProcedureReturn *this\nodecount
Case 5
ProcedureReturn *this\count
EndSelect
EndProcedure
Procedure Trie_Clear(*this.Trie)
Protected idx,*node.Trie_node,size
size = MemorySize(*this\root)
While idx < size
*node = *this\root+idx
If *node\value
*node\value = -1
EndIf
idx + SizeOf(Trie_node)
Wend
EndProcedure
Procedure Trie_New(size=512)
Protected *obj.Trie = AllocateMemory(SizeOf(Trie))
If *obj
*obj\vt=?vt_Trie
*obj\size = size * SizeOf(Trie_node)
*obj\avail= *obj\size - SizeOf(Trie_node)
*obj\position = SizeOf(Trie_node)
*obj\root = AllocateMemory(*obj\size)
EndIf
ProcedureReturn *obj
EndProcedure
Procedure Trie_Free(*this.Trie)
FreeMemory(*this\root)
FreeMemory(*this)
EndProcedure
;Set or Unset a value in the Trie, len must be StringByteLength
Procedure Trie_Set(*this.Trie,*key,len,value.l=1,mode=#PB_UTF8)
Protected *node.Trie_node,*ikey,ilen,a,idx,*mem.Ascii,offset
*node = *this\root
If mode <> #PB_UTF8
*ikey = UTF8(PeekS(*key,len))
len = MemorySize(*ikey)
Else
*ikey = *key
EndIf
*this\datasize + len
ilen = (len*2)-1
For a = 0 To ilen
PeekNHL(*ikey,a,idx)
If *node\index[idx]
offset = *node\index[idx]
*node = *this\root + offset
ElseIf *this\avail > 0
offset = *this\Position
*node\index[idx] = offset
*node = *this\root + offset
*node\value = -1
*this\nodecount + 1
*this\avail - SizeOf(Trie_node)
*this\Position+SizeOf(Trie_node)
Else
*this\avail = 512*SizeOf(Trie_node)
*this\size + *this\avail
*this\root = ReAllocateMemory(*this\root,*this\size)
*node = *this\root + offset
offset = *this\Position
*node\index[idx] = offset ;
*node = *this\root + offset
*node\value = -1
*this\nodecount + 1
*this\avail - SizeOf(Trie_node)
*this\Position+SizeOf(Trie_node)
EndIf
Next
If *node\value
*this\count+1
*node\value = value
EndIf
If mode <> #PB_UTF8
FreeMemory(*ikey)
EndIf
EndProcedure
Procedure Trie_Get(*this.Trie,*key,len,mode=#PB_UTF8)
;Find the value of the key returns the value or 0
Protected *ikey,ilen,*node.Trie_Node,a,idx.i,*mem.Ascii
If mode <> #PB_UTF8
*ikey = UTF8(PeekS(*key,len))
len = MemorySize(*ikey)
Else
*ikey = *key
EndIf
ilen = (len*2)-1
*node.Trie_node = *this\root
For a = 0 To ilen
PeekNHL(*ikey,a,idx)
If *node
If *node\index[idx]
*node = *this\root + *node\index[idx]
Else
If mode <> #PB_UTF8
FreeMemory(*ikey)
EndIf
ProcedureReturn 0
EndIf
Else
FreeMemory(*ikey)
ProcedureReturn 0
EndIf
Next
If mode <> #PB_UTF8
FreeMemory(*ikey)
EndIf
If *node\value > -1
ProcedureReturn *node\value
EndIf
EndProcedure
Procedure Trie_iEnum(*this.Trie,*node.Trie_Node,depth,*pfn.Trie_CB)
Protected a.i,tkey.s,*mem.Ascii
If *node\value = 0
ProcedureReturn 0
EndIf
;build the key
For a=0 To 15
If *node\index[a]
PokeNHL(@*this\sb,a,depth)
Trie_iEnum(*this,*this\root + *node\index[a],depth+1,*pfn)
EndIf
Next
;lookup the key value
If *node\value > 0
If *pfn
*pfn(@*this\sb,depth/2-1)
EndIf
EndIf
ProcedureReturn *node
EndProcedure
Procedure Trie_Enum(*this.Trie,*key,len,*pfn.Trie_CB,ReturnMatch=0,mode=#PB_UTF8)
;Enumerate all the entries below the given root key
Protected *node.Trie_Node,*tnode.Trie_Node,tkey.i,ilen.i,pos.i,*mem.Ascii,a,*ikey,lkey
If mode <> #PB_UTF8
*ikey = UTF8(PeekS(*key,len))
len = MemorySize(*ikey)
ilen =((len-1)*2)-1
Else
*ikey = *key
ilen =(len*2)-1
EndIf
FillMemory(@*this\sb[0],#TRIE_MAX,0)
If ReturnMatch
If Trie_Get(*this,*ikey,len)
If *pfn
*pfn(*ikey,len)
EndIf
ProcedureReturn
EndIf
EndIf
*node = *this\root
For a = 0 To ilen
PeekNHL(*ikey,a,tkey)
*node = *this\root + *node\index[tkey]
If *node\value = 0
If mode <> #PB_UTF8
FreeMemory(*ikey)
EndIf
ProcedureReturn 0
EndIf
Next
CopyMemory(*ikey,@*this\sb,MemorySize(*ikey))
Trie_iEnum(*this,*node,a,*pfn)
If mode <> #PB_UTF8
FreeMemory(*ikey)
EndIf
EndProcedure
Procedure Trie_SaveToFile(*this.trie,file.s,CompressLevel=4)
Protected fn,len,*dst,destlen,srclen
fn = OpenFile(#PB_Any,file)
If fn
srclen = MemorySize(*this\root)
destlen = (srclen * 1.5) + SizeOf(trie) + 8
*dst = AllocateMemory(destlen)
If *dst
If Compress2(*dst+(SizeOf(trie)+8),@destlen,*this\root,srclen,CompressLevel) = 0
PokeL(*dst,srclen)
PokeL(*dst+4,destlen)
CopyMemory(*this,*dst+8,SizeOf(Trie))
len = WriteData(fn,*dst,destlen+SizeOf(trie)+8)
CloseFile(fn)
FreeMemory(*dst)
ProcedureReturn len
EndIf
EndIf
EndIf
EndProcedure
Procedure Trie_LoadFile(*this.trie,file.s)
Protected fn,destlen,srclen,*src,*dst
fn = OpenFile(#PB_Any,file)
If fn
destlen = ReadLong(fn)
*src = AllocateMemory(destlen)
srclen = ReadLong(fn)
*src = AllocateMemory(srclen)
FreeMemory(*this\root)
ReadData(fn,*this,SizeOf(trie))
ReadData(fn,*src,srclen)
*this\vt=?vt_Trie
*this\root = AllocateMemory(destlen)
If uncompress(*this\root,@destlen,*src,srclen) = 0
FreeMemory(*src)
CloseFile(fn)
ProcedureReturn destlen
EndIf
EndIf
EndProcedure
Procedure Trie_Catch(*this.Trie,*mem,len)
Protected destlen,srclen,*src
If *mem
FreeMemory(*this\root)
destlen = PeekL(*mem)
srclen = PeekL(*mem+4)
CopyMemory(*mem+8,*this,SizeOf(Trie))
*this\vt=?vt_Trie
*this\root = AllocateMemory(destlen)
If uncompress(*this\root,@destlen,*mem+SizeOf(Trie)+8,srclen) = 0
ProcedureReturn destlen
EndIf
EndIf
EndProcedure
DataSection: vt_Trie:
Data.i @Trie_Free()
Data.i @Trie_Set()
Data.i @Trie_Get()
Data.i @Trie_Enum()
Data.i @Trie_Clear()
Data.i @Trie_LoadFile()
Data.i @Trie_SaveToFile()
Data.i @Trie_Catch()
Data.i @Trie_Stats()
EndDataSection
EndModule
CompilerIf #PB_Compiler_IsMainFile
;set compiler to create exe in current directory
;On 1st run it will downloads mozilla GB english dictionary and build the Trie
;when you close the window it will compress and save the Trie to file
;On 2nd run it will Load the saved Trie
;if you uncoment Include binary down the bottom it will catch the Trie from memory
UseModule TRIE
EnableExplicit
#Auto_Start =0
#Auto_Get =1
#Auto_Enum =2
Global TmpPath.s = GetTemporaryDirectory() +"en-GB.dic"
Global TriePath.s = GetPathPart(ProgramFilename()) + "en-GB_trie.tri"
Global *mt.ITrie = Trie_New(700384) ;specify number of nodes required, realloc can be very slow
Global index,result
Global *buf = AllocateMemory(1024)
Global itemsize=1024*1024
Global itempos
Global *items = AllocateMemory(itemsize) ;utf8 list of words for lookup
Global *text
Procedure ClearItems()
FillMemory(*items,itemsize,0)
EndProcedure
Procedure cbTrie(*key,len)
Protected out.s
If (itempos + len + 1) < MemorySize(*items)
CopyMemory(*key,*items+itempos,len)
out = PeekS(*items+itempos,len,#PB_UTF8)
Debug out
Debug Str(Len(out)) + " " + Str(len)
itempos + len
PokeA(*items+itempos,32)
itempos + 1
Else
itemsize + len + 1
*items = ReAllocateMemory(*items,itemsize)
CopyMemory(*key,*items+itempos,len)
itempos + len
PokeA(*items+itempos,32)
itempos + 1
EndIf
EndProcedure
Procedure LoadDicFile(Dicfile.s,*trie.ITrie)
Protected fn,fm,pos,word.s,count
fn = ReadFile(#PB_Any,Dicfile)
If fn
fm = ReadStringFormat(fn)
While Not Eof(fn)
word = ReadString(fn,fm,-1)
pos = FindString(word,"/")
If pos > 0
word = Left(word,pos-1)
EndIf
word = LCase(word)
*trie\Set(@word,StringByteLength(word),1,#PB_Unicode)
count+1
Wend
CloseFile(fn)
EndIf
ProcedureReturn count
EndProcedure
Procedure ScintillaCallBack(Gadget, *scinotify.SCNotification)
Static Auto_state.i
Static pos.i,space.i
Protected mods = (#SC_MOD_DELETETEXT | #SC_PERFORMED_USER)
Select *scinotify\nmhdr\code
Case #SCN_AUTOCSELECTION
Debug "selected " + PeekS(*scinotify\text,-1,#PB_UTF8)
Auto_state = #Auto_Start
;ScintillaSendMessage(0,#SCI_AUTOCCANCEL)
pos = 0
Case #SCN_MODIFIED
If *scinotify\modificationType = mods Or *scinotify\modificationType = (mods | #SC_STARTACTION)
pos - *scinotify\length
Debug "back spaced " + *scinotify\length
If pos < 1
Debug "cancel from back space"
Auto_state = #Auto_Start
ScintillaSendMessage(0,#SCI_AUTOCCANCEL)
pos = 0
EndIf
EndIf
Case #SCN_CHARADDED
Select *scinotify\ch
Case $A,$20
If Auto_state < #Auto_Enum
Debug "by space " + PeekS(*buf,pos,#PB_UTF8)
If *mt\Get(*buf,pos+1) = 0
Debug "added "
*mt\Set(*buf,pos+1)
EndIf
Auto_state = #Auto_Start
pos=0
EndIf
Default
PokeA(*buf+pos,*scinotify\ch)
If pos > 1
Auto_state = #Auto_Get
ClearItems()
*mt\enum(*buf,pos+1,@cbTrie())
itempos=0
If ScintillaSendMessage(0,#SCI_AUTOCACTIVE) = 0
ScintillaSendMessage(0,#SCI_AUTOCSHOW,pos+1,*items)
Else
ScintillaSendMessage(0,#SCI_AUTOCCANCEL)
ScintillaSendMessage(0,#SCI_AUTOCSHOW,pos+1,*items)
EndIf
EndIf
pos+1
EndSelect
Default
EndSelect
EndProcedure
If ?TRIE_DICTEND - ?TRIE_DICT > 0
result = *mt\Catch(?TRIE_DICT)
Debug "Caught Trie :" + Str(result) + " bytes"
ElseIf FileSize(TriePath) > 0
result = *mt\Load(TriePath)
Debug "Loaded Trie :" + Str(result) + " bytes"
Else
InitNetwork()
If ReceiveHTTPFile("https://raw.githubusercontent.com/marcoagpinto/aoo-mozilla-en-dict/master/en_GB%20(Marco%20Pinto)/en-GB.dic",TmpPath)
loadDicfile(TmpPath,*mt)
Debug "raw bytes loaded =" + Str(*mt\Stats(1))
Debug "size of data stored = " + Str(*mt\Stats(2))
Debug "data / raw = " + Str(*mt\Stats(3))
Debug "nodes " + Str(*mt\Stats(4))
Debug "items " + Str(*mt\Stats(5))
Else
MessageRequester("OOPS","failed to download")
End
EndIf
EndIf
If OpenWindow(0, 0, 0, 800, 600, "ScintillaGadget", #PB_Window_SystemMenu | #PB_Window_ScreenCentered)
If InitScintilla()
ScintillaGadget(0,0,0,800, 600,@ScintillaCallBack())
ScintillaSendMessage(0,#SCI_AUTOCSETIGNORECASE,#True)
ScintillaSendMessage(0,#SCI_AUTOCSETAUTOHIDE,0)
ScintillaSendMessage(0,#SCI_AUTOCSETMAXHEIGHT,10)
ScintillaSendMessage(0,#SCI_AUTOCSETORDER,#SC_ORDER_PRESORTED)
ScintillaSendMessage(0, #SCI_STYLESETFORE, 0, RGB(0, 0, 0))
*Text=UTF8(" ")
ScintillaSendMessage(0, #SCI_AUTOCSTOPS,0,*text)
FreeMemory(*Text)
*Text=UTF8("Autocomplete test in ScintillaGadget...")
ScintillaSendMessage(0, #SCI_SETTEXT, 0,*Text)
FreeMemory(*Text)
EndIf
Repeat : Until WaitWindowEvent() = #PB_Event_CloseWindow
EndIf
CloseWindow(0)
If FileSize(TriePath) < 0
result = *mt\Save(TriePath)
Debug "Saved Trie :" + Str(result) + " bytes"
EndIf
*mt\clear()
*mt\free()
DataSection : TRIE_DICT:
;IncludeBinary #PB_Compiler_FilePath + "en-GB_trie.tri"
TRIE_DICTEND:
EndDataSection
CompilerEndIf