Add COLLATE NOACCENT to sqlite table column
- doctorized
- Addict
- Posts: 856
- Joined: Fri Mar 27, 2009 9:41 am
- Location: Athens, Greece
Add COLLATE NOACCENT to sqlite table column
I did some tests with MySQL Workbench where I used 'utf8mb4_0900_ai_ci' collation and managed to run some queries that I needed but I fail to add this collation to utf8 SQLite db with PB. How can I add accent insensitive collation?
Re: Add COLLATE NOACCENT to sqlite table column
Since no one answered up to now...
You need to write an own collation procedure and publish it to SQLite.
Unfortunately this does not work directly with LIKE.
So you need also to write a 'replacement' for LIKE.
Good luck.
You need to write an own collation procedure and publish it to SQLite.
Unfortunately this does not work directly with LIKE.
So you need also to write a 'replacement' for LIKE.
Good luck.
- doctorized
- Addict
- Posts: 856
- Joined: Fri Mar 27, 2009 9:41 am
- Location: Athens, Greece
Re: Add COLLATE NOACCENT to sqlite table column
I have read this again but how can it be implemented? A simple example would be appreciated, if could be delivered by anybody.
Re: Add COLLATE NOACCENT to sqlite table column
At the moment I have no time to step in
https://www.sqlite.org/c3ref/create_collation.html
https://stackoverflow.com/questions/268 ... ke-queries
http://sqlite.1065341.n5.nabble.com/sql ... 49949.html
Ugly alternative:
https://stackoverflow.com/questions/549 ... ts-sqlite3
https://www.sqlite.org/c3ref/create_collation.html
https://stackoverflow.com/questions/268 ... ke-queries
http://sqlite.1065341.n5.nabble.com/sql ... 49949.html
Ugly alternative:
https://stackoverflow.com/questions/549 ... ts-sqlite3
- doctorized
- Addict
- Posts: 856
- Joined: Fri Mar 27, 2009 9:41 am
- Location: Athens, Greece
Re: Add COLLATE NOACCENT to sqlite table column
Thank you very much. I have seen them and many others like them. I am not familiar with function creation so I am oriented to something like the ungly one you mention at the end. For now seems good to me.infratec wrote:At the moment I have no time to step in
https://www.sqlite.org/c3ref/create_collation.html
https://stackoverflow.com/questions/268 ... ke-queries
http://sqlite.1065341.n5.nabble.com/sql ... 49949.html
Ugly alternative:
https://stackoverflow.com/questions/549 ... ts-sqlite3
Re: Add COLLATE NOACCENT to sqlite table column
Try this:
I don't know how to remove the own like function.
Code: Select all
EnableExplicit
#SQLITE_UTF8 = 1 ; IMP: R-37514-35566
#SQLITE_UTF16LE = 2 ; IMP: R-03371-37637
#SQLITE_UTF16BE = 3 ; IMP: R-51971-34154
#SQLITE_UTF16 = 4 ; Use native byte order
#SQLITE_ANY = 5 ; Deprecated
#SQLITE_UTF16_ALIGNED = 8 ; sqlite3_create_collation only
#SQLITE_STATIC = 0
#SQLITE_TRANSIENT = -1
ImportC "sqlite3.lib"
sqlite3_create_function16.i(*sqlite3_context, zFunctionName.s, nArg.i, eTextRep.i, *pApp, *xFunc, *xStep, *xFinal)
sqlite3_value_text16.i(*sqlite3_value)
sqlite3_result_int(*sqlite3_context, int.i)
sqlite3_create_collation16.i(*sqlite3_context, zName.s, eTextRep.i, *pArg, *xCompare)
EndImport
Procedure.s RemoveAccent(Input$)
DataSection
Vocals:
Data.c 'a', 'e', 'i', 'o', 'u', 'a', 'e', 'i', 'o', 'u', 'a', 'e', 'i', 'o', 'u', 'a', 'e', 'i', 'o', 'u'
AccVocals:
Data.c 'á', 'é', 'í', 'ó', 'ú', 'à', 'è', 'ì', 'ò', 'ù', 'ä', 'ë', 'ï', 'ö', 'ü', 'â', 'ê', 'î', 'ô', 'û'
EndDataSection
Protected i.i
For i = 0 To 19
ReplaceString(Input$, Chr(PeekC(?AccVocals + i * 2)), Chr(PeekC(?Vocals + i * 2)), #PB_String_InPlace)
Next i
ProcedureReturn Input$
EndProcedure
ProcedureC.i noaccent(*Arg, S1.i, String1$, S2.i, String2$)
Protected Result.i, Str1$, Str2$
;Debug Str(S1) + " " + String1$ + " --- " + Str(S2) + " " + String2$
Str1$ = RemoveAccent(LCase(String1$))
Str2$ = RemoveAccent(LCase(String2$))
;Debug Str1$ + " --- " + Str2$
If Str1$ < Str2$
Result = -1
ElseIf Str1$ > Str2$
Result = 1
Else
Result = 0
EndIf
ProcedureReturn Result
EndProcedure
; ProcedureC NoAccentLike(*context, argc.i, *argv)
;
; Protected Like$, Text$, Result.i, RegEx.i
;
; If argc = 2
;
; Like$ = PeekS(sqlite3_value_text16(PeekI(*argv)))
; If Like$ <> ""
; Text$ = PeekS(sqlite3_value_text16(PeekI(*argv + SizeOf(Integer))))
;
; ;Debug Like$ + " -> " + Text$
;
; Like$ = ReplaceString(Like$, "%", ".*")
; Like$ = ReplaceString(Like$, "_", ".")
;
; Text$ = RemoveAccent(Text$)
;
; RegEx = CreateRegularExpression(#PB_Any, Like$)
; If IsRegularExpression(RegEx)
; Result = MatchRegularExpression(RegEx, Text$)
; FreeRegularExpression(RegEx)
; EndIf
;
; EndIf
; EndIf
;
; sqlite3_result_int(*context, Result)
;
; EndProcedure
ProcedureC NoAccentLike(*context, argc.i, *argv)
Protected Like$, Text$, Result.i
Static LastLike$, RegEx.i
If argc = 2
Like$ = PeekS(sqlite3_value_text16(PeekI(*argv)))
If Like$ <> ""
If LastLike$ <> Like$
LastLike$ = Like$
If IsRegularExpression(RegEx)
FreeRegularExpression(RegEx)
EndIf
Like$ = ReplaceString(Like$, "%", ".*")
Like$ = ReplaceString(Like$, "_", ".")
RegEx = CreateRegularExpression(#PB_Any, Like$)
EndIf
If IsRegularExpression(RegEx)
Text$ = PeekS(sqlite3_value_text16(PeekI(*argv + SizeOf(Integer))))
Text$ = RemoveAccent(Text$)
Result = MatchRegularExpression(RegEx, Text$)
EndIf
EndIf
EndIf
sqlite3_result_int(*context, Result)
EndProcedure
Define DB.i, StartTime.i, EndTime.i, i.i
UseSQLiteDatabase()
DB = OpenDatabase(#PB_Any, ":memory:", "", "", #PB_Database_SQLite)
If DB
sqlite3_create_collation16(DatabaseID(DB), "noaccent", #SQLITE_UTF16, #Null, @noaccent())
DatabaseUpdate(DB, "CREATE TABLE myTable (description text)")
CompilerIf Not #PB_Compiler_Debugger
For i = 0 To 10000
CompilerEndIf
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blau')")
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blae')")
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blaé')")
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('bláe')")
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blau 1')")
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blae 2')")
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('3 blaé')")
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('4 bláe')")
CompilerIf Not #PB_Compiler_Debugger
Next i
CompilerEndIf
If DatabaseQuery(DB, "SELECT * FROM myTable WHERE description = 'blae' COLLATE noaccent")
While NextDatabaseRow(DB)
Debug GetDatabaseString(DB, 0)
Wend
FinishDatabaseQuery(DB)
Else
Debug DatabaseError()
EndIf
Debug "-----"
sqlite3_create_function16(DatabaseID(DB), "like", 2, #SQLITE_UTF16, #Null, @NoAccentLike(), #Null, #Null)
StartTime = ElapsedMilliseconds()
If DatabaseQuery(DB, "SELECT * FROM myTable WHERE description LIKE '%blae%'")
While NextDatabaseRow(DB)
Debug GetDatabaseString(DB, 0)
Wend
FinishDatabaseQuery(DB)
Else
Debug DatabaseError()
EndIf
EndTime = ElapsedMilliseconds()
CompilerIf Not #PB_Compiler_Debugger
MessageRequester("Needed", Str(EndTime - StartTime))
CompilerEndIf
CloseDatabase(DB)
EndIf
Last edited by infratec on Sat Dec 07, 2019 10:13 pm, edited 4 times in total.
Re: Add COLLATE NOACCENT to sqlite table column
I added the own LIKE
Re: Add COLLATE NOACCENT to sqlite table column
I optimized the speed of LIKE
In my test with 40000 rows it needs now 79ms instead of 125ms
In my test with 40000 rows it needs now 79ms instead of 125ms
Re: Add COLLATE NOACCENT to sqlite table column
It's much faster to implement your own LIKE with a lookup table.
(of course an asm version of the code is even faster )
I implemented the easiest version for now.
There's also a more optimized one which is more efficient at handling match all '%'.
I might try that one as well.
The code below supports both LIKE with 2 and 3 arguments (ESCAPE).
I didn't specify a library for the ImportC to make it work on both Windows and macOS.
(of course an asm version of the code is even faster )
I implemented the easiest version for now.
There's also a more optimized one which is more efficient at handling match all '%'.
I might try that one as well.
The code below supports both LIKE with 2 and 3 arguments (ESCAPE).
I didn't specify a library for the ImportC to make it work on both Windows and macOS.
Code: Select all
#PatCmp_MatchAll = '%'
#PatCmp_MatchOne = '_'
Procedure.i PatCmp(*str.Unicode, *pat.Unicode, Array lut.u(1), esc.l = 0)
; wildcard matching algorithm based on code from
; http://dodobyte.com/wildcard.html
Protected *pat_, *str_
; check arguments
If *pat = #Null Or *str = #Null
ProcedureReturn #False
EndIf
; while *str has characters
While *str\u
If *pat\u
; handle 'match all'
If *pat\u = #PatCmp_MatchAll
*pat + 2
If *pat\u = 0
ProcedureReturn #True
EndIf
*pat_ = *pat
*str_ = *str
Continue
EndIf
; handle escape character
If *pat\u = esc
*pat + 2
If *pat\u = 0
ProcedureReturn #False
EndIf
EndIf
; lookup characters and compare
If lut(*pat\u) = lut(*str\u) Or *pat\u = #PatCmp_MatchOne
*pat + 2
*str + 2
Continue
EndIf
EndIf
; characters didn't match
If *pat_ = #Null
ProcedureReturn #False
EndIf
*str_ + 2
*str = *str_
*pat = *pat_
Wend
; *str has no more characters
; check if pattern ends with 'match all'
While *pat\u = #PatCmp_MatchAll
*pat + 2
Wend
If *pat\u = 0
ProcedureReturn #True
Else
ProcedureReturn #False
EndIf
EndProcedure
EnableExplicit
;- Constants
#SQLITE_UTF16 = 4
;- Imports
ImportC ""
sqlite3_create_function16.i(*db, zFunctionName.p-unicode, nArg, eTextRep, *pApp, *xFunc, *xStep, *xFinal)
sqlite3_result_int(*sqlite3_context, int)
sqlite3_value_text16.i(*sqlite3_value)
EndImport
;- Globals
Global Dim LUT_Like.u(65535)
;- Procedures
Procedure LUT_Init()
Protected c1.s = "ÁÉÍÓÚÀÈÌÒÙÄÉÏÖÜÂÊÎÔÛáéíóúàèìòùäëïöüâêîôû"
Protected c2.s = "aeiouaeiouaeiouaeiouaeiouaeiouaeiouaeiou"
Protected.Unicode *c1=@c1, *c2=@c2
Protected.i c
For c = 0 To 65535
LUT_Like(c) = c
Next
For c = 'A' To 'Z'
LUT_Like(c) = c + 32
Next
While *c1\u And *c2\u
LUT_Like(*c1\u) = *c2\u
*c1 + 2
*c2 + 2
Wend
EndProcedure
LUT_Init()
ProcedureC Like(*context, argc, *argv.Integer)
Protected.Unicode *pat, *str, *esc
Protected.l esc
; get pattern and string
*pat = sqlite3_value_text16(*argv\i)
*argv + SizeOf(Integer)
*str = sqlite3_value_text16(*argv\i)
; three argument LIKE
If argc = 3
*argv + SizeOf(Integer)
*esc = sqlite3_value_text16(*argv\i)
If *esc = #Null
ProcedureReturn
EndIf
esc = *esc\u
*esc + 2
If *esc\u
ProcedureReturn
EndIf
EndIf
; set result
sqlite3_result_int(*context, PatCmp(*str, *pat, LUT_Like(), esc))
EndProcedure
;- Main code
Define DB.i, StartTime.i, EndTime.i, i.i
UseSQLiteDatabase()
DB = OpenDatabase(#PB_Any, ":memory:", "", "", #PB_Database_SQLite)
If DB
DatabaseUpdate(DB, "CREATE TABLE myTable (description text)")
CompilerIf Not #PB_Compiler_Debugger
For i = 0 To 10000
CompilerEndIf
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blau')")
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blaE')")
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blaé')")
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('bláe')")
CompilerIf Not #PB_Compiler_Debugger
Next i
CompilerEndIf
sqlite3_create_function16(DatabaseID(DB), "like", -1, #SQLITE_UTF16, #Null, @Like(), #Null, #Null)
StartTime = ElapsedMilliseconds()
If DatabaseQuery(DB, "SELECT * FROM myTable WHERE description LIKE '%e' ESCAPE '\'")
While NextDatabaseRow(DB)
Debug GetDatabaseString(DB, 0)
Wend
FinishDatabaseQuery(DB)
Else
Debug DatabaseError()
EndIf
EndTime = ElapsedMilliseconds()
CompilerIf Not #PB_Compiler_Debugger
MessageRequester("Needed", Str(EndTime - StartTime))
CompilerEndIf
CloseDatabase(DB)
EndIf
Windows (x64)
Raspberry Pi OS (Arm64)
Raspberry Pi OS (Arm64)
- doctorized
- Addict
- Posts: 856
- Joined: Fri Mar 27, 2009 9:41 am
- Location: Athens, Greece
Re: Add COLLATE NOACCENT to sqlite table column
All codes run ok when running as is but infratec's code is not working if we add the values 'blau 1', 'blae 2', '3 blaé', '4 bláe' and do search with description like '%blae%'. On the other hand, wilbert's codes runs ok in this situation and is the one I need. Thank you both very much for your help! I really appreciate it!
Re: Add COLLATE NOACCENT to sqlite table column
Hm...
withand
I get
blae
blaé
bláe
blae 2
3 blaé
4 bláe
I think that's right.
What is wrong in your case?
Btw. wilberts code returns the same result.
(I included your example in my code above)
with
Code: Select all
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blau')")
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blae')")
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blaé')")
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('bláe')")
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blau 1')")
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blae 2')")
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('3 blaé')")
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('4 bláe')")
Code: Select all
SELECT * FROM myTable WHERE description LIKE '%blae%'
blae
blaé
bláe
blae 2
3 blaé
4 bláe
I think that's right.
What is wrong in your case?
Btw. wilberts code returns the same result.
(I included your example in my code above)
Re: Add COLLATE NOACCENT to sqlite table column
Good to hear the code works for you.
Here's also the optimized version.
It should give the same results but is significantly faster when the pattern contains '%' and the database strings are longer.
I also tried an asm version which is again faster but since it's asm code, it's much harder to read the code.
Here's also the optimized version.
It should give the same results but is significantly faster when the pattern contains '%' and the database strings are longer.
I also tried an asm version which is again faster but since it's asm code, it's much harder to read the code.
Code: Select all
#PatCmp_MatchAll = '%'
#PatCmp_MatchOne = '_'
Procedure.i PatCmp(*str.Unicode, *pat.Unicode, Array lut.u(1), esc.l = 0)
; wildcard matching algorithm based on code from
; http://dodobyte.com/wildcard.html
Protected *pat_, *str_, lc.u
; check arguments
If *pat = #Null Or *str = #Null
ProcedureReturn #False
EndIf
; while *str has characters
While *str\u
If *pat\u
; handle 'match one'
If *pat\u = #PatCmp_MatchOne
*pat + 2
*str + 2
Continue
EndIf
; handle 'match all'
If *pat\u = #PatCmp_MatchAll
Repeat
*pat + 2
While *pat\u = #PatCmp_MatchOne
*pat + 2
*str + 2
If *str\u = 0
Break 3
EndIf
Wend
Until *pat\u <> #PatCmp_MatchAll
If *pat\u = 0
ProcedureReturn #True
EndIf
If *pat\u = esc
*pat + 2
If *pat\u = 0
ProcedureReturn #False
EndIf
EndIf
*pat_ = *pat
Goto NextMatch
EndIf
; compare characters
If *pat\u = esc
*pat + 2
If *pat\u = 0
ProcedureReturn #False
EndIf
EndIf
If lut(*pat\u) = lut(*str\u)
*pat + 2
*str + 2
Continue
EndIf
EndIf
; characters didn't match
If *pat_ = #Null
ProcedureReturn #False
EndIf
*pat = *pat_
*str = *str_ + 2
; find next match after 'match all'
NextMatch:
lc = lut(*pat\u)
While lc <> lut(*str\u)
*str + 2
If *str\u = 0
ProcedureReturn #False
EndIf
Wend
*str_ = *str
*str + 2
*pat + 2
Wend
; *str has no more characters
; check if pattern ends with 'match all'
While *pat\u = #PatCmp_MatchAll
*pat + 2
Wend
If *pat\u = 0
ProcedureReturn #True
Else
ProcedureReturn #False
EndIf
EndProcedure
EnableExplicit
;- Constants
#SQLITE_UTF16 = 4
;- Imports
ImportC ""
sqlite3_create_function16.i(*db, zFunctionName.p-unicode, nArg, eTextRep, *pApp, *xFunc, *xStep, *xFinal)
sqlite3_result_int(*sqlite3_context, int)
sqlite3_value_text16.i(*sqlite3_value)
EndImport
;- Globals
Global Dim LUT_Like.u(65535)
;- Procedures
Procedure LUT_Init()
Protected c1.s = "ÁÉÍÓÚÀÈÌÒÙÄÉÏÖÜÂÊÎÔÛáéíóúàèìòùäëïöüâêîôû"
Protected c2.s = "aeiouaeiouaeiouaeiouaeiouaeiouaeiouaeiou"
Protected.Unicode *c1=@c1, *c2=@c2
Protected.i c
For c = 0 To 65535
LUT_Like(c) = c
Next
For c = 'A' To 'Z'
LUT_Like(c) = c + 32
Next
While *c1\u And *c2\u
LUT_Like(*c1\u) = *c2\u
*c1 + 2
*c2 + 2
Wend
EndProcedure
LUT_Init()
ProcedureC Like(*context, argc, *argv.Integer)
Protected.Unicode *pat, *str, *esc
Protected.l esc
; get pattern and string
*pat = sqlite3_value_text16(*argv\i)
*argv + SizeOf(Integer)
*str = sqlite3_value_text16(*argv\i)
; three argument LIKE
If argc = 3
*argv + SizeOf(Integer)
*esc = sqlite3_value_text16(*argv\i)
If *esc = #Null
ProcedureReturn
EndIf
esc = *esc\u
*esc + 2
If *esc\u
ProcedureReturn
EndIf
EndIf
; set result
sqlite3_result_int(*context, PatCmp(*str, *pat, LUT_Like(), esc))
EndProcedure
;- Main code
Define DB.i, StartTime.i, EndTime.i, i.i
UseSQLiteDatabase()
DB = OpenDatabase(#PB_Any, ":memory:", "", "", #PB_Database_SQLite)
If DB
DatabaseUpdate(DB, "CREATE TABLE myTable (description text)")
CompilerIf Not #PB_Compiler_Debugger
For i = 0 To 10000
CompilerEndIf
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blau')")
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blaE')")
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blaé')")
DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('bláe')")
CompilerIf Not #PB_Compiler_Debugger
Next i
CompilerEndIf
sqlite3_create_function16(DatabaseID(DB), "like", -1, #SQLITE_UTF16, #Null, @Like(), #Null, #Null)
StartTime = ElapsedMilliseconds()
If DatabaseQuery(DB, "SELECT * FROM myTable WHERE description LIKE '%e' ESCAPE '\'")
While NextDatabaseRow(DB)
Debug GetDatabaseString(DB, 0)
Wend
FinishDatabaseQuery(DB)
Else
Debug DatabaseError()
EndIf
EndTime = ElapsedMilliseconds()
CompilerIf Not #PB_Compiler_Debugger
MessageRequester("Needed", Str(EndTime - StartTime))
CompilerEndIf
CloseDatabase(DB)
EndIf
Windows (x64)
Raspberry Pi OS (Arm64)
Raspberry Pi OS (Arm64)