Add COLLATE NOACCENT to sqlite table column

Just starting out? Need help? Post your questions and find answers here.
User avatar
doctorized
Addict
Addict
Posts: 856
Joined: Fri Mar 27, 2009 9:41 am
Location: Athens, Greece

Add COLLATE NOACCENT to sqlite table column

Post by doctorized »

I did some tests with MySQL Workbench where I used 'utf8mb4_0900_ai_ci' collation and managed to run some queries that I needed but I fail to add this collation to utf8 SQLite db with PB. How can I add accent insensitive collation?
infratec
Always Here
Always Here
Posts: 6866
Joined: Sun Sep 07, 2008 12:45 pm
Location: Germany

Re: Add COLLATE NOACCENT to sqlite table column

Post by infratec »

Since no one answered up to now...

You need to write an own collation procedure and publish it to SQLite.
Unfortunately this does not work directly with LIKE.

So you need also to write a 'replacement' for LIKE.

Good luck.
User avatar
doctorized
Addict
Addict
Posts: 856
Joined: Fri Mar 27, 2009 9:41 am
Location: Athens, Greece

Re: Add COLLATE NOACCENT to sqlite table column

Post by doctorized »

I have read this again but how can it be implemented? A simple example would be appreciated, if could be delivered by anybody.
User avatar
doctorized
Addict
Addict
Posts: 856
Joined: Fri Mar 27, 2009 9:41 am
Location: Athens, Greece

Re: Add COLLATE NOACCENT to sqlite table column

Post by doctorized »

Thank you very much. I have seen them and many others like them. I am not familiar with function creation so I am oriented to something like the ungly one you mention at the end. For now seems good to me.
infratec
Always Here
Always Here
Posts: 6866
Joined: Sun Sep 07, 2008 12:45 pm
Location: Germany

Re: Add COLLATE NOACCENT to sqlite table column

Post by infratec »

Try this:

Code: Select all

EnableExplicit

#SQLITE_UTF8          = 1    ; IMP: R-37514-35566
#SQLITE_UTF16LE       = 2    ; IMP: R-03371-37637
#SQLITE_UTF16BE       = 3    ; IMP: R-51971-34154
#SQLITE_UTF16         = 4    ; Use native byte order
#SQLITE_ANY           = 5    ; Deprecated
#SQLITE_UTF16_ALIGNED = 8    ; sqlite3_create_collation only

#SQLITE_STATIC = 0
#SQLITE_TRANSIENT = -1

ImportC "sqlite3.lib"
  sqlite3_create_function16.i(*sqlite3_context, zFunctionName.s, nArg.i, eTextRep.i, *pApp, *xFunc, *xStep, *xFinal)
  sqlite3_value_text16.i(*sqlite3_value)
  sqlite3_result_int(*sqlite3_context, int.i)
  sqlite3_create_collation16.i(*sqlite3_context, zName.s, eTextRep.i, *pArg, *xCompare)
EndImport


Procedure.s RemoveAccent(Input$)
 
  DataSection
    Vocals:
    Data.c 'a', 'e', 'i', 'o', 'u', 'a', 'e', 'i', 'o', 'u', 'a', 'e', 'i', 'o', 'u', 'a', 'e', 'i', 'o', 'u'
    AccVocals:
    Data.c 'á', 'é', 'í', 'ó', 'ú', 'à', 'è', 'ì', 'ò', 'ù', 'ä', 'ë', 'ï', 'ö', 'ü', 'â', 'ê', 'î', 'ô', 'û'
  EndDataSection
 
 
  Protected i.i
 
  For i = 0 To 19
    ReplaceString(Input$, Chr(PeekC(?AccVocals + i * 2)), Chr(PeekC(?Vocals + i * 2)), #PB_String_InPlace)
  Next i
 
  ProcedureReturn Input$
 
EndProcedure




ProcedureC.i noaccent(*Arg, S1.i, String1$, S2.i, String2$)
 
  Protected Result.i, Str1$, Str2$
 
  ;Debug Str(S1) + " " + String1$ + " --- " + Str(S2) + " " + String2$
 
  Str1$ = RemoveAccent(LCase(String1$))
  Str2$ = RemoveAccent(LCase(String2$))
 
  ;Debug Str1$ + " --- " + Str2$
 
  If Str1$ < Str2$
    Result = -1
  ElseIf Str1$ > Str2$
    Result = 1
  Else
    Result = 0
  EndIf
 
  ProcedureReturn Result
 
EndProcedure



; ProcedureC NoAccentLike(*context, argc.i, *argv)
; 
;   Protected Like$, Text$, Result.i, RegEx.i
; 
;   If argc = 2
;   
;     Like$ = PeekS(sqlite3_value_text16(PeekI(*argv)))
;     If Like$ <> ""
;       Text$ = PeekS(sqlite3_value_text16(PeekI(*argv + SizeOf(Integer))))
;     
;       ;Debug Like$ + " -> " + Text$
;     
;       Like$ = ReplaceString(Like$, "%", ".*")
;       Like$ = ReplaceString(Like$, "_", ".")
;     
;       Text$ = RemoveAccent(Text$)
;     
;       RegEx = CreateRegularExpression(#PB_Any, Like$)
;       If IsRegularExpression(RegEx)
;         Result = MatchRegularExpression(RegEx, Text$)
;         FreeRegularExpression(RegEx)
;       EndIf
;     
;     EndIf
;   EndIf
; 
;   sqlite3_result_int(*context, Result)
; 
; EndProcedure




ProcedureC NoAccentLike(*context, argc.i, *argv)
 
  Protected Like$, Text$, Result.i
  Static LastLike$, RegEx.i
 
  If argc = 2
   
    Like$ = PeekS(sqlite3_value_text16(PeekI(*argv)))
    If Like$ <> ""
     
      If LastLike$ <> Like$
       
        LastLike$ = Like$
       
        If IsRegularExpression(RegEx)
          FreeRegularExpression(RegEx)
        EndIf
       
        Like$ = ReplaceString(Like$, "%", ".*")
        Like$ = ReplaceString(Like$, "_", ".")
       
        RegEx = CreateRegularExpression(#PB_Any, Like$)
      EndIf
     
      If IsRegularExpression(RegEx)
        Text$ = PeekS(sqlite3_value_text16(PeekI(*argv + SizeOf(Integer))))
        Text$ = RemoveAccent(Text$)
        Result = MatchRegularExpression(RegEx, Text$)
      EndIf
     
    EndIf
  EndIf
 
  sqlite3_result_int(*context, Result)
 
EndProcedure




Define DB.i, StartTime.i, EndTime.i, i.i

UseSQLiteDatabase()

DB = OpenDatabase(#PB_Any, ":memory:", "", "", #PB_Database_SQLite)
If DB
 
  sqlite3_create_collation16(DatabaseID(DB), "noaccent", #SQLITE_UTF16, #Null, @noaccent())
 
  DatabaseUpdate(DB, "CREATE TABLE myTable (description text)")
  CompilerIf Not #PB_Compiler_Debugger
    For i = 0 To 10000
  CompilerEndIf
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blau')")
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blae')")
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blaé')")
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('bláe')")
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blau 1')")
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blae 2')")
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('3 blaé')")
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('4 bláe')")
  CompilerIf Not #PB_Compiler_Debugger
    Next i
  CompilerEndIf
 
  If DatabaseQuery(DB, "SELECT * FROM myTable WHERE description = 'blae' COLLATE noaccent")
    While NextDatabaseRow(DB)
      Debug GetDatabaseString(DB, 0)
    Wend
    FinishDatabaseQuery(DB)
  Else
    Debug DatabaseError()
  EndIf
 
  Debug "-----"
 
  sqlite3_create_function16(DatabaseID(DB), "like", 2, #SQLITE_UTF16, #Null, @NoAccentLike(), #Null, #Null)
 
  StartTime = ElapsedMilliseconds()
  If DatabaseQuery(DB, "SELECT * FROM myTable WHERE description LIKE '%blae%'")
    While NextDatabaseRow(DB)
      Debug GetDatabaseString(DB, 0)
    Wend
    FinishDatabaseQuery(DB)
  Else
    Debug DatabaseError()
  EndIf
  EndTime = ElapsedMilliseconds()
 
  CompilerIf Not #PB_Compiler_Debugger
    MessageRequester("Needed", Str(EndTime - StartTime))
  CompilerEndIf
 
  CloseDatabase(DB)
EndIf
I don't know how to remove the own like function. :cry:
Last edited by infratec on Sat Dec 07, 2019 10:13 pm, edited 4 times in total.
infratec
Always Here
Always Here
Posts: 6866
Joined: Sun Sep 07, 2008 12:45 pm
Location: Germany

Re: Add COLLATE NOACCENT to sqlite table column

Post by infratec »

I added the own LIKE
infratec
Always Here
Always Here
Posts: 6866
Joined: Sun Sep 07, 2008 12:45 pm
Location: Germany

Re: Add COLLATE NOACCENT to sqlite table column

Post by infratec »

I optimized the speed of LIKE

In my test with 40000 rows it needs now 79ms instead of 125ms
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3870
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

Re: Add COLLATE NOACCENT to sqlite table column

Post by wilbert »

It's much faster to implement your own LIKE with a lookup table.
(of course an asm version of the code is even faster :wink: )

I implemented the easiest version for now.
There's also a more optimized one which is more efficient at handling match all '%'.
I might try that one as well.

The code below supports both LIKE with 2 and 3 arguments (ESCAPE).
I didn't specify a library for the ImportC to make it work on both Windows and macOS.

Code: Select all

#PatCmp_MatchAll = '%'
#PatCmp_MatchOne = '_'

Procedure.i PatCmp(*str.Unicode, *pat.Unicode, Array lut.u(1), esc.l = 0)
  
  ; wildcard matching algorithm based on code from
  ; http://dodobyte.com/wildcard.html
  
  Protected *pat_, *str_
  
  ; check arguments
  If *pat = #Null Or *str = #Null
    ProcedureReturn #False
  EndIf
  
  ; while *str has characters
  While *str\u
    If *pat\u
      
      ; handle 'match all'
      If *pat\u = #PatCmp_MatchAll
        *pat + 2
        If *pat\u = 0
          ProcedureReturn #True
        EndIf
        *pat_ = *pat
        *str_ = *str
        Continue
      EndIf
      
      ; handle escape character
      If *pat\u = esc
        *pat + 2
        If *pat\u = 0
          ProcedureReturn #False
        EndIf
      EndIf
      
      ; lookup characters and compare
      If lut(*pat\u) = lut(*str\u) Or *pat\u = #PatCmp_MatchOne
        *pat + 2
        *str + 2
        Continue
      EndIf
      
    EndIf
    
    ; characters didn't match
    If *pat_ = #Null
      ProcedureReturn #False
    EndIf
    *str_ + 2
    *str = *str_
    *pat = *pat_
    
  Wend
  
  ; *str has no more characters
  ; check if pattern ends with 'match all'
  While *pat\u = #PatCmp_MatchAll
    *pat + 2
  Wend
  If *pat\u = 0
    ProcedureReturn #True
  Else
    ProcedureReturn #False
  EndIf
  
EndProcedure



EnableExplicit

;- Constants

#SQLITE_UTF16 = 4


;- Imports

ImportC ""
  sqlite3_create_function16.i(*db, zFunctionName.p-unicode, nArg, eTextRep, *pApp, *xFunc, *xStep, *xFinal)
  sqlite3_result_int(*sqlite3_context, int)
  sqlite3_value_text16.i(*sqlite3_value)
EndImport


;- Globals

Global Dim LUT_Like.u(65535)


;- Procedures

Procedure LUT_Init()
  
  Protected c1.s = "ÁÉÍÓÚÀÈÌÒÙÄÉÏÖÜÂÊÎÔÛáéíóúàèìòùäëïöüâêîôû"
  Protected c2.s = "aeiouaeiouaeiouaeiouaeiouaeiouaeiouaeiou"
  
  Protected.Unicode *c1=@c1, *c2=@c2
  Protected.i c
  
  For c = 0 To 65535
    LUT_Like(c) = c
  Next
  
  For c = 'A' To 'Z'
    LUT_Like(c) = c + 32
  Next
    
  While *c1\u And *c2\u
    LUT_Like(*c1\u) = *c2\u
    *c1 + 2
    *c2 + 2
  Wend
  
EndProcedure

LUT_Init()



ProcedureC Like(*context, argc, *argv.Integer)
  
  Protected.Unicode *pat, *str, *esc
  Protected.l esc
  
  ; get pattern and string
  *pat = sqlite3_value_text16(*argv\i)
  *argv + SizeOf(Integer)
  *str = sqlite3_value_text16(*argv\i)
  
  ; three argument LIKE
  If argc = 3
    *argv + SizeOf(Integer)
    *esc = sqlite3_value_text16(*argv\i)
    If *esc = #Null
      ProcedureReturn
    EndIf
    esc = *esc\u
    *esc + 2
    If *esc\u
      ProcedureReturn
    EndIf
  EndIf    
  
  ; set result
  sqlite3_result_int(*context, PatCmp(*str, *pat, LUT_Like(), esc))
  
EndProcedure




;- Main code

Define DB.i, StartTime.i, EndTime.i, i.i

UseSQLiteDatabase()

DB = OpenDatabase(#PB_Any, ":memory:", "", "", #PB_Database_SQLite)
If DB

  DatabaseUpdate(DB, "CREATE TABLE myTable (description text)")
  CompilerIf Not #PB_Compiler_Debugger
    For i = 0 To 10000
  CompilerEndIf
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blau')")
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blaE')")
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blaé')")
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('bláe')")
  CompilerIf Not #PB_Compiler_Debugger
    Next i
  CompilerEndIf
  
  sqlite3_create_function16(DatabaseID(DB), "like", -1, #SQLITE_UTF16, #Null, @Like(), #Null, #Null)
  
  StartTime = ElapsedMilliseconds()
  If DatabaseQuery(DB, "SELECT * FROM myTable WHERE description LIKE '%e' ESCAPE '\'")
    While NextDatabaseRow(DB)
      Debug GetDatabaseString(DB, 0)
    Wend
    FinishDatabaseQuery(DB)
  Else
    Debug DatabaseError()
  EndIf
  EndTime = ElapsedMilliseconds()
  
  CompilerIf Not #PB_Compiler_Debugger
    MessageRequester("Needed", Str(EndTime - StartTime))
  CompilerEndIf
  
  CloseDatabase(DB)
EndIf
Windows (x64)
Raspberry Pi OS (Arm64)
User avatar
doctorized
Addict
Addict
Posts: 856
Joined: Fri Mar 27, 2009 9:41 am
Location: Athens, Greece

Re: Add COLLATE NOACCENT to sqlite table column

Post by doctorized »

All codes run ok when running as is but infratec's code is not working if we add the values 'blau 1', 'blae 2', '3 blaé', '4 bláe' and do search with description like '%blae%'. On the other hand, wilbert's codes runs ok in this situation and is the one I need. Thank you both very much for your help! I really appreciate it!
infratec
Always Here
Always Here
Posts: 6866
Joined: Sun Sep 07, 2008 12:45 pm
Location: Germany

Re: Add COLLATE NOACCENT to sqlite table column

Post by infratec »

Hm...

with

Code: Select all

  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blau')")
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blae')")
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blaé')")
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('bláe')")
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blau 1')")
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blae 2')")
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('3 blaé')")
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('4 bláe')")
and

Code: Select all

SELECT * FROM myTable WHERE description LIKE '%blae%'
I get

blae
blaé
bláe
blae 2
3 blaé
4 bláe

I think that's right.
What is wrong in your case?
Btw. wilberts code returns the same result.

(I included your example in my code above)
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3870
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

Re: Add COLLATE NOACCENT to sqlite table column

Post by wilbert »

Good to hear the code works for you. :)

Here's also the optimized version.
It should give the same results but is significantly faster when the pattern contains '%' and the database strings are longer.
I also tried an asm version which is again faster but since it's asm code, it's much harder to read the code.

Code: Select all

#PatCmp_MatchAll = '%'
#PatCmp_MatchOne = '_'

Procedure.i PatCmp(*str.Unicode, *pat.Unicode, Array lut.u(1), esc.l = 0)
  
  ; wildcard matching algorithm based on code from
  ; http://dodobyte.com/wildcard.html
  
  Protected *pat_, *str_, lc.u
  
  ; check arguments
  If *pat = #Null Or *str = #Null
    ProcedureReturn #False
  EndIf
  
  ; while *str has characters
  While *str\u
    If *pat\u
      
      ; handle 'match one'
      If *pat\u = #PatCmp_MatchOne
        *pat + 2
        *str + 2
        Continue
      EndIf
      
      ; handle 'match all'
      If *pat\u = #PatCmp_MatchAll
        Repeat
          *pat + 2
          While *pat\u = #PatCmp_MatchOne
            *pat + 2
            *str + 2
            If *str\u = 0
              Break 3
            EndIf
          Wend
        Until *pat\u <> #PatCmp_MatchAll
        If *pat\u = 0
          ProcedureReturn #True
        EndIf
        If *pat\u = esc
          *pat + 2
          If *pat\u = 0
            ProcedureReturn #False
          EndIf
        EndIf
        *pat_ = *pat
        Goto NextMatch
      EndIf
      
      ; compare characters
      If *pat\u = esc
        *pat + 2
        If *pat\u = 0
          ProcedureReturn #False
        EndIf
      EndIf
      If lut(*pat\u) = lut(*str\u)
        *pat + 2
        *str + 2
        Continue
      EndIf
      
    EndIf
    
    ; characters didn't match
    If *pat_ = #Null
      ProcedureReturn #False
    EndIf
    *pat = *pat_
    *str = *str_ + 2
    
    ; find next match after 'match all'
    NextMatch:
    lc = lut(*pat\u)
    While lc <> lut(*str\u)
      *str + 2
      If *str\u = 0
        ProcedureReturn #False
      EndIf
    Wend
    *str_ = *str
    *str + 2
    *pat + 2
    
  Wend
  
  ; *str has no more characters
  ; check if pattern ends with 'match all'
  While *pat\u = #PatCmp_MatchAll
    *pat + 2
  Wend
  If *pat\u = 0
    ProcedureReturn #True
  Else
    ProcedureReturn #False
  EndIf
  
EndProcedure



EnableExplicit

;- Constants

#SQLITE_UTF16 = 4


;- Imports

ImportC ""
  sqlite3_create_function16.i(*db, zFunctionName.p-unicode, nArg, eTextRep, *pApp, *xFunc, *xStep, *xFinal)
  sqlite3_result_int(*sqlite3_context, int)
  sqlite3_value_text16.i(*sqlite3_value)
EndImport


;- Globals

Global Dim LUT_Like.u(65535)


;- Procedures

Procedure LUT_Init()
  
  Protected c1.s = "ÁÉÍÓÚÀÈÌÒÙÄÉÏÖÜÂÊÎÔÛáéíóúàèìòùäëïöüâêîôû"
  Protected c2.s = "aeiouaeiouaeiouaeiouaeiouaeiouaeiouaeiou"
  
  Protected.Unicode *c1=@c1, *c2=@c2
  Protected.i c
  
  For c = 0 To 65535
    LUT_Like(c) = c
  Next
  
  For c = 'A' To 'Z'
    LUT_Like(c) = c + 32
  Next
    
  While *c1\u And *c2\u
    LUT_Like(*c1\u) = *c2\u
    *c1 + 2
    *c2 + 2
  Wend
  
EndProcedure

LUT_Init()



ProcedureC Like(*context, argc, *argv.Integer)
  
  Protected.Unicode *pat, *str, *esc
  Protected.l esc
  
  ; get pattern and string
  *pat = sqlite3_value_text16(*argv\i)
  *argv + SizeOf(Integer)
  *str = sqlite3_value_text16(*argv\i)
  
  ; three argument LIKE
  If argc = 3
    *argv + SizeOf(Integer)
    *esc = sqlite3_value_text16(*argv\i)
    If *esc = #Null
      ProcedureReturn
    EndIf
    esc = *esc\u
    *esc + 2
    If *esc\u
      ProcedureReturn
    EndIf
  EndIf    
  
  ; set result
  sqlite3_result_int(*context, PatCmp(*str, *pat, LUT_Like(), esc))
  
EndProcedure




;- Main code

Define DB.i, StartTime.i, EndTime.i, i.i

UseSQLiteDatabase()

DB = OpenDatabase(#PB_Any, ":memory:", "", "", #PB_Database_SQLite)
If DB

  DatabaseUpdate(DB, "CREATE TABLE myTable (description text)")
  CompilerIf Not #PB_Compiler_Debugger
    For i = 0 To 10000
  CompilerEndIf
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blau')")
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blaE')")
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('blaé')")
  DatabaseUpdate(DB, "INSERT INTO myTable VALUES ('bláe')")
  CompilerIf Not #PB_Compiler_Debugger
    Next i
  CompilerEndIf
  
  sqlite3_create_function16(DatabaseID(DB), "like", -1, #SQLITE_UTF16, #Null, @Like(), #Null, #Null)
  
  StartTime = ElapsedMilliseconds()
  If DatabaseQuery(DB, "SELECT * FROM myTable WHERE description LIKE '%e' ESCAPE '\'")
    While NextDatabaseRow(DB)
      Debug GetDatabaseString(DB, 0)
    Wend
    FinishDatabaseQuery(DB)
  Else
    Debug DatabaseError()
  EndIf
  EndTime = ElapsedMilliseconds()
  
  CompilerIf Not #PB_Compiler_Debugger
    MessageRequester("Needed", Str(EndTime - StartTime))
  CompilerEndIf
  
  CloseDatabase(DB)
EndIf
Windows (x64)
Raspberry Pi OS (Arm64)
Post Reply