Page 1 sur 1

Procedure UCase2() - Normalisation / Suppression des accents

Publié : mer. 12/avr./2006 21:21
par Flype
Lorsqu'un de mes programmes propose une fonction de recherche,
j'ai l'habitude d'utiliser cette procédure dans mon code.
Elle est pratique car elle simplifie énormément la recherche utilisateur
en ne tenant pas compte des accents et autres caractères inutiles dans une recherche textuelle.
C'est tout bête mais ça augmente considérablement les chances de
réussite. Cette fonction est très rapide et peut être utilisée avec d'assez gros textes.

La boucle While/Wend ne sert qu'à mapper/remplacer caractère après caractère.
Toute la subtilité est dans l'agencement des datas que vous pouvez remanier à votre convenance.

Code : Tout sélectionner

; UCase2() - normalise a string for accent-insensitive searching.
;
; Each character is replaced through the 256-entry CHARMASK table:
; digits are kept, letters are upper-cased, accented letters are folded to
; a plain upper-case letter and every other character becomes a space.
;
; string : text to normalise (the procedure works on its own copy).
; Returns: the normalised text, same length as the input.
;
; The table is stored and read as bytes and the cursor advances by
; SizeOf(CHARACTER), so the routine works in both ASCII and Unicode builds.
; Characters with no table entry (code >= table size, Unicode builds only)
; become a space instead of reading past the end of the table.
Procedure.s UCase2(string.s)
  
  Protected *mask.BYTE, *str.CHARACTER = @string
  Protected tableSize.l = ?CHARMASK_END - ?CHARMASK ; number of 1-byte entries
  
  ; Defensive guard: no buffer means nothing to convert.
  If *str = 0
    ProcedureReturn string
  EndIf
  
  While *str\c
    If *str\c < tableSize
      *mask = ?CHARMASK + *str\c
      *str\c = *mask\b
    Else
      *str\c = 32 ; outside the table -> space, like any other unmapped character
    EndIf
    *str + SizeOf(CHARACTER)
  Wend
  
  ProcedureReturn string
  
  DataSection
    CHARMASK:
    Data.b 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32 ;   0- 19
    Data.b 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32 ;  20- 39
    Data.b 32,32,32,32,32,32,32,32,48,49,50,51,52,53,54,55,56,57,32,32 ;  40- 59 (digits kept)
    Data.b 32,32,32,32,32,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79 ;  60- 79 (A-O kept)
    Data.b 80,81,82,83,84,85,86,87,88,89,90,32,32,32,32,32,32,65,66,67 ;  80- 99 (P-Z kept, a-c -> A-C)
    Data.b 68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87 ; 100-119 (d-w -> D-W)
    Data.b 88,89,90,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,83,32 ; 120-139 (x-z -> X-Z)
    Data.b 32,32,90,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,90,89 ; 140-159
    Data.b 32,73,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32 ; 160-179
    Data.b 32,32,32,32,32,32,32,32,32,32,32,32,65,65,65,65,65,65,65,67 ; 180-199
    Data.b 69,69,69,69,73,73,73,73,68,78,79,79,79,79,79,32,79,85,85,85 ; 200-219
    Data.b 85,89,80,32,65,65,65,65,65,65,32,67,69,69,69,69,73,73,73,73 ; 220-239
    Data.b 32,78,79,79,79,79,79,32,32,85,85,85,85,89,32,89             ; 240-255
    CHARMASK_END:
  EndDataSection
  
EndProcedure

Debug UCase2("Pürèbâsïç, Là pùIssañçÊ à l'étät pùr!")

Publié : mer. 12/avr./2006 23:26
par Fred
Excellent ;).

Publié : jeu. 13/avr./2006 0:48
par Flype
Compatible ASCII/UNICODE, c'était pas le cas avant :

Code : Tout sélectionner

; UCase2() - normalise a string for accent-insensitive searching
; (ASCII and Unicode builds).
;
; Each character is replaced through the 256-entry CHARMASK table:
; digits are kept, letters are upper-cased, accented letters are folded to
; a plain upper-case letter and every other character becomes a space.
;
; string : text to normalise (the procedure works on its own copy).
; Returns: the normalised text, same length as the input.
;
; In a Unicode build a character code can be larger than the table; such
; characters are turned into a space instead of indexing past the table end.
Procedure.s UCase2(string.s)
  
  Protected *mask.BYTE, *str.CHARACTER = @string
  Protected tableSize.l = ?CHARMASK_END - ?CHARMASK ; number of 1-byte entries
  
  ; Defensive guard: no buffer means nothing to convert.
  If *str = 0
    ProcedureReturn string
  EndIf
  
  While *str\c
    If *str\c < tableSize
      *mask = ?CHARMASK + *str\c
      *str\c = *mask\b
    Else
      *str\c = 32 ; no table entry -> space
    EndIf
    *str + SizeOf(CHARACTER)
  Wend
  
  ProcedureReturn string
  
  DataSection
    CHARMASK:
    Data.b 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32 ;   0- 19
    Data.b 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32 ;  20- 39
    Data.b 32,32,32,32,32,32,32,32,48,49,50,51,52,53,54,55,56,57,32,32 ;  40- 59 (digits kept)
    Data.b 32,32,32,32,32,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79 ;  60- 79 (A-O kept)
    Data.b 80,81,82,83,84,85,86,87,88,89,90,32,32,32,32,32,32,65,66,67 ;  80- 99 (P-Z kept, a-c -> A-C)
    Data.b 68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87 ; 100-119 (d-w -> D-W)
    Data.b 88,89,90,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,83,32 ; 120-139 (x-z -> X-Z)
    Data.b 32,32,90,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,90,89 ; 140-159
    Data.b 32,73,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32 ; 160-179
    Data.b 32,32,32,32,32,32,32,32,32,32,32,32,65,65,65,65,65,65,65,67 ; 180-199
    Data.b 69,69,69,69,73,73,73,73,68,78,79,79,79,79,79,32,79,85,85,85 ; 200-219
    Data.b 85,89,80,32,65,65,65,65,65,65,32,67,69,69,69,69,73,73,73,73 ; 220-239
    Data.b 32,78,79,79,79,79,79,32,32,85,85,85,85,89,32,89             ; 240-255
    CHARMASK_END:
  EndDataSection
  
EndProcedure

Debug UCase2("Pürèbâsïç, Là pùIssañçÊ à l'étät pùr!")

Publié : jeu. 13/avr./2006 8:05
par gnozal
Traduction pour PB3.94 (désolé :wink:)

Code : Tout sélectionner

; ---------------------------------------------------------------------
; UCase2() - port for PB 3.94
;
; Walks the string one byte at a time and replaces each byte with the
; CHARMASK entry at the same index: digits are kept, letters are
; upper-cased, accented letters are folded to a plain upper-case letter
; and every other byte becomes a space.
;
; string : text to normalise.
; Returns: the normalised text, same length as the input.
; ---------------------------------------------------------------------
Procedure.s UCase2(string.s)

  Protected *cursor.BYTE, *entry.BYTE

  *cursor = @string

  While *cursor\b <> 0
    ; \b is a signed byte: masking with $FF gives the 0-255 table index.
    *entry = ?CHARMASK + (*cursor\b & $FF)
    *cursor\b = *entry\b
    *cursor + 1
  Wend

  ProcedureReturn string

  DataSection
    CHARMASK:
    Data.b 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32 ;   0- 19
    Data.b 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32 ;  20- 39
    Data.b 32,32,32,32,32,32,32,32,48,49,50,51,52,53,54,55,56,57,32,32 ;  40- 59 (digits kept)
    Data.b 32,32,32,32,32,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79 ;  60- 79 (A-O kept)
    Data.b 80,81,82,83,84,85,86,87,88,89,90,32,32,32,32,32,32,65,66,67 ;  80- 99 (P-Z kept, a-c -> A-C)
    Data.b 68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87 ; 100-119 (d-w -> D-W)
    Data.b 88,89,90,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,83,32 ; 120-139 (x-z -> X-Z)
    Data.b 32,32,90,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,90,89 ; 140-159
    Data.b 32,73,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32 ; 160-179
    Data.b 32,32,32,32,32,32,32,32,32,32,32,32,65,65,65,65,65,65,65,67 ; 180-199
    Data.b 69,69,69,69,73,73,73,73,68,78,79,79,79,79,79,32,79,85,85,85 ; 200-219
    Data.b 85,89,80,32,65,65,65,65,65,65,32,67,69,69,69,69,73,73,73,73 ; 220-239
    Data.b 32,78,79,79,79,79,79,32,32,85,85,85,85,89,32,89             ; 240-255
  EndDataSection

EndProcedure

; Usage example
Debug UCase2("Pürèbâsïç, Là pùIssañçÊ à l'étät pùr!")

Publié : jeu. 13/avr./2006 10:48
par Dr. Dri
Ça ne supprime pas que les accents mais aussi la ponctuation. Il y a sûrement moyen de gérer ça selon le charset de la chaîne, non ? Ce serait moins portable mais multilingue.

Dri

Publié : jeu. 13/avr./2006 12:03
par Flype
@dri
oui complètement mais c'est l'effet désiré pour moi.

imagine que par exemple un utilisateur recherche 'post-production'
et que dans la base il y a 'post production'

dans ton code tu fais quelque chose comme :

Code : Tout sélectionner

; UCase2() - normalise a string for accent- and punctuation-insensitive
; searching (ASCII and Unicode builds).
;
; Each character is replaced through the 256-entry CHARMASK table:
; digits are kept, letters are upper-cased, accented letters are folded to
; a plain upper-case letter and every other character (punctuation
; included) becomes a space.
;
; string : text to normalise (the procedure works on its own copy).
; Returns: the normalised text, same length as the input.
;
; In a Unicode build a character code can be larger than the table; such
; characters are turned into a space instead of indexing past the table end.
Procedure.s UCase2(string.s)
  
  Protected *mask.BYTE, *str.CHARACTER = @string
  Protected tableSize.l = ?CHARMASK_END - ?CHARMASK ; number of 1-byte entries
  
  ; Defensive guard: no buffer means nothing to convert.
  If *str = 0
    ProcedureReturn string
  EndIf
  
  While *str\c
    If *str\c < tableSize
      *mask = ?CHARMASK + *str\c
      *str\c = *mask\b
    Else
      *str\c = 32 ; no table entry -> space
    EndIf
    *str + SizeOf(CHARACTER)
  Wend
  
  ProcedureReturn string
  
  DataSection
    CHARMASK:
    Data.b 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32 ;   0- 19
    Data.b 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32 ;  20- 39
    Data.b 32,32,32,32,32,32,32,32,48,49,50,51,52,53,54,55,56,57,32,32 ;  40- 59 (digits kept)
    Data.b 32,32,32,32,32,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79 ;  60- 79 (A-O kept)
    Data.b 80,81,82,83,84,85,86,87,88,89,90,32,32,32,32,32,32,65,66,67 ;  80- 99 (P-Z kept, a-c -> A-C)
    Data.b 68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87 ; 100-119 (d-w -> D-W)
    Data.b 88,89,90,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,83,32 ; 120-139 (x-z -> X-Z)
    Data.b 32,32,90,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,90,89 ; 140-159
    Data.b 32,73,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32 ; 160-179
    Data.b 32,32,32,32,32,32,32,32,32,32,32,32,65,65,65,65,65,65,65,67 ; 180-199
    Data.b 69,69,69,69,73,73,73,73,68,78,79,79,79,79,79,32,79,85,85,85 ; 200-219
    Data.b 85,89,80,32,65,65,65,65,65,65,32,67,69,69,69,69,73,73,73,73 ; 220-239
    Data.b 32,78,79,79,79,79,79,32,32,85,85,85,85,89,32,89             ; 240-255
    CHARMASK_END:
  EndDataSection
  
EndProcedure

; Demo: the stored text has "post production" (with a space) while the
; user typed "post-production" (with a hyphen).
base.s   = "article comptabilisé en post production le 10/04/2006."
saisie.s = "post-production"

; Search after the built-in UCase().
If FindString(UCase(base), UCase(saisie), 1) = 0
  Debug "non trouvé"
Else
  Debug "trouvé"
EndIf

; Same search after normalising both sides with UCase2().
If FindString(UCase2(base), UCase2(saisie), 1) = 0
  Debug "non trouvé"
Else
  Debug "trouvé"
EndIf

Publié : jeu. 13/avr./2006 12:11
par Flype
et comme je disais dans le premier post on peut ajuster les datas pour que tout soit possible :

Code : Tout sélectionner

; UCase2() - normalise a string for accent-insensitive searching, with
; optional preservation of punctuation (ASCII and Unicode builds).
;
; Each character is replaced through a 256-entry byte table:
;   CHARMASK      - digits kept, letters upper-cased, accented letters
;                   folded to a plain letter, everything else -> space.
;   CHARPONCTMASK - same, but codes 33-96 (ASCII punctuation, digits and
;                   upper-case letters) are kept as they are.
;
; string      : text to normalise (the procedure works on its own copy).
; ponctuation : #False (default) uses CHARMASK, any other value uses
;               CHARPONCTMASK.
; Returns     : the normalised text, same length as the input.
;
; The table is selected once, so a single loop serves both modes.
; In a Unicode build a character code can be larger than the table; such
; characters are turned into a space instead of indexing past the table end.
;
; NOTE(review): in both tables codes 230, 240, 248 and 254 map to a space
; while their upper-case counterparts 198, 208, 216 and 222 map to letters,
; and { | } ~ (123-126) map to a space even in CHARPONCTMASK. Left unchanged
; here; adjust the data if that asymmetry is not wanted.
Procedure.s UCase2(string.s, ponctuation.l = #False)
  
  Protected *mask.BYTE, *str.CHARACTER = @string
  Protected *table, tableSize.l
  
  If ponctuation
    *table    = ?CHARPONCTMASK
    tableSize = ?CHARPONCTMASK_END - ?CHARPONCTMASK
  Else
    *table    = ?CHARMASK
    tableSize = ?CHARMASK_END - ?CHARMASK
  EndIf
  
  ; Defensive guard: no buffer means nothing to convert.
  If *str = 0
    ProcedureReturn string
  EndIf
  
  While *str\c
    If *str\c < tableSize
      *mask = *table + *str\c
      *str\c = *mask\b
    Else
      *str\c = 32 ; no table entry -> space
    EndIf
    *str + SizeOf(CHARACTER)
  Wend
  
  ProcedureReturn string
  
  DataSection ; Mask without punctuation
    CHARMASK:
    Data.b 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32 ;   0- 19
    Data.b 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32 ;  20- 39
    Data.b 32,32,32,32,32,32,32,32,48,49,50,51,52,53,54,55,56,57,32,32 ;  40- 59
    Data.b 32,32,32,32,32,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79 ;  60- 79
    Data.b 80,81,82,83,84,85,86,87,88,89,90,32,32,32,32,32,32,65,66,67 ;  80- 99
    Data.b 68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87 ; 100-119
    Data.b 88,89,90,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,83,32 ; 120-139
    Data.b 32,32,90,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,90,89 ; 140-159
    Data.b 32,73,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32 ; 160-179
    Data.b 32,32,32,32,32,32,32,32,32,32,32,32,65,65,65,65,65,65,65,67 ; 180-199
    Data.b 69,69,69,69,73,73,73,73,68,78,79,79,79,79,79,32,79,85,85,85 ; 200-219
    Data.b 85,89,80,32,65,65,65,65,65,65,32,67,69,69,69,69,73,73,73,73 ; 220-239
    Data.b 32,78,79,79,79,79,79,32,32,85,85,85,85,89,32,89             ; 240-255
    CHARMASK_END:
  EndDataSection
  
  DataSection ; Mask with punctuation
    CHARPONCTMASK:
    Data.b 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32 ;   0- 19
    Data.b 32,32,32,32,32,32,32,32,32,32,32,32,32,33,34,35,36,37,38,39 ;  20- 39
    Data.b 40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59 ;  40- 59
    Data.b 60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79 ;  60- 79
    Data.b 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,65,66,67 ;  80- 99
    Data.b 68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87 ; 100-119
    Data.b 88,89,90,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,83,32 ; 120-139
    Data.b 32,32,90,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,90,89 ; 140-159
    Data.b 32,73,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32 ; 160-179
    Data.b 32,32,32,32,32,32,32,32,32,32,32,32,65,65,65,65,65,65,65,67 ; 180-199
    Data.b 69,69,69,69,73,73,73,73,68,78,79,79,79,79,79,32,79,85,85,85 ; 200-219
    Data.b 85,89,80,32,65,65,65,65,65,65,32,67,69,69,69,69,73,73,73,73 ; 220-239
    Data.b 32,78,79,79,79,79,79,32,32,85,85,85,85,89,32,89             ; 240-255
    CHARPONCTMASK_END:
  EndDataSection
  
EndProcedure 

; Demo: one input printed through UCase() and through both UCase2() modes.
test.s = "Pürèbâsïç, Là pùIssañçÊ à l'étät pùr!"

Debug UCase(test)           ; built-in upper-casing
Debug UCase2(test, #False)  ; table without punctuation (the default mode)
Debug UCase2(test, #True)   ; table with punctuation