Code : Tout sélectionner
;{ LIB code
; Help at:
; http://www.lumadis.be/regex/test_regex.php
ImportC ""
pb_pcre_exec(*pcre,*extra,subject.s,length,startoffset,options,*ovector,ovecsize)
EndImport
; One element of the result list filled by BackrefExtractRegularExpression():
; the substrings captured for a single match of the regular expression.
Structure List_Extraction
Array Extract.s(0) ; re-dimensioned for every match; index 0 = whole match, index N = capture group N (filled only when requested)
EndStructure
Procedure.s BackrefReplaceRegularExpression(regexp_handle, string.s, replacement.s, maximum_reference.l = 10) ; 0 <= maximum_reference <= 99
  ; Replaces every match of the regular expression in 'string' by 'replacement'
  ; and returns the resulting string.
  ;
  ; Tags recognised inside 'replacement':
  ;   $0    the whole match
  ;   $N    the text captured by group N (1 <= N <= maximum_reference)
  ;   $#Nc  as many copies of the character 'c' (the character that follows the
  ;         tag) as group N has characters. Only the tag itself is replaced, so
  ;         the literal 'c' stays in the output as well. The fill character is
  ;         taken from the first occurrence of the tag.
  ;
  ; Tags are substituted from the highest group number down, so "$10" means
  ; group 10 when maximum_reference >= 10; pass a smaller maximum_reference to
  ; get "group 1 followed by a literal 0".
  ; Groups that did not take part in the match are replaced by an empty string.
  ;
  ; regexp_handle : value returned by CreateRegularExpression(#PB_Any, ...).
  ;                 NOTE(review): its first pointer-sized field is read as the
  ;                 compiled PCRE pattern - relies on PureBasic internals, confirm.
  ; NOTE(review): PCRE offsets are used as character offsets, which presumably
  ;               assumes a one-byte-per-character (ASCII) build - confirm.
  ;
  ; PCRE writes C 'int' (32-bit) offsets, so the vector must be an array of
  ; Long; the default Integer type is 8 bytes wide on x64.
  Static Dim pcre_results.l(202)
  Protected depart.l, char$, rpl.s, tag$, capture$
  Protected p.l, q.l, p1.l, q1.l, rc.l, ref.l, position.l, tag_found.l

  ; Highest group whose offset pair still fits in the declared range.
  If maximum_reference > 99
    maximum_reference = 99
  EndIf

  depart = 0
  Repeat
    ; PeekI() reads a pointer-sized value (PeekL() would truncate it on x64).
    rc = pb_pcre_exec(PeekI(regexp_handle), 0, string, Len(string), depart, 0, @pcre_results(), 202)
    If rc <= 0
      Break ; no more match (or PCRE error)
    EndIf

    rpl = replacement
    p = pcre_results(0)
    q = pcre_results(1)
    If FindString(replacement, "$0", 1)
      rpl = ReplaceString(rpl, "$0", PeekS(@string + p, q - p))
    EndIf

    ; Descending order: "$10" must be handled before "$1" (its prefix).
    For ref = maximum_reference To 1 Step -1
      tag$ = "$" + Str(ref)
      tag_found = FindString(replacement, tag$, 1)
      position = FindString(replacement, "$#" + Str(ref), 1)

      If tag_found Or position
        ; Only the first 'rc' pairs were written by this call; anything beyond
        ; is stale data from an earlier call. -1 marks an unset group.
        capture$ = ""
        If ref < rc
          p1 = pcre_results(ref * 2)
          q1 = pcre_results(ref * 2 + 1)
          If p1 >= 0 And q1 > p1
            capture$ = PeekS(@string + p1, q1 - p1)
          EndIf
        EndIf

        If tag_found
          rpl = ReplaceString(rpl, tag$, capture$)
        EndIf

        If position
          tag$ = "$#" + Str(ref)
          char$ = Mid(replacement, position + Len(tag$), 1)
          rpl = ReplaceString(rpl, tag$, RSet("", Len(capture$), char$))
        EndIf
      EndIf
    Next

    ; Resume the search right after the inserted text.
    depart = p + Len(rpl)
    string = Left(string, p) + rpl + Right(string, Len(string) - q)

    ; A zero-length match would be found again at the same place forever:
    ; step over one character, and stop once the end of the string is passed.
    If q = p
      depart + 1
      If depart > Len(string)
        Break
      EndIf
    EndIf
  ForEver

  ProcedureReturn string
EndProcedure
Procedure.s BackrefExtractRegularExpression(regexp_handle, string.s, Array Extract.b(1), List Resultat.List_Extraction()) ; 0 <= size of the Extract array <= 99
  ; Finds every match of the regular expression in 'string', stores the
  ; requested captures in Resultat() and returns 'string' with all matched
  ; text removed.
  ;
  ; Extract()  : Extract(0) = #True asks for the whole match, Extract(N) = #True
  ;              asks for capture group N.
  ; Resultat() : cleared first, then receives one element per match. Its
  ;              Extract() array has the same size as the Extract() parameter;
  ;              entries that were not requested, or whose group did not take
  ;              part in the match, stay empty.
  ;
  ; Each match is cut out of the working string before searching again, so a
  ; later match may span text that was not adjacent in the original string.
  ;
  ; regexp_handle : value returned by CreateRegularExpression(#PB_Any, ...).
  ;                 NOTE(review): its first pointer-sized field is read as the
  ;                 compiled PCRE pattern - relies on PureBasic internals, confirm.
  ; NOTE(review): PCRE offsets are used as character offsets, which presumably
  ;               assumes a one-byte-per-character (ASCII) build - confirm.
  ;
  ; PCRE writes C 'int' (32-bit) offsets, so the vector must be an array of
  ; Long; the default Integer type is 8 bytes wide on x64.
  Static Dim pcre_results.l(202)
  Protected depart.l, maximum_reference.l, rc.l, ref.l
  Protected p.l, q.l, p1.l, q1.l

  maximum_reference = ArraySize(Extract())
  ClearList(Resultat())
  If maximum_reference < 0
    ProcedureReturn string ; Extract() is not dimensioned: nothing can be requested
  EndIf

  depart = 0
  Repeat
    ; PeekI() reads a pointer-sized value (PeekL() would truncate it on x64).
    rc = pb_pcre_exec(PeekI(regexp_handle), 0, string, Len(string), depart, 0, @pcre_results(), 202)
    If rc <= 0
      Break ; no more match (or PCRE error)
    EndIf

    AddElement(Resultat())
    Dim Resultat()\Extract(maximum_reference)

    p = pcre_results(0)
    q = pcre_results(1)
    If Extract(0) = #True
      Resultat()\Extract(0) = PeekS(@string + p, q - p)
    EndIf

    For ref = 1 To maximum_reference
      ; Only the first 'rc' offset pairs were written by this call; the test
      ; also keeps the index inside pcre_results() for large Extract() arrays.
      If ref < rc
        If Extract(ref) = #True
          p1 = pcre_results(ref * 2)
          q1 = pcre_results(ref * 2 + 1)
          If p1 >= 0 And q1 > p1 ; -1 marks a group that did not take part
            Resultat()\Extract(ref) = PeekS(@string + p1, q1 - p1)
          EndIf
        EndIf
      EndIf
    Next

    ; Remove the matched text and search again from where it started.
    depart = p
    string = Left(string, p) + Right(string, Len(string) - q)

    ; A zero-length match removes nothing and would be found again forever:
    ; step over one character, and stop once the end of the string is passed.
    If q = p
      depart + 1
      If depart > Len(string)
        Break
      EndIf
    EndIf
  ForEver

  ProcedureReturn string
EndProcedure
;}
; Demo: back-reference replacement and extraction on a sample sentence.
; in punct : [\]\[!"#$%&'()*+,./:;<=>?@\^_`{|}~-]
; REGEX$ = "([[:punct:]])(.*?)([[:punct:]])"
REGEX$ = "([\("+Chr(34)+"“«<\[{-])(.*?)([\)"+Chr(34)+"”»>\]}-])"
rh = CreateRegularExpression(#PB_Any ,REGEX$)
; The procedures below dereference the handle, so a failed compilation must
; stop the demo here.
If rh = 0
  Debug "Invalid regular expression: " + RegularExpressionError()
  End
EndIf
CHAINE$ = "How To find "+Chr(34)+"the"+Chr(34)+" And "+Chr(34)+"these"+Chr(34)+" followed by a word (To capture) in the «following» text."

Debug "#### Recurrent Replacement test :"
Debug ""
Debug "Starting text :"
Debug CHAINE$
Debug "Regex = ||" + REGEX$ + "||"
Debug ""
Debug "Result :"
Debug BackrefReplaceRegularExpression(rh, CHAINE$, "_$0_")

; creation of the array specifying which parenthesis must be returned
Dim tableau_extraction.b(3)
; we want all parenthesis so we complete the array all the way
For a = 0 To 3
  tableau_extraction(a) = #True
Next
; creation of the list returning the result
NewList Result.List_Extraction()
; processing (the returned remainder of the text is not needed here)
BackrefExtractRegularExpression(rh, CHAINE$, tableau_extraction(), Result())

Debug ""
Debug ""
Debug "#### Recurrent extraction test :"
Debug ""
Debug "Starting text :"
Debug CHAINE$
Debug "Regex = ||" + REGEX$ + "||"
Debug ""
Debug "Extracted values :"
Debug ""
b = 0
ForEach Result()
  Debug "Occurrence " + Str(b)
  For a = 0 To 3
    Debug "$" + Str(a) + " = " + Result()\Extract(a)
  Next
  b + 1
Next

; Example: the words between punctuation marks are replaced by '_' characters.
Debug ""
Debug "exemple dans lequel on remplace les mots entre ponctuation par autant de '_' qu'il y avait de lettres."
Debug ""
Debug BackrefReplaceRegularExpression(rh, CHAINE$, "$1$#2_$3", 3)

FreeRegularExpression(rh)
Il est possible de modifier BackrefReplaceRegularExpression() pour que le nombre de « _ » corresponde exactement au nombre de lettres du mot d'origine.
J'en profite pour mentionner que les options de création de regex sont un peu « pauvres » : par exemple, on ne peut pas utiliser l'option U (ungreedy). Il faut la mentionner à chaque fois dans la regex. C'est dommage.