Immer wieder hat es mich aufgeregt, dass ich mit der RegularExpression-Lib keine "richtigen" Ersetzungen vornehmen konnte.
Darum habe ich kurzerhand selbst ein Include für reguläre Ausdrücke geschrieben, das speziell für PB entworfen ist.
Die wichtigste Änderung ist dabei, dass die Befehle ExtractRegex() und ReplaceRegex() einen Callback verlangen, in dem man nun selber entscheiden kann, was passieren soll. Dabei wird eine Map übergeben, über die per Index oder Namen auf die Gruppen zugegriffen werden kann.
Die Syntax für die regulären Ausdrücke ist noch nicht vollständig, aber grundlegende Sachen (Zeichengruppen, Strings, Wiederholung, Gruppen (auch mit Namen), Backreference) sind verfügbar.
Ich würde von euch gerne Feedback dazu bekommen, was ihr von dieser Alternative haltet.
Update:
- PB 5.20 kompatibel
- genügsame und gierige Quantifier
- Lookahead
- FindRegex
Beispiel:
Code: Alles auswählen
; Regex object number used by the example calls below.
Enumeration
#Regex
EndEnumeration
; Example extraction callback.
; Prints the map's current element (ExtractRegex positions the map on key "0",
; the whole match, before calling) and then the named groups "Zahl" and "Einheit".
Procedure.i CallbackExtract(Map Hit.s())
  Protected Whole.s, Number.s, Unit.s
  Whole = Hit()
  Number = Hit("Zahl")
  Unit = Hit("Einheit")
  Debug "Gefunden: "+Whole
  Debug " Zahl = "+Number
  Debug " Einheit = "+Unit
EndProcedure
; Example replacement callback.
; Returns the replacement text for one match: the "Zahl" group in bold,
; a space, then the "Einheit" group in italics.
Procedure.s CallbackReplace(Map Hit.s())
  Protected Markup.s
  Markup = "<b>" + Hit("Zahl") + "</b>"
  Markup + " "
  Markup + "<i>" + Hit("Einheit") + "</i>"
  ProcedureReturn Markup
EndProcedure
; Compile a pattern with two named groups: "Zahl" (digits with an optional
; decimal part) and "Einheit" (word characters and '/'), separated by optional whitespace.
CreateRegex(#Regex, "(?P<Zahl>\d+\.?\d*)\s*(?P<Einheit>[\w/]+)")
; Invoke CallbackExtract once per match found in the sample sentence.
ExtractRegex(#Regex, "Die Geschwindigkeit ist 123.75 m/s bei einer Strecke von 159 m", @CallbackExtract())
; Replace every match with the text returned by CallbackReplace and print the result.
Debug ReplaceRegex(#Regex, "Die Geschwindigkeit ist 123.75 m/s bei einer Strecke von 159 m", @CallbackReplace())
Code: Alles auswählen
EnableExplicit
;|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
;- 1. Constants / Konstanten
;|__________________________________________________________________________________________________
; Mask of the 16 low bits. Values that fit into it are treated as user-chosen
; regex numbers (see CreateRegex / RegexID).
#Regex_IndexNumber = $FFFF
; Complement of the mask above. A value with any of these bits set is treated
; as a pointer to a Regex structure rather than a number (see RegexID).
#Regex_AnyNumber = ~#Regex_IndexNumber
; Rule node types stored in RegexRule\Type (values start at 1).
Enumeration 1
#RegexRule_Any
#RegexRule_Set
#RegexRule_String
#RegexRule_Quantifier
#RegexRule_Decision
#RegexRule_Concatenation
#RegexRule_Group
#RegexRule_Backreference
EndEnumeration
; Edit modes accepted by Regex_CreateRule_Set_Edit.
; NOTE(review): #RegexSet_NegateCharacter is not referenced anywhere in the visible source.
Enumeration 1
#RegexSet_Character
#RegexSet_NegateCharacter
#RegexSet_Group
#RegexSet_NegateGroup
EndEnumeration
; Group modes (RegexRule_Group\Mode): -1 = NoIndex, 0 = Normal, 1 = PositiveLookahead, 2 = NegativeLookahead.
Enumeration -1
#RegexGroup_NoIndex
#RegexGroup_Normal
#RegexGroup_PositiveLookahead
#RegexGroup_NegativeLookahead
EndEnumeration
; Quantifier modes (RegexRule_Quantifier\Mode): -1 = Modest ('?' suffix), 0 = Normal, 1 = Greedy ('+' suffix).
Enumeration -1
#RegexQuantifier_Modest
#RegexQuantifier_Normal
#RegexQuantifier_Greedy
EndEnumeration
; Position value returned by Regex_Examine when the rule does not match.
Enumeration
#Regex_NoMatch = -1
EndEnumeration
;|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
;- 2. Structures / Strukturen
;|__________________________________________________________________________________________________
; Overlay structures for indexing raw memory as characters / longs.
; Each is only declared if no structure of that name exists yet.
CompilerIf Defined(CharacterArray, #PB_Structure) = #False
Structure CharacterArray
c.c[0]
EndStructure
CompilerEndIf
CompilerIf Defined(LongArray, #PB_Structure) = #False
Structure LongArray
l.l[0]
EndStructure
CompilerEndIf
; Header shared by every rule node: node type plus the tree links
; (parent, next sibling, first child) maintained by Regex_NewChild.
Structure RegexRule
Type.i
*Parent.RegexRule
*NextSibling.RegexRule
*FirstChild.RegexRule
EndStructure
; Any character ('.')
Structure RegexRule_Any Extends RegexRule
EndStructure
; Character class: a bit mask indexed by character code (bit c%32 of word c>>5).
; The initial 8 longs cover codes 0..255; Negate inverts the meaning of the mask.
Structure RegexRule_Set Extends RegexRule
Negate.i
Array Mask.l(7)
EndStructure
; Literal string, one array element per character
Structure RegexRule_String Extends RegexRule
Array Character.c(0)
EndStructure
; Alternation ('|'); the alternatives are the child rules
Structure RegexRule_Decision Extends RegexRule
EndStructure
; Repetition of the single child rule between Min and Max times.
; TempValue is the running iteration counter used by Regex_Examine.
Structure RegexRule_Quantifier Extends RegexRule
Mode.i
Min.i
Max.i
TempValue.i
EndStructure
; Concatenation; the children are matched one after another
Structure RegexRule_Concatenation Extends RegexRule
EndStructure
; Subexpression (group). Index is the capture number (-1 = not captured),
; Name the optional capture name, TempPosition the input position at which
; Regex_Examine entered the group.
Structure RegexRule_Group Extends RegexRule
Mode.i
Index.i
Name.s
TempPosition.i
EndStructure
; Back reference to the capture with the given index
Structure RegexRule_Backreference Extends RegexRule
Index.i
EndStructure
; Parser state while a pattern is compiled: the pattern text, the current
; read index, the last capture index handed out, and the regex being built.
Structure RegexInput
*Character.CharacterArray
Index.i
CurrentGroupIndex.i
*Regex.Regex
EndStructure
; NOTE(review): not referenced anywhere in the visible source.
Structure RegexExtract
*Character.CharacterArray
Index.i
EndStructure
; Callback signatures for ExtractRegex, ReplaceRegex and FindRegex.
; Hit maps capture index (as string) and capture name to the captured text.
Prototype.i ExtractRegexCallback(Map Hit.s())
Prototype.s ReplaceRegexCallback(Map Hit.s())
; Position is the 0-based start index of the match, Length its length in characters.
Prototype.i FindRegexCallback(Position.i, Length.i)
; A compiled regex: its number (or its own address when created with #PB_Any)
; and the root of its rule tree.
Structure Regex
Number.i
*Rule.RegexRule
EndStructure
; Storage pools for the rule nodes, one list per rule type.
; The Regex_CreateRule_* procedures allocate nodes with AddElement on these lists.
Structure RegexInclude_Rules
List Any.RegexRule_Any()
List Set.RegexRule_Set()
List String.RegexRule_String()
List Quantifier.RegexRule_Quantifier()
List Concatenation.RegexRule_Concatenation()
List Decision.RegexRule_Decision()
List Group.RegexRule_Group()
List Backreference.RegexRule_Backreference()
EndStructure
; One backtracking state: the rule to resume, the input position to restore,
; and a rule-specific value (iteration count for quantifiers, current
; alternative for decisions - both share the same storage).
Structure RegexInclude_StackEntry
*Rule.RegexRule
Position.i
StructureUnion
Parameter.i
*Child.RegexRule
EndStructureUnion
EndStructure
; Backtracking stack; Index is the number of entries currently in use.
Structure RegexInclude_Stack
Array Entry.RegexInclude_StackEntry(1048576)
Index.i
EndStructure
; All global state of the include: the regex objects, the number -> object
; lookup table, the rule pools and the backtracking stack.
Structure RegexInclude
List Regex.Regex()
Array *RegexID.Regex(0)
Rules.RegexInclude_Rules
Stack.RegexInclude_Stack
EndStructure
;|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
;- 3. Initializations / Initialisierungen
;|__________________________________________________________________________________________________
; Single global instance holding every regex, rule node and the backtracking stack.
Global RegexInclude.RegexInclude
; Forward declarations. Regex_CreateRule is needed early because the group
; parser and the rule parser call each other.
Declare Regex_CreateRule(*Input.RegexInput)
Declare.i RegexID(Regex.i)
Declare.i IsRegex(Regex.i)
Declare.i FreeRegex(Regex.i)
Declare.i CreateRegex(Regex.i, Syntax.s)
Declare DebugRegex(Regex.i)
Declare ExtractRegex(Regex.i, String.s, Callback.ExtractRegexCallback)
Declare.s ReplaceRegex(Regex.i, String.s, Callback.ReplaceRegexCallback)
Declare FindRegex(Regex.i, String.s, Callback.FindRegexCallback)
;|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
;- 4. Procedures & Macros / Prozeduren & Makros
;|__________________________________________________________________________________________________
;- 4.1 Private procedures for internal calculations ! Not for use !
;¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
; Appends '*ChildRule' as the last child of '*Rule'.
;   *Rule      - parent rule that receives the child
;   *ChildRule - rule to append; its Parent link is set to '*Rule'
; A null parent or child is ignored. The parser can hand in a null child for
; malformed or empty constructs (e.g. an empty alternative such as "a||b"),
; which previously caused a write through a null pointer.
Procedure Regex_NewChild(*Rule.RegexRule, *ChildRule.RegexRule)
  Protected *Last.RegexRule
  If *Rule = #Null Or *ChildRule = #Null
    ProcedureReturn
  EndIf
  ; Link to the parent, then hook the child in after the current last sibling
  *ChildRule\Parent = *Rule
  If *Rule\FirstChild
    *Last = *Rule\FirstChild
    While *Last\NextSibling
      *Last = *Last\NextSibling
    Wend
    *Last\NextSibling = *ChildRule
  Else
    *Rule\FirstChild = *ChildRule
  EndIf
EndProcedure
; Adds one character or one predefined character group to a character class.
;   *Set       - class rule to modify
;   Mode       - #RegexSet_Character   : '*Parameter' is a character code
;                #RegexSet_Group       : '*Parameter' points to an 8-long bit mask to add
;                #RegexSet_NegateGroup : '*Parameter' points to an 8-long bit mask whose
;                                        complement is added
;   *Parameter - character code or mask address, depending on Mode
; The class is stored as a bit mask plus a Negate flag. While Negate is set the
; mask holds the characters that are NOT in the class, so every "add" is
; applied in its complemented form.
; The predefined group masks only cover the first 8 words (codes 0..255). When
; a complemented group is added, all codes above 255 belong to the class, so
; any mask words beyond index 7 are cleared; they previously kept stale bits.
Procedure Regex_CreateRule_Set_Edit(*Set.RegexRule_Set, Mode.i, *Parameter.LongArray)
  Protected Index.i
  Protected Character.i = *Parameter
  ; Grow the mask if the character lies beyond the currently covered range
  If Mode = #RegexSet_Character And Character>>5 > ArraySize(*Set\Mask())
    ReDim *Set\Mask(Character>>5)
  EndIf
  Select *Set\Negate
    Case #False
      Select Mode
        Case #RegexSet_Character
          *Set\Mask(Character>>5) = *Set\Mask(Character>>5) | 1<<(Character%32)
        Case #RegexSet_Group
          For Index = 0 To 7
            *Set\Mask(Index) = *Set\Mask(Index) | *Parameter\l[Index]
          Next
        Case #RegexSet_NegateGroup
          ; S + not(G) = not(G and not S): store G & ~S and switch to negated form
          For Index = 0 To 7
            *Set\Mask(Index) = *Parameter\l[Index] & ~ *Set\Mask(Index)
          Next
          For Index = 8 To ArraySize(*Set\Mask())
            *Set\Mask(Index) = 0
          Next
          *Set\Negate = #True
      EndSelect
    Case #True
      Select Mode
        Case #RegexSet_Character
          *Set\Mask(Character>>5) = *Set\Mask(Character>>5) & ~ (1<<(Character%32))
        Case #RegexSet_Group
          For Index = 0 To 7
            *Set\Mask(Index) = *Set\Mask(Index) & ~ *Parameter\l[Index]
          Next
        Case #RegexSet_NegateGroup
          ; not(M) + not(G) = not(M and G)
          For Index = 0 To 7
            *Set\Mask(Index) = *Set\Mask(Index) & *Parameter\l[Index]
          Next
          For Index = 8 To ArraySize(*Set\Mask())
            *Set\Mask(Index) = 0
          Next
      EndSelect
  EndSelect
EndProcedure
; Parses a character class and returns the new set rule.
;   *Input - parser state; on entry Index is on the first character after '['
;            (NoLoop = #False) or on the backslash of a single escape such as
;            "\d" (NoLoop = #True)
;   NoLoop - #True to parse exactly one escape sequence instead of a whole class
; On return Index is on the last consumed character (the closing ']' or the
; escaped character); the caller advances past it.
; Changes compared to the previous version:
;   - an unterminated class or a trailing backslash no longer leaves Index on or
;     behind the terminating NUL, which made the caller read past the pattern
;   - '-' directly after '[' / '[^' or directly before ']' is a literal '-'
;     instead of a range that uses the bracket as one of its bounds
;   - '^' negates only when it is the first character of the class, not whenever
;     it happens to follow a '[' character
Procedure Regex_CreateRule_Set(*Input.RegexInput, NoLoop.i=#False)
  Protected *Set.RegexRule_Set = AddElement(RegexInclude\Rules\Set())
  Protected FirstCharacter.i, LastCharacter.i, Character.i
  Protected Negate.i
  Protected StartIndex.i = *Input\Index
  *Set\Type = #RegexRule_Set
  Repeat
    Select *Input\Character\c[*Input\Index]
      Case #NUL
        ; Unterminated class: step back so that the caller's increment lands on the NUL
        *Input\Index - 1
        Break
      Case ']'
        Break
      Case '\'
        If *Input\Character\c[*Input\Index+1] = #NUL
          ; Dangling backslash at the end of the pattern: nothing to escape
          Break
        EndIf
        *Input\Index+1
        Select *Input\Character\c[*Input\Index]
          Case 'd'
            Regex_CreateRule_Set_Edit(*Set, #RegexSet_Group, ?Regex_Set_d)
          Case 'w'
            Regex_CreateRule_Set_Edit(*Set, #RegexSet_Group, ?Regex_Set_w)
          Case 's'
            Regex_CreateRule_Set_Edit(*Set, #RegexSet_Group, ?Regex_Set_s)
          Case 'D'
            Regex_CreateRule_Set_Edit(*Set, #RegexSet_NegateGroup, ?Regex_Set_d)
          Case 'W'
            Regex_CreateRule_Set_Edit(*Set, #RegexSet_NegateGroup, ?Regex_Set_w)
          Case 'S'
            Regex_CreateRule_Set_Edit(*Set, #RegexSet_NegateGroup, ?Regex_Set_s)
          Case 't'
            Regex_CreateRule_Set_Edit(*Set, #RegexSet_Character, #TAB)
          Case 'n'
            Regex_CreateRule_Set_Edit(*Set, #RegexSet_Character, #LF)
          Case 'r'
            Regex_CreateRule_Set_Edit(*Set, #RegexSet_Character, #CR)
          Default
            Regex_CreateRule_Set_Edit(*Set, #RegexSet_Character, *Input\Character\c[*Input\Index])
        EndSelect
      Case '^' ; Negation (only as first character, otherwise a literal)
        If *Input\Index = StartIndex
          Negate = #True
        Else
          Regex_CreateRule_Set_Edit(*Set, #RegexSet_Character, '^')
        EndIf
      Case '-' ; Range, or a literal '-' at either end of the class
        FirstCharacter = *Input\Character\c[*Input\Index-1]
        LastCharacter = *Input\Character\c[*Input\Index+1]
        If *Input\Index = StartIndex Or (Negate And *Input\Index = StartIndex+1) Or LastCharacter = ']' Or LastCharacter = #NUL
          Regex_CreateRule_Set_Edit(*Set, #RegexSet_Character, '-')
        Else
          For Character = FirstCharacter To LastCharacter
            Regex_CreateRule_Set_Edit(*Set, #RegexSet_Character, Character)
          Next
        EndIf
      Default ; Single character
        Regex_CreateRule_Set_Edit(*Set, #RegexSet_Character, *Input\Character\c[*Input\Index])
    EndSelect
    If NoLoop = #False
      *Input\Index + 1
    EndIf
  Until NoLoop
  ; A leading '^' flips whatever negation state the class members produced
  If Negate = #True
    *Set\Negate = 1-*Set\Negate
  EndIf
  ProcedureReturn *Set
EndProcedure
; Creates a repetition rule for the given rule and returns it.
;   *Input - parser state; Index is on the quantifier character ('?', '+', '*' or '{')
;   *Rule  - rule to repeat; it becomes the only child of the quantifier.
;            Must not be null (the matcher dereferences the child).
; On return Index is on the last consumed character of the quantifier,
; including an optional mode suffix: '?' selects #RegexQuantifier_Modest,
; '+' selects #RegexQuantifier_Greedy.
; An unterminated "{..." at the end of the pattern used to run past the
; terminating NUL; it is now closed implicitly.
Procedure Regex_CreateRule_Quantifier(*Input.RegexInput, *Rule.RegexRule)
  Protected *Quantifier.RegexRule_Quantifier = AddElement(RegexInclude\Rules\Quantifier())
  Protected *Value.Integer
  With *Quantifier
    \Type = #RegexRule_Quantifier
    \Mode = #RegexQuantifier_Normal
    Regex_NewChild(*Quantifier, *Rule)
    Repeat
      Select *Input\Character\c[*Input\Index]
        Case #NUL
          ; Pattern ended inside "{...": treat as closed, step back so that the
          ; caller's increment lands on the NUL, and skip the suffix check
          If \Max < \Min
            \Max = \Min
          EndIf
          *Input\Index - 1
          ProcedureReturn *Quantifier
        Case '?' ; once or not at all
          \Min = 0
          \Max = 1
          Break
        Case '+' ; at least once
          \Min = 1
          \Max = $7FFFFFFF
          Break
        Case '*' ; any number of times
          \Min = 0
          \Max = $7FFFFFFF
          Break
        Case '{' ; explicit minimum and maximum
          *Value = @\Min
        Case ','
          *Value = @\Max
        Case '}'
          If *Input\Character\c[*Input\Index-1] = ','
            ; "{n,}" : no upper bound
            \Max = $7FFFFFFF
          ElseIf \Max < \Min
            ; "{n}" : exactly n times
            \Max = \Min
          EndIf
          Break
        Case '0' To '9'
          ; Accumulate the decimal number into Min or Max
          If *Value
            *Value\i = *Value\i * 10 + (*Input\Character\c[*Input\Index]-'0')
          EndIf
      EndSelect
      *Input\Index + 1
    ForEver
    ; Optional mode suffix directly after the quantifier
    Select *Input\Character\c[*Input\Index+1]
      Case '?'
        \Mode = #RegexQuantifier_Modest
        *Input\Index + 1
      Case '+'
        \Mode = #RegexQuantifier_Greedy
        *Input\Index + 1
    EndSelect
  EndWith
  ProcedureReturn *Quantifier
EndProcedure
; Parses a run of literal characters and returns the new string rule.
;   *Input - parser state; Index is on the first literal character
; On return Index is on the last consumed character; the caller advances past it.
; A quantifier binds only to the last character: if the run is longer than one
; character, the last one is removed again and Index is rewound so that it is
; re-parsed as a rule of its own.
; Changes compared to the previous version:
;   - when the character before a quantifier was escaped ("a\.+"), Index is
;     rewound to the backslash; previously it landed on the bare character, so
;     "\." was re-parsed as the wildcard '.'
;   - "\t", "\n" and "\r" inside a literal run yield TAB, LF and CR, as they
;     already do in character classes
;   - a backslash at the very end of the pattern is ignored instead of
;     consuming the terminating NUL
Procedure Regex_CreateRule_String(*Input.RegexInput)
  Protected *String.RegexRule_String = AddElement(RegexInclude\Rules\String())
  Protected Count.i      ; number of characters stored so far
  Protected LastStart.i  ; pattern index at which the most recent character started
  Protected Character.i
  With *String
    \Type = #RegexRule_String
    Repeat
      Select *Input\Character\c[*Input\Index]
        Case #NUL, '[', '|', ')', '(', '.'
          *Input\Index - 1
          Break
        Case '?', '+', '*', '{'
          If Count > 1
            ; Give the last character back to the caller
            ReDim \Character(Count-2)
            *Input\Index = LastStart - 1
          Else
            *Input\Index - 1
          EndIf
          Break
        Case '\'
          Select *Input\Character\c[*Input\Index+1]
            Case 'd', 'w', 's', 'D', 'W', 'S', '0' To '9'
              ; Character class escape or back reference: not part of the literal run
              *Input\Index - 1
              Break
            Case #NUL
              ; Dangling backslash: stay on it so that the caller's increment hits the NUL
              Break
            Default
              LastStart = *Input\Index
              *Input\Index + 1
              Character = *Input\Character\c[*Input\Index]
              Select Character
                Case 't'
                  Character = #TAB
                Case 'n'
                  Character = #LF
                Case 'r'
                  Character = #CR
              EndSelect
              ReDim \Character(Count)
              \Character(Count) = Character
              Count + 1
          EndSelect
        Default
          LastStart = *Input\Index
          ReDim \Character(Count)
          \Character(Count) = *Input\Character\c[*Input\Index]
          Count + 1
      EndSelect
      *Input\Index + 1
    ForEver
  EndWith
  ProcedureReturn *String
EndProcedure
; Appends '*Rule' to a concatenation, creating the concatenation on demand.
;   *Concatenation - existing concatenation or #Null
;   *Rule          - rule to append; #Null means "nothing to append"
; Returns the concatenation, which is still #Null if none existed and there
; was nothing to append.
Procedure Regex_CreateRule_Concatenation(*Concatenation.RegexRule_Concatenation, *Rule.RegexRule)
  If *Rule = #Null
    ProcedureReturn *Concatenation
  EndIf
  If *Concatenation = #Null
    *Concatenation = AddElement(RegexInclude\Rules\Concatenation())
    *Concatenation\Type = #RegexRule_Concatenation
  EndIf
  Regex_NewChild(*Concatenation, *Rule)
  ProcedureReturn *Concatenation
EndProcedure
; Allocates a rule that matches any single character ('.') and returns it.
Procedure Regex_CreateRule_Any()
  Protected *Any.RegexRule_Any
  *Any = AddElement(RegexInclude\Rules\Any())
  *Any\Type = #RegexRule_Any
  ProcedureReturn *Any
EndProcedure
; Parses a group and returns the new group rule.
;   *Input - parser state; Index is on the first character after '(' (or on the
;            first pattern character for the implicit outermost group)
; Recognised prefixes: "?=" positive lookahead, "?!" negative lookahead,
; "?:" group without capture index, "?P<name>" named capture. Everything else
; is an ordinary numbered capture.
; Changes compared to the previous version:
;   - "(?P" at the very end of the pattern no longer skips over the terminating
;     NUL, and an unterminated "(?P<name" keeps the complete name
;   - an empty body (e.g. "(?:)") no longer passes a null child to Regex_NewChild
;   - an empty group "()" receives its own capture index instead of sharing
;     index 0 with the whole match, so the groups after it are numbered as usual
Procedure Regex_CreateRule_Group(*Input.RegexInput)
  Protected *Group.RegexRule_Group = AddElement(RegexInclude\Rules\Group())
  Protected *Child.RegexRule
  Protected NameIndex.i
  With *Group
    \Type = #RegexRule_Group
    Select *Input\Character\c[*Input\Index]
      Case '?'
        *Input\Index + 1
        Select *Input\Character\c[*Input\Index]
          Case '=' ; positive lookahead
            \Index = #PB_Default
            \Mode = #RegexGroup_PositiveLookahead
            *Input\Index + 1
          Case '!' ; negative lookahead
            \Index = #PB_Default
            \Mode = #RegexGroup_NegativeLookahead
            *Input\Index + 1
          Case ':' ; no capture index
            \Index = #PB_Default
            \Mode = #RegexGroup_NoIndex
            *Input\Index + 1
          Case 'P' ; named capture
            If *Input\Character\c[*Input\Index+1] = '<'
              *Input\Index + 2
              NameIndex = *Input\Index
              While *Input\Character\c[*Input\Index] <> #NUL And *Input\Character\c[*Input\Index] <> '>'
                *Input\Index + 1
              Wend
              \Name = PeekS(@*Input\Character\c[NameIndex], *Input\Index-NameIndex)
              If *Input\Character\c[*Input\Index] = '>'
                *Input\Index + 1
              EndIf
            ElseIf *Input\Character\c[*Input\Index+1] <> #NUL
              ; Unsupported "(?Px" form: skip both characters as before
              *Input\Index + 2
            Else
              *Input\Index + 1
            EndIf
            *Input\CurrentGroupIndex + 1
            \Index = *Input\CurrentGroupIndex
        EndSelect
      Case ')', #NUL
        ; Empty group: no child, but it still consumes a capture index
        *Input\CurrentGroupIndex + 1
        \Index = *Input\CurrentGroupIndex
        ProcedureReturn *Group
      Default
        *Input\CurrentGroupIndex + 1
        \Index = *Input\CurrentGroupIndex
    EndSelect
    ; Parse the group body; Regex_CreateRule returns #Null for an empty body
    *Child = Regex_CreateRule(*Input)
    If *Child
      Regex_NewChild(*Group, *Child)
    EndIf
  EndWith
  ProcedureReturn *Group
EndProcedure
; Creates a back reference rule from the single digit at the current index.
;   *Input - parser state; Index is on the digit that follows the backslash
; Returns the new rule; its Index field is the numeric value of that digit.
Procedure Regex_CreateRule_Backreference(*Input.RegexInput)
  Protected *Backreference.RegexRule_Backreference
  Protected Digit.i = *Input\Character\c[*Input\Index]
  *Backreference = AddElement(RegexInclude\Rules\Backreference())
  *Backreference\Type = #RegexRule_Backreference
  *Backreference\Index = Digit - '0'
  ProcedureReturn *Backreference
EndProcedure
; Parses a sequence of rules up to the next ')' or the end of the pattern and
; returns the root of the resulting rule tree:
;   - a Decision if at least one '|' was seen,
;   - otherwise a Concatenation if more than one element was parsed,
;   - otherwise the single element (or #Null if nothing was parsed).
; *CurrentElement holds the most recently parsed element. It is only appended
; to the concatenation when the next element starts, so that a following
; quantifier can still wrap it.
; Every sub-parser leaves *Input\Index on the last character it consumed; the
; increment at the bottom of the loop moves on to the next one.
; NOTE(review): a quantifier with no preceding element (e.g. a pattern that
; starts with '*') passes a null rule to Regex_CreateRule_Quantifier.
Procedure Regex_CreateRule(*Input.RegexInput)
Protected *CurrentElement.RegexRule
Protected *Decision.RegexRule_Decision
Protected *Concatenation.RegexRule_Concatenation
Repeat
Select *Input\Character\c[*Input\Index]
; Any character
Case '.'
*Concatenation = Regex_CreateRule_Concatenation(*Concatenation, *CurrentElement)
*CurrentElement = Regex_CreateRule_Any()
; Character class
Case '['
*Input\Index + 1
*Concatenation = Regex_CreateRule_Concatenation(*Concatenation, *CurrentElement)
*CurrentElement = Regex_CreateRule_Set(*Input)
; Escape: back reference (digit) or a single-escape character class
Case '\'
*Concatenation = Regex_CreateRule_Concatenation(*Concatenation, *CurrentElement)
Select *Input\Character\c[*Input\Index+1]
Case '0' To '9'
*Input\Index + 1
*CurrentElement = Regex_CreateRule_Backreference(*Input)
Default
*CurrentElement = Regex_CreateRule_Set(*Input, #True)
EndSelect
; Subexpression
Case '('
*Input\Index + 1
*Concatenation = Regex_CreateRule_Concatenation(*Concatenation, *CurrentElement)
*CurrentElement = Regex_CreateRule_Group(*Input)
; End of the enclosing group: Index stays on ')' for the caller
Case ')'
Break
; Repetition: wraps the most recent element
Case '?', '*', '+', '{'
*CurrentElement = Regex_CreateRule_Quantifier(*Input, *CurrentElement)
; Alternation: close the current alternative and start a new one
Case '|'
If Not *Decision
*Decision = AddElement(RegexInclude\Rules\Decision())
*Decision\Type = #RegexRule_Decision
EndIf
If *Concatenation
*Concatenation = Regex_CreateRule_Concatenation(*Concatenation, *CurrentElement)
Regex_NewChild(*Decision, *Concatenation)
*Concatenation = #Null
Else
Regex_NewChild(*Decision, *CurrentElement)
EndIf
*CurrentElement = #Null
; End of pattern: step back so that the caller's increment lands on the NUL again
Case #NUL
*Input\Index - 1
Break
; Nothing: a space at the start of an element is skipped
Case ' '
; String literal
Default
*Concatenation = Regex_CreateRule_Concatenation(*Concatenation, *CurrentElement)
*CurrentElement = Regex_CreateRule_String(*Input)
EndSelect
*Input\Index + 1
ForEver
; Flush the pending element into the concatenation, if there is one
If *Concatenation And *CurrentElement
*Concatenation = Regex_CreateRule_Concatenation(*Concatenation, *CurrentElement)
EndIf
If *Decision
; Add the last alternative
If *Concatenation
Regex_NewChild(*Decision, *Concatenation)
Else
Regex_NewChild(*Decision, *CurrentElement)
EndIf
ProcedureReturn *Decision
ElseIf *Concatenation
ProcedureReturn *Concatenation
Else
ProcedureReturn *CurrentElement
EndIf
EndProcedure
; Prints the rule tree below '*Rule' to the debug output, one node per line,
; indented by two spaces per level.
;   *Rule      - node to print; #Null prints "Empty"
;   Level      - nesting depth used for the indentation
;   GroupIndex - capture index of the enclosing group, -1 for none
;   GroupName  - capture name of the enclosing group, "" for none
; Group nodes print no line of their own; their index and name are attached to
; the line of their child.
Procedure Regex_Debug(*Rule.RegexRule, Level.i=0, GroupIndex.i=-1, GroupName.s="")
  Protected Text.s, Word.i, Bit.i
  Protected Indent.s, Label.s, Kind.s, Suffix.s, *Node.RegexRule
  Protected *Set.RegexRule_Set, *String.RegexRule_String, *Quantifier.RegexRule_Quantifier, *Group.RegexRule_Group, *Backreference.RegexRule_Backreference
  Indent = Space(Level*2)
  If GroupName
    Label = " ("+Str(GroupIndex)+", '"+GroupName+"') "
  ElseIf GroupIndex <> -1
    Label = " ("+Str(GroupIndex)+") "
  EndIf
  If Not *Rule
    Debug Indent+"Empty"+Label
    ProcedureReturn
  EndIf
  Select *Rule\Type
    Case #RegexRule_Any
      Debug Indent+"Any"+Label
    Case #RegexRule_Set
      ; List every character whose bit is set in the mask
      *Set = *Rule
      For Word = 0 To ArraySize(*Set\Mask())
        For Bit = 0 To 31
          If *Set\Mask(Word) & 1<<Bit
            Text + Chr(Word<<5+Bit)
          EndIf
        Next
      Next
      If *Set\Negate
        Kind = "Set (Negate)"
      Else
        Kind = "Set"
      EndIf
      Debug Indent+Kind+Label+": '"+Text+"'"
    Case #RegexRule_String
      *String = *Rule
      For Word = 0 To ArraySize(*String\Character())
        Text + Chr(*String\Character(Word))
      Next
      Debug Indent+"String"+Label+": "+Text
    Case #RegexRule_Concatenation, #RegexRule_Decision
      If *Rule\Type = #RegexRule_Concatenation
        Debug Indent+"Concatenation"+Label
      Else
        Debug Indent+"Decision"+Label
      EndIf
      *Node = *Rule\FirstChild
      While *Node
        Regex_Debug(*Node, Level+1)
        *Node = *Node\NextSibling
      Wend
    Case #RegexRule_Quantifier
      *Quantifier = *Rule
      Select *Quantifier\Mode
        Case #RegexQuantifier_Modest
          Suffix = " ] (modest)"
        Case #RegexQuantifier_Normal
          Suffix = " ] (normal)"
        Case #RegexQuantifier_Greedy
          Suffix = " ] (greedy)"
      EndSelect
      ; An unknown mode prints no header line, only the child
      If Suffix
        Debug Indent+"Quantifier"+Label+" [ "+Str(*Quantifier\Min)+" , "+Str(*Quantifier\Max)+Suffix
      EndIf
      Regex_Debug(*Rule\FirstChild, Level+1)
    Case #RegexRule_Group
      *Group = *Rule
      Regex_Debug(*Rule\FirstChild, Level, *Group\Index, *Group\Name)
    Case #RegexRule_Backreference
      *Backreference = *Rule
      Debug Indent+"Backreference : "+Str(*Backreference\Index)
  EndSelect
EndProcedure
; Pushes a backtracking state onto the global stack.
;   Position  - input position to restore when the state is resumed
;   *Rule     - rule to resume (a quantifier or a decision)
;   Parameter - rule-specific value: iteration count for quantifiers,
;               address of the current alternative for decisions
; For a quantifier in #RegexQuantifier_Greedy mode whose state is already on
; top of the stack, that state is overwritten instead of a new one being added.
; The stack now grows when it is full; previously a sufficiently deep match
; wrote past the end of the array. Growing may move the array, so pointers
; obtained from Regex_GetExamineState must not be kept across this call.
Procedure Regex_AddExamineState(Position.i, *Rule.RegexRule_Quantifier, Parameter.i=0)
  Protected *Entry.RegexInclude_StackEntry
  If *Rule\Type = #RegexRule_Quantifier And *Rule\Mode = #RegexQuantifier_Greedy And RegexInclude\Stack\Index > 0 And RegexInclude\Stack\Entry(RegexInclude\Stack\Index-1)\Rule = *Rule
    RegexInclude\Stack\Index - 1
    Debug " > Replace State ["+Str(RegexInclude\Stack\Index)+"]" , 2
  Else
    Debug " > Add State ["+Str(RegexInclude\Stack\Index)+"]" , 2
  EndIf
  If RegexInclude\Stack\Index > ArraySize(RegexInclude\Stack\Entry())
    ReDim RegexInclude\Stack\Entry(ArraySize(RegexInclude\Stack\Entry()) * 2 + 1)
  EndIf
  *Entry = @RegexInclude\Stack\Entry(RegexInclude\Stack\Index)
  *Entry\Position = Position
  *Entry\Rule = *Rule
  *Entry\Parameter = Parameter
  RegexInclude\Stack\Index + 1
EndProcedure
; Returns the address of the topmost backtracking state without removing it,
; or #Null if the stack is empty.
Procedure Regex_GetExamineState()
  Protected Depth.i = RegexInclude\Stack\Index
  If Depth = 0
    ProcedureReturn #Null
  EndIf
  ProcedureReturn @RegexInclude\Stack\Entry(Depth-1)
EndProcedure
; Tries to match the rule tree '*Rule' at the very start of '*Character'.
;   *Character - NUL-terminated input text; matching starts at index 0
;   *Rule      - root of the rule tree
;   Hit        - receives the captured text of every closed group, keyed by the
;                capture index as a string and, if present, by the capture name
; Returns the number of characters matched, or #Regex_NoMatch.
; The matcher is an explicit state machine instead of a recursion:
;   - the loop at Regex_Examine_Continue descends into the rule tree and
;     tests leaf rules against the input,
;   - the loop at Regex_Examine_Back climbs back to the parents, closing groups
;     and moving on to the next sibling or the next repetition,
;   - the final While loop resumes a saved backtracking state after a failure.
; Inside the With block, "\Type" and "\FirstChild" refer to the current value of *Rule.
Procedure Regex_Examine(*Character.CharacterArray, *Rule.RegexRule, Map Hit.s())
Protected Position.i, String.s
Protected LastIndex.i, Index.i
Protected *ParentRule.RegexRule, *State.RegexInclude_StackEntry
Protected *Set.RegexRule_Set, *String.RegexRule_String, *Quantifier.RegexRule_Quantifier, *Group.RegexRule_Group, *Backreference.RegexRule_Backreference
Debug "Examine: '"+PeekS(*Character)+"'" , 2
; Start with an empty backtracking stack
RegexInclude\Stack\Index = 0
With *Rule
Regex_Examine_Continue:
Repeat
; --- Descent: enter the current rule ---
Select \Type
Case #RegexRule_Group ; Subexpression
*Group = *Rule
Select *Group\Mode
Case #RegexGroup_Normal
Debug RSet(Str(Position),5)+" Open Group" , 2
Case #RegexGroup_PositiveLookahead
Debug RSet(Str(Position),5)+" Open Positive Lookahead" , 2
Case #RegexGroup_NegativeLookahead
Debug RSet(Str(Position),5)+" Open Negative Lookahead" , 2
EndSelect
; Remember where the group started (used for the capture and for lookaheads)
*Group\TempPosition = Position
If \FirstChild
*Rule = \FirstChild
Continue
EndIf
Case #RegexRule_Concatenation ; Concatenation
Debug RSet(Str(Position),5)+" Open Concatenation" , 2
*Rule = *Rule\FirstChild
Continue
Case #RegexRule_Decision ; Alternation
Debug RSet(Str(Position),5)+" Open Decision" , 2
; Save a state so that the next alternative can be tried on failure
Regex_AddExamineState(Position, *Rule, *Rule\FirstChild)
*Rule = *Rule\FirstChild
Continue
Case #RegexRule_Quantifier ; Repetition
*Quantifier = *Rule
If *Quantifier\Max > 0
Debug RSet(Str(Position),5)+" Open Quantifier (Value=0)" , 2
*Quantifier\TempValue = 0
If *Quantifier\TempValue >= *Quantifier\Min
; Zero repetitions are allowed: save a state for this choice
Regex_AddExamineState(Position, *Rule, *Quantifier\TempValue)
If *Quantifier\Mode = #RegexQuantifier_Modest
; Modest mode: do not enter the child now, fall through to the ascent
Debug RSet(Str(Position),5)+" Break Quantifier (Value="+Str(*Quantifier\TempValue)+")" , 2
Else
*Rule = \FirstChild
Continue
EndIf
Else
*Rule = \FirstChild
Continue
EndIf
EndIf
Case #RegexRule_Any ; Any character
If *Character\c[Position] <> #NUL
Debug RSet(Str(Position),5)+" Check Any: +1" , 2
Position + 1
Else
Debug RSet(Str(Position),5)+" Check Any: FAIL" , 2
Position = #Regex_NoMatch
;Break
EndIf
Case #RegexRule_Backreference ; Back reference
*Backreference = *Rule
; Compare the input with the text captured so far for that index
String = Hit(Str(*Backreference\Index))
If PeekS(@*Character\c[Position], Len(String)) = String
Debug RSet(Str(Position),5)+" Check Backreference: +"+Str(Len(String)) , 2
Position + Len(String)
Else
Debug RSet(Str(Position),5)+" Check Backreference: FAIL" , 2
Position = #Regex_NoMatch
;Break
EndIf
Case #RegexRule_Set ; Character class
*Set = *Rule
If *Character\c[Position] <> #NUL
; First branch: the character's bit is not set (or lies outside the mask)
If ArraySize(*Set\Mask()) < *Character\c[Position]>>5 Or *Set\Mask(*Character\c[Position]>>5) & 1<<(*Character\c[Position]%32) = 0
If *Set\Negate
Debug RSet(Str(Position),5)+" Check Set: +1" , 2
Position + 1
Else
Debug RSet(Str(Position),5)+" Check Set: FAIL" , 2
Position = #Regex_NoMatch
;Break
EndIf
Else
If *Set\Negate
Debug RSet(Str(Position),5)+" Check Set: FAIL" , 2
Position = #Regex_NoMatch
;Break
Else
Debug RSet(Str(Position),5)+" Check Set: +1" , 2
Position + 1
EndIf
EndIf
Else
Position = #Regex_NoMatch
;Break
EndIf
Case #RegexRule_String ; String literal
*String = *Rule
; The Repeat ... Until #True runs once; it exists so that "Break 2" can leave the comparison
Repeat
LastIndex.i = ArraySize(*String\Character())
For Index = 0 To LastIndex
If *Character\c[Position+Index] <> *String\Character(Index)
Debug RSet(Str(Position),5)+" Check String: FAIL" , 2
Position = #Regex_NoMatch
Break 2
EndIf
Next
Debug RSet(Str(Position),5)+" Check String: +"+Str(LastIndex+1) , 2
Position + (LastIndex+1)
Until #True
EndSelect
Regex_Examine_Back:
; --- Ascent: report the result of *Rule to its parents ---
Repeat
*ParentRule = *Rule\Parent
If *ParentRule
Select *ParentRule\Type
Case #RegexRule_Group
*Rule = *ParentRule
*Group = *Rule
; Lookaheads never consume input: restore the start position and
; turn the child's result into success or failure
If *Group\Mode = #RegexGroup_PositiveLookahead
If Position = #Regex_NoMatch
Debug " Close Positive Lookahead" , 2
Else
Position = *Group\TempPosition
Debug RSet(Str(Position),5)+" Close Positive Lookahead" , 2
EndIf
ElseIf *Group\Mode = #RegexGroup_NegativeLookahead
If Position = #Regex_NoMatch
Position = *Group\TempPosition
Debug RSet(Str(Position),5)+" Close Negative Lookahead" , 2
Else
Position = #Regex_NoMatch
Debug " Close Negative Lookahead" , 2
EndIf
EndIf
If Position = #Regex_NoMatch
Debug " Fail" , 2
Break 2
EndIf
; Record the captured text under its index and, if named, its name
If *Group\Index <> -1
String = PeekS(@*Character\c[*Group\TempPosition], Position-*Group\TempPosition)
Hit(Str(*Group\Index)) = String
If *Group\Name
Hit(*Group\Name) = String
EndIf
EndIf
If *Group\Mode = #RegexGroup_Normal
Debug RSet(Str(Position),5)+" Close Group '"+String+"'" , 2
ElseIf *Group\Mode = #RegexGroup_NoIndex
Debug RSet(Str(Position),5)+" Close Group" , 2
EndIf
Case #RegexRule_Concatenation
If Position = #Regex_NoMatch
Debug " Fail" , 2
Break 2
EndIf
; Go on with the next element, or close the concatenation after the last one
*Rule = *Rule\NextSibling
If *Rule
Debug RSet(Str(Position),5)+" Continue Concatenation" , 2
Break
Else
Debug RSet(Str(Position),5)+" Close Concatenation" , 2
*Rule = *ParentRule
EndIf
Case #RegexRule_Decision
If Position = #Regex_NoMatch
Debug " Fail" , 2
Break 2
EndIf
Debug RSet(Str(Position),5)+" Close Decision" , 2
*Rule = *ParentRule
Case #RegexRule_Quantifier
If Position = #Regex_NoMatch
Debug " Fail" , 2
Break 2
EndIf
; One more repetition of the child succeeded
*Quantifier = *ParentRule
*Quantifier\TempValue + 1
If *Quantifier\Max > *Quantifier\TempValue
If *Quantifier\TempValue >= *Quantifier\Min
; Stopping here is allowed: save a state for this choice
Regex_AddExamineState(Position, *ParentRule, *Quantifier\TempValue)
EndIf
If *Quantifier\Mode = #RegexQuantifier_Modest
; Modest mode: leave the quantifier after this repetition
Debug RSet(Str(Position),5)+" Break Quantifier (Value="+Str(*Quantifier\TempValue)+")" , 2
*Rule = *ParentRule
Else
; Otherwise run the child again (*Rule still points to it)
Debug RSet(Str(Position),5)+" Continue Quantifier (Value="+Str(*Quantifier\TempValue)+")" , 2
Break
EndIf
Else
Debug RSet(Str(Position),5)+" Close Quantifier (Value="+Str(*Quantifier\TempValue)+")" , 2
*Rule = *ParentRule
EndIf
EndSelect
Else
; Reached the root: leave both loops
Break 2
EndIf
ForEver
ForEver
; --- Backtracking: on failure, resume the most recently saved state ---
While Position = #Regex_NoMatch
*State = Regex_GetExamineState()
If *State
*Rule = *State\Rule
; No Select block here because of the Goto!
If \Type = #RegexRule_Quantifier
*Quantifier = *Rule
Position = *State\Position
*Quantifier\TempValue = *State\Parameter
RegexInclude\Stack\Index - 1
Debug " < Backtracking" , 2
If *Quantifier\Mode = #RegexQuantifier_Modest
; Modest mode: the saved state stopped here, so now try one more repetition
*Rule = *Rule\FirstChild
Goto Regex_Examine_Continue
Else
; Otherwise: continue after the quantifier with the saved repetition count
Goto Regex_Examine_Back
EndIf
ElseIf \Type = #RegexRule_Decision
; Advance to the next alternative and store it in the state
*Rule = *State\Child\NextSibling
*State\Parameter = *Rule
If *Rule
Position = *State\Position
Debug " < Backtracking" , 2
Goto Regex_Examine_Continue
Else
; No alternative left: drop the state and try the one below it
RegexInclude\Stack\Index - 1
EndIf
EndIf
Else
Break
EndIf
Wend
EndWith
ProcedureReturn Position
EndProcedure
;- 4.2 Procedures for the Regex
;¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
; Resolves a regex number or handle to the address of its Regex structure.
;   Regex - number passed to CreateRegex, or the value returned by
;           CreateRegex(#PB_Any, ...)
; Values with bits outside #Regex_IndexNumber are taken to be addresses and are
; returned unchanged. Numbers are looked up in the table; #Null is returned for
; a number that has never been created. Previously a number beyond the table
; size indexed past the end of the array.
Procedure.i RegexID(Regex.i)
  If Regex & #Regex_AnyNumber
    ProcedureReturn Regex
  ElseIf Regex <= ArraySize(RegexInclude\RegexID())
    ProcedureReturn RegexInclude\RegexID(Regex)
  EndIf
  ProcedureReturn #Null
EndProcedure
; Looks for a regex whose stored number equals 'Regex'.
; Returns the address of its Regex structure, or #Null if there is none.
; For regexes created with #PB_Any the stored number is the structure's own address.
Procedure.i IsRegex(Regex.i)
  Protected *Found.Regex = #Null
  ForEach RegexInclude\Regex()
    If RegexInclude\Regex()\Number = Regex
      *Found = @RegexInclude\Regex()
      Break
    EndIf
  Next
  ProcedureReturn *Found
EndProcedure
; Removes a regex object.
;   Regex - regex number or handle
; Returns #True if the regex was removed, #False if 'Regex' does not resolve
; to an object (previously this case dereferenced a null pointer).
; NOTE(review): the rule nodes allocated in RegexInclude\Rules for this regex
; are not released here.
Procedure.i FreeRegex(Regex.i)
  Protected *Regex.Regex = RegexID(Regex)
  If *Regex = #Null
    ProcedureReturn #False
  EndIf
  ; Numbered regexes also occupy a slot in the lookup table
  If (*Regex\Number & #Regex_AnyNumber) = 0
    RegexInclude\RegexID(*Regex\Number) = #Null
  EndIf
  ChangeCurrentElement(RegexInclude\Regex(), *Regex)
  DeleteElement(RegexInclude\Regex())
  ProcedureReturn #True
EndProcedure
; Compiles 'Syntax' into a rule tree and registers it as a regex object.
;   Regex  - number in the range covered by #Regex_IndexNumber, or #PB_Any
;   Syntax - the pattern text
; Returns the address of the new Regex structure, or #Null if 'Regex' is
; neither #PB_Any nor a valid number. A regex already registered under the
; same number is freed first.
Procedure.i CreateRegex(Regex.i, Syntax.s)
  Protected *Regex.Regex
  Protected Input.RegexInput
  ; Reject everything that is neither #PB_Any nor a plain number
  If Regex <> #PB_Any And (Regex & #Regex_AnyNumber)
    ProcedureReturn #Null
  EndIf
  *Regex = AddElement(RegexInclude\Regex())
  If Regex = #PB_Any
    ; Dynamic regexes are identified by their own address
    *Regex\Number = *Regex
  Else
    *Regex\Number = Regex
    If ArraySize(RegexInclude\RegexID()) < Regex
      ReDim RegexInclude\RegexID(Regex)
    ElseIf RegexInclude\RegexID(Regex)
      FreeRegex(RegexInclude\RegexID(Regex))
    EndIf
    RegexInclude\RegexID(Regex) = *Regex
  EndIf
  ; Parse the whole pattern as the implicit outermost group (capture index 0)
  With Input
    \Character = @Syntax
    \Index = 0
    \CurrentGroupIndex = -1
    \Regex = *Regex
  EndWith
  *Regex\Rule = Regex_CreateRule_Group(@Input)
  ProcedureReturn *Regex
EndProcedure
; Prints the rule tree of a regex to the debug output.
;   Regex - regex number or handle; an unknown regex is ignored
;           (previously it dereferenced a null pointer)
Procedure DebugRegex(Regex.i)
  Protected *Regex.Regex = RegexID(Regex)
  If *Regex
    Regex_Debug(*Regex\Rule)
  EndIf
EndProcedure
; Calls 'Callback' once for every match of the regex in 'String'.
;   Regex    - regex number or handle
;   String   - text to search
;   Callback - receives the Hit map of the match; the map's current element is
;              the whole match (key "0")
; The search resumes behind each match; after an empty match or a failed
; attempt it moves on by one character.
; An unknown regex, a null callback or an empty string now simply yields no
; matches instead of dereferencing a null pointer.
Procedure ExtractRegex(Regex.i, String.s, Callback.ExtractRegexCallback)
  Protected *Regex.Regex = RegexID(Regex)
  Protected *Character.CharacterArray = @String
  Protected Index.i, Result.i
  Protected NewMap Hit.s()
  If *Regex = #Null Or Callback = #Null Or *Character = #Null
    ProcedureReturn
  EndIf
  While *Character\c[Index]
    ClearMap(Hit())
    Result = Regex_Examine(@*Character\c[Index], *Regex\Rule, Hit())
    If Result <> #Regex_NoMatch
      ; Only report matches for which the whole-match capture exists
      If FindMapElement(Hit(), "0")
        Callback(Hit())
        If Result > 0
          Index + Result - 1
        EndIf
      EndIf
    EndIf
    Index + 1
  Wend
EndProcedure
; Calls 'Callback' with position and length of every match of the regex in 'String'.
;   Regex    - regex number or handle
;   String   - text to search
;   Callback - receives the 0-based start index and the length of each match
; The search resumes behind each match; after an empty match or a failed
; attempt it moves on by one character.
; An unknown regex, a null callback or an empty string now simply yields no
; matches instead of dereferencing a null pointer.
Procedure FindRegex(Regex.i, String.s, Callback.FindRegexCallback)
  Protected *Regex.Regex = RegexID(Regex)
  Protected *Character.CharacterArray = @String
  Protected Index.i, Result.i
  Protected NewMap Hit.s()
  If *Regex = #Null Or Callback = #Null Or *Character = #Null
    ProcedureReturn
  EndIf
  While *Character\c[Index]
    ClearMap(Hit())
    Result = Regex_Examine(@*Character\c[Index], *Regex\Rule, Hit())
    If Result <> #Regex_NoMatch
      Callback(Index, Result)
      If Result > 0
        Index + Result - 1
      EndIf
    EndIf
    Index + 1
  Wend
EndProcedure
; Replaces every match of the regex in 'String' with the text returned by 'Callback'.
;   Regex    - regex number or handle
;   String   - text to process
;   Callback - receives the Hit map of the match and returns the replacement text
; Returns the resulting string. An unknown regex or a null callback returns
; 'String' unchanged.
; The search always resumes behind the inserted replacement, so replacement
; text is never scanned again. After an empty match the character following
; the insertion is skipped as well. Previously an empty match with a non-empty
; replacement advanced by only one character into the inserted text, matched
; empty again in front of the same character and never terminated.
Procedure.s ReplaceRegex(Regex.i, String.s, Callback.ReplaceRegexCallback)
  Protected *Regex.Regex = RegexID(Regex)
  Protected *Character.CharacterArray = @String
  Protected Index.i, Result.i, NewString.s
  Protected NewMap Hit.s()
  If *Regex = #Null Or Callback = #Null
    ProcedureReturn String
  EndIf
  ; The address of an empty string can be null, hence the pointer check
  While *Character And *Character\c[Index]
    ClearMap(Hit())
    Result = Regex_Examine(@*Character\c[Index], *Regex\Rule, Hit())
    If Result <> #Regex_NoMatch
      ; Make the whole match the current map element for the callback
      FindMapElement(Hit(), "0")
      NewString = Callback(Hit())
      String = Left(String, Index) + NewString + Mid(String, Index+Result+1)
      ; The assignment may have moved the string buffer
      *Character = @String
      Index + Len(NewString)
      If Result = 0
        ; Empty match: step over the character that follows the insertion
        Index + 1
      EndIf
    Else
      Index + 1
    EndIf
  Wend
  ProcedureReturn String
EndProcedure
; Predefined character groups as 256-bit masks (8 longs each).
; Bit (c % 32) of long (c / 32) stands for character code c.
DataSection
; \d : codes 48..57 ('0'..'9')
Regex_Set_d:
Data.l $00000000, $03FF0000, $00000000, $00000000, $00000000, $00000000, $00000000, $00000000
; \s : codes 9 (TAB), 10 (LF), 13 (CR) and 32 (space)
Regex_Set_s:
Data.l $00002600, $00000001, $00000000, $00000000, $00000000, $00000000, $00000000, $00000000
; \w : '0'..'9', 'A'..'Z', '_' (code 95) and 'a'..'z'
Regex_Set_w:
Data.l $00000000, $03FF0000, $87FFFFFE, $07FFFFFE, $00000000, $00000000, $00000000, $00000000
EndDataSection