findInString(), ignore string literals and comments

Share your advanced PureBasic knowledge/code with the community.
#NULL
Addict
Addict
Posts: 1440
Joined: Thu Aug 30, 2007 11:54 pm
Location: right here

findInString(), ignore string literals and comments

Post by #NULL »

A procedure to search for a substring in a string that might be a line of PureBasic code, so it has the option to ignore occurrences in string literals and comments.
For example the string "IncludeFile" will by default be found only in the last of the following 3 lines:

Code: Select all

; IncludeFile "MyFile"
myString.s = ~"IncludeFile \"MyFile\""
IncludeFile "MyFile"
It does not handle case-insensitivity or word boundaries, but I don't want to create a jack-of-all-trades device (if that's proper English).

Code: Select all

EnableExplicit

DeclareModule common
  
  EnableExplicit
  
  ; findInString() - locate *findStr inside *inStr, where *inStr is treated as a single line of PureBasic code.
  ;
  ;   *inStr            pointer to the null-terminated line to scan
  ;   *findStr          pointer to the null-terminated text to look for (compared character by character)
  ;   searchInStrings   #False (default): a match starting inside a "..", ~".." or '..' literal is ignored
  ;   searchInComments  #False (default): scanning stops at the first ';' found outside a literal
  ;
  ; Returns the 0-based character index of the first accepted match, or -1 if there is none,
  ; if either pointer is null, or if *findStr is empty.
  Declare.i findInString(*inStr.Character, *findStr.Character, searchInStrings = #False, searchInComments = #False)
  
EndDeclareModule

Module common
  
  EnableExplicit
  
  ; See the declaration in DeclareModule for the parameter/return contract.
  ;
  ; Implementation notes:
  ; - Only the START position of a candidate match is checked against the string/comment state;
  ;   the comparison itself then runs over the raw characters.
  ; - A ';' only starts a comment when it is found outside of a string literal.
  Procedure.i findInString(*inStr.Character, *findStr.Character, searchInStrings = #False, searchInComments = #False)
    
    #_DQ  = '"'  ; "
    #_SQ  = 39   ; '
    #_BSL = '\'  ; \
    
    Protected inStringDQ    ;  "x"
    Protected inStringSQ    ;  'x'
    Protected inStringDQEsc ; ~"x"
    Protected inString      ; (any)
    Protected inStringPrevious
    Protected inComment     ; .. ; ...
    Protected i
    Protected *c.Character
    Protected *cTmp.Character
    Protected cntBSL
    Protected iTmp
    Protected *cBefore.Character
    Protected *cFind.Character
    
    If *inStr And *findStr And *findStr\c <> 0
      
      *c = *inStr
      *cBefore = 0
      i = 0
      
      While *c\c <> 0
        
        ; remember the state before this character, so a closing delimiter can be recognized further down
        inStringPrevious = inString
        
        ; String openings/closings have to be tracked whenever strings OR comments are excluded:
        ; - strings excluded : to know which positions lie inside a literal
        ; - comments excluded: to tell a real comment start from a ';' that is part of a literal
        ; Only if both are searched, every position is a valid match start and no tracking is needed.
        If searchInStrings = #False Or searchInComments = #False
          
          If Not inString
            
            ; quotes inside a comment do not open a string
            If Not inComment
              
              If *c\c = #_SQ
                ; opening '..
                inString   = #True
                inStringSQ = #True
                
              ElseIf *c\c = #_DQ
                ; opening ".. or ~"..
                inString = #True
                If *cBefore And *cBefore\c = '~'
                  inStringDQEsc = #True ; escape string ~"..
                Else
                  inStringDQ    = #True ; normal string  "..
                EndIf
              EndIf
              
            EndIf
            
          Else
            ; (inString)
            
            If inStringDQEsc
              If *c\c = #_DQ
                
                ; check if DQ is escaped by preceding backslash.
                ; backslashes can be escaped themselves by a preceding backslash, so the
                ; DQ is escaped if preceded by an odd number of continuous backslashes.
                If i > 0
                  *cTmp = *c - SizeOf(Character)
                  iTmp  = i - 1
                  cntBSL = 0
                  Repeat
                    If *cTmp\c = #_BSL
                      cntBSL + 1
                    Else
                      Break
                    EndIf
                    *cTmp - SizeOf(Character)
                    iTmp - 1
                  Until iTmp < 0 ; never read in front of the first character of inStr
                  If cntBSL % 2 = 0
                    ; even number of preceding backslashes, DQ is not escaped, close string
                    inStringDQEsc = #False
                    inString      = #False
                  EndIf
                Else
                  ; just here for logic. if not i>0 we wouldn't be in a string anyway.
                  inStringDQEsc = #False
                  inString      = #False
                EndIf
              EndIf
              
            ElseIf inStringDQ
              If *c\c = #_DQ
                inStringDQ = #False
                inString   = #False
              EndIf
              
            ElseIf inStringSQ
              If *c\c = #_SQ
                inStringSQ = #False
                inString   = #False
              EndIf
              
            EndIf
          EndIf
          
        EndIf
        
        If Not inString
          If *c\c = ';' ; begin of comment
            inComment = #True
            If Not searchInComments
              ; finish if comment reached and not searched in
              ProcedureReturn -1
            EndIf
          EndIf
        EndIf
        
        ; is the current position allowed to be the start of a match?
        If ((Not inString) Or (searchInStrings)) And ((Not inComment) Or (searchInComments))
          
          ; first char matches
          If *c\c = *findStr\c
            
            ; if findStr starts with the actual character " or ' and searchInStrings is disabled then we don't want
            ; a match at a string closing delimiter (inString has already been reset at this point)
            If ((*findStr\c = #_DQ) Or (*findStr\c = #_SQ)) And (Not inString And inStringPrevious And Not searchInStrings)
              ; (ignore)
            Else
              
              ; compare all chars
              *cTmp = *c
              *cFind = *findStr
              While (*cTmp\c = *cFind\c) And (*cTmp\c <> 0)
                *cTmp  + SizeOf(Character)
                *cFind + SizeOf(Character)
              Wend
              If *cFind\c = 0
                ; reached end of find, all chars matched, return start pos.
                ; null or empty inStr/findStr are handled by If/While at the beginning of the procedure and will return -1.
                ProcedureReturn i
              EndIf
            EndIf
          EndIf
        EndIf
        
        *cBefore = *c
        *c + SizeOf(Character)
        i + 1
      Wend
    EndIf
    ProcedureReturn -1
  EndProcedure
  
EndModule

; Self-test section: only compiled when this file is the main source file (see #PB_Compiler_IsMainFile).
; The checks run top to bottom and reuse the variables s (line under test) and p (result position).
CompilerIf #PB_Compiler_IsMainFile
  ; Expands to a single double-quote character. Only referenced by the commented-out line inside assert().
  Macro DQ
    "
  EndMacro
  ; Prints both expressions and raises a debugger error if exp1 and exp2 are not equal.
  ; Being a macro, both expressions are expanded (and therefore evaluated) more than once.
  Macro assert(exp1, exp2)
    If Not ((exp1) = (exp2))
      Debug exp1
      Debug exp2
      DebuggerError("assert failed at line " + Str(#PB_Compiler_Line))
      ;DebuggerError(DQ#exp1#DQ + " = " + Str(exp1))
    EndIf
  EndMacro
  
  If 1 ;{
    Define s.s = ""
    Define p
    
    ; null/empty input string or empty search string: always -1
    s = #Null$                              : assert(common::findInString(@ s, @ ";"), -1)
    s = ""                                  : assert(common::findInString(@ s, @ ";"), -1)
    s = ""                                  : assert(common::findInString(@ s, @ ""), -1)
    s = "a"                                 : assert(common::findInString(@ s, @ ""), -1)
    
    ; if not searching in comments then even the comment opener ';' won't be found
    s = ";"                                 : assert(common::findInString(@ s, @ ";"), -1)
    
    ; searching everywhere (strings and comments enabled): the first occurrence wins
    s = "abc ; comment"                     : assert(common::findInString(@ s, @ ";", #True, #True), 4)
    s = "abc ; comment"                     : assert(common::findInString(@ s, @ "com", #True, #True), 6)
    s = "abc ; comment and ; more comment"  : assert(common::findInString(@ s, @ ";", #True, #True), 4)
    s = ";"                                 : assert(common::findInString(@ s, @ ";", #True, #True), 0)
    
    ; null pointers and empty strings with both options enabled: still -1
    s = ""                                  : assert(common::findInString(  0,    @ "",  #True, #True), -1)
    s = ""                                  : assert(common::findInString(@ s,       0,  #True, #True), -1)
    s = ""                                  : assert(common::findInString(  0,       0,  #True, #True), -1)
    s = ""                                  : assert(common::findInString(@ s,    @ "",  #True, #True), -1)
    
    ; all four option combinations on a line with a character literal and a comment,
    ; followed by two lines where the only occurrence lies behind the comment start
    s = " 'x' ; x"                          : assert(common::findInString(@ s,   @ "x",  #False, #False), -1)
    s = " 'x' ; x"                          : assert(common::findInString(@ s,   @ "x",  #False, #True), 7)
    s = " 'x' ; x"                          : assert(common::findInString(@ s,   @ "x",  #True, #True), 2)
    s = " 'x' ; x"                          : assert(common::findInString(@ s,   @ "x",  #True, #False), 2)
    s = "  y ; 'x' "                        : assert(common::findInString(@ s,   @ "x",  #True, #False), -1)
    s = ~" y ; \"x\" "                      : assert(common::findInString(@ s,   @ "x",  #True, #False), -1)
    
    ; if not searching in strings then even a string opener won't be found
    s = ~"\""                               : assert(common::findInString(@ s,   @ ~"\"",  #False), -1)
    s = ~"\""                               : assert(common::findInString(@ s,   @ ~"\"",  #True), 0)
    ; if not searching in strings then a string closing delimiter won't be found
    s = ~"\"abc\" + str"                    : assert(common::findInString(@ s,   @ ~"\""), -1)
    ; except if searching and occurring in comments
    s = ~"\"abc\" + str ; \"xyz\""          : assert(common::findInString(@ s,   @ ~"\"", #False, #True), 14)
    
    
    ; 1: ';' inside a normal double-quoted string, followed by a real comment
    Debug "##################### 1"
    s = ~"abc + \"xyz;123\" ; comment"
    Debug s
    p = common::findInString(@ s, @";", #False, #True)
    Debug p
    assert(p, 16)
    Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
    
    p = common::findInString(@ s, @";", #True, #True)
    Debug p
    assert(p, 10)
    Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
    Debug ""
    
    
    ; 2: ';' inside a single-quoted character literal, followed by a real comment
    Debug "##################### 2"
    s = "abc + ';' + xyz ; comment"
    Debug s
    p = common::findInString(@ s, @";", #False, #True)
    Debug p
    assert(p, 16)
    Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
    Debug ""
    
    p = common::findInString(@ s, @";", #True, #True)
    Debug p
    assert(p, 7)
    Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
    Debug ""
    
    
    ; 3: single quotes nested inside a double-quoted string do not open a character literal
    Debug "##################### 3"
    s = ~"abc + \"';'\" + xyz ; comment"
    Debug s
    p = common::findInString(@ s, @";", #False, #True)
    Debug p
    assert(p, 18)
    Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
    Debug ""
    
    p = common::findInString(@ s, @";", #True, #True)
    Debug p
    assert(p, 8)
    Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
    Debug ""
    
    
    ; 4: escape string ~".." containing backslash-escaped double quotes and a quoted ';'
    Debug "##################### 4"
    s = ~"abc + ~\"\\\"';'\\\"\" + xyz ; comment"
    Debug s
    p = common::findInString(@ s, @";", #False, #True)
    Debug p
    assert(p, 23)
    Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
    Debug ""
    
    p = common::findInString(@ s, @";", #True, #True)
    Debug p
    assert(p, 11)
    Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
    Debug ""
    
    
    ; 5: escape string ~".." with a ';' directly between two escaped double quotes
    Debug "##################### 5"
    s = ~"abc + ~\"\\\";\\\"\" + xyz ; comment"
    Debug s
    p = common::findInString(@ s, @";", #False, #True)
    Debug p
    assert(p, 21)
    Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
    Debug ""
    
    p = common::findInString(@ s, @";", #True, #True)
    Debug p
    assert(p, 10)
    Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
    Debug ""
    
    
    ; 6: character literal at the very start of the line, then a comment containing another ';'
    Debug "##################### 6"
    s = ~"';' ; comment ; more comment"
    Debug s
    p = common::findInString(@ s, @";", #False, #True)
    Debug p
    assert(p, 4)
    Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
    Debug ""
    
    p = common::findInString(@ s, @";", #True, #True)
    Debug p
    assert(p, 1)
    Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
    Debug ""
    
    ; 7-9: the three example lines from the introduction, searched with default options
    Debug "##################### 7"
    s = ~"; IncludeFile \"MyFile\""
    p = common::findInString(@ s, @ "IncludeFile")
    Debug s ;   ; IncludeFile "MyFile"
    Debug p ;   -1
    assert(p, -1)
    Debug "##################### 8"
    s = ~"myString.s = ~\"IncludeFile \\\"MyFile\\\"\""
    p = common::findInString(@ s, @ "IncludeFile")
    Debug s ;   myString.s = ~"IncludeFile \"MyFile\""
    Debug p ;   -1
    assert(p, -1)
    Debug "##################### 9"
    s = ~"IncludeFile \"MyFile\""
    p = common::findInString(@ s, @ "IncludeFile")
    Debug s ;   IncludeFile "MyFile"
    Debug p ;   0
    assert(p, 0)
    
    ; 10: no assertion here, the hits are only printed for manual inspection
    If 1
      Debug "##################### 10"
      ; find x in this file outside of strings and comments
      If ReadFile(0, #PB_Compiler_File)
        ReadStringFormat(0) ; return value (detected format) is not used
        While Not Eof(0)
          Define line.s = ReadString(0)
          Define pos = common::findInString(@line, @"x")
          If pos >= 0
            Debug "found x at pos " + Str(pos) + " in line: " + line
          EndIf
        Wend
        CloseFile(0)
      Else
        Debug "can't read file: " + #PB_Compiler_File
      EndIf
    EndIf
    ;}

  EndIf
CompilerEndIf

;
#NULL
Addict
Addict
Posts: 1440
Joined: Thu Aug 30, 2007 11:54 pm
Location: right here

Re: findInString(), ignore string literals and comments

Post by #NULL »

- added a break condition if comment is reached and searchInComments is disabled.
- fixed: searching for " would find it at a string closing delimiter, even if searchInStrings is disabled.
Post Reply