"C language style" parser [Updated]

Share your advanced PureBasic knowledge/code with the community.
kinglestat
Enthusiast
Enthusiast
Posts: 732
Joined: Fri Jul 14, 2006 8:53 pm
Location: Malta
Contact:

"C language style" parser [Updated]

Post by kinglestat »

Code: Select all


; -- lexical parser for a simplified C Language 
; Tested in UTF8
; PBx64 v5.71
;
; Based on  https://rosettacode.org/wiki/Compiler/lexical_analyzer
; And
; https://rosettacode.org/wiki/Compiler/syntax_analyzer
; Distribute and use freely
; 
; Kingwolf71 Feb/2020
; Bugfix: 2/3/2020 
;

DeclareModule LJLang
   
   #WithEOL = 1
 
   Structure stTree
      NodeType.i
      value.s
      *left.stTree
      *right.stTree
   EndStructure
 
   Declare.s         Error( *error.Integer )
   Declare           Parse()
   Declare           LoadLJ( file.s )
EndDeclareModule

Module LJLang
   EnableExplicit
   
; ======================================================================================================
;- Constants
; ======================================================================================================
   
   #LJTOKENS   = 36
   #LJATTRS    = 26
   
   Enumeration 
      #ljIDENT
      #ljINT
      #ljIF
      #ljPRTS
      #ljWHILE
      #ljNEGATE
      #ljMULTIPLY
      #ljMOD
      #ljSUBTRACT
      #ljLESSEQUAL
      #ljGreaterEqual
      #ljNotEqual
      #ljOr
      #ljSTR
      #ljSEQ
      #ljPRTC
      #ljPRTI
      #ljASSIGN
      #ljNOT
      #ljDIVIDE
      #ljADD
      #ljLESS
      #ljGREATER
      #ljEQUAL
      #ljAND
      #ljXOR
      
      #ljLeftBrace
      #ljRightBrace
      #ljLeftParent
      #ljRightParent
      #ljSemi
      #ljComma   
      #ljElse
      
      #ljOP
      #ljKeyword
      #ljEOF

   
   EndEnumeration
   
; ======================================================================================================
;- Structures
; ======================================================================================================

   Structure stSymbols
      name.s
      TokenType.i
   EndStructure
   
   Structure stToken
      name.s
      TokenType.i
      TokenExtra.i
      value.s
      row.i
      col.i
   EndStructure
   
   Structure stPrec
      bRightAssociation.i
      bBinary.i
      bUnary.i
      Precedence.i
      NodeType.i
   EndStructure
   
; ======================================================================================================
;- Macros
; ======================================================================================================
   
   Macro         NextToken()
   
      ;Debug "---[ " + #PB_Compiler_Procedure + " ]---"
      ;Debug Str( ListIndex( llTokenList() ) ) + "   " + llTokenList()\name
      
      NextElement( llTokenList() )
   EndMacro
   
   Macro         Install( symbolname, id  )
      AddElement( llSymbols() )
         llSymbols()\name        = symbolname
         llSymbols()\TokenType   = id
   EndMacro
   
   Macro       AddTokenSimple( tkentype )
      AddElement( llTokenList() )
         llTokenList()\TokenType = tkentype
         llTokenList()\TokenExtra= tkentype
         llTokenList()\name      = gszATR( tkentype )
         llTokenList()\row       = gLineNumber
         llTokenList()\col       = gCol
   EndMacro
   
   Macro       AddToken( tkentype, tkenextra, text, info )
      AddElement( llTokenList() )
         llTokenList()\TokenType = tkentype
         llTokenList()\TokenExtra= tkenextra
         llTokenList()\row       = gLineNumber
         llTokenList()\col       = gCol
         
         If text = ""
            llTokenList()\name = gszATR( tkenextra )
         Else
            llTokenList()\name      = text
         EndIf
         
         llTokenList()\value    = info
   EndMacro
   
   Macro       SetError( text, err )
   
      If err > 0 And err < 10
         gszlastError   = text + " on line " + Str( gLineNumber ) + ", col = " + Str( gCol )
      ElseIf err > 10
         gszlastError   = text + " on line " + Str( llTokenList()\row ) + ", col = " + Str( llTokenList()\col )
      Else
         gszlastError   = text
      EndIf
      
      gLastError     = err
      
      ProcedureReturn err
   EndMacro
   
   Macro       NextCharacter()
      gNextChar = Mid( gszFileText, gPos, 1 )
      gPos + 1 : gCol + 1
      
      If gNextChar = #LF$
         gCol = 1
         gLineNumber + 1
         
         gNextChar = Mid( gszFileText, gPos, 1 )
         gPos + 1
      EndIf
   EndMacro
   
   Macro       SetPre( id, right, bi, un, prec )
   
      gPreTable( id )\NodeType            = id
      gPreTable( id )\bRightAssociation   = right
      gPreTable( id )\bBinary             = bi
      gPreTable( id )\bUnary              = un
      gPreTable( id )\Precedence          = prec
      
   EndMacro
   
   Macro       SetPre2( id, op )
   
      gPreTable( id )\bRightAssociation   = 0
      gPreTable( id )\bBinary             = 0
      gPreTable( id )\bUnary              = 0
      gPreTable( id )\Precedence          = -1
      gPreTable( id )\NodeType            = op
      
   EndMacro
   
   Macro       DebugParser()
   
      Debug "---[ Parser ]--------------"
   
      ForEach llTokenList()
         temp = RSet( Str(llTokenList()\row), 6 ) + "   " + RSet( Str(llTokenList()\col), 6 ) + "   "
         
         If llTokenList()\TokenExtra <> llTokenList()\TokenType
            temp + LSet( gszATR( llTokenList()\TokenType ) + "_" + llTokenList()\name, 34 ) + llTokenList()\value
         Else
            temp + LSet( llTokenList()\name, 34 ) + llTokenList()\value
         EndIf
         
         Debug temp
      Next
   EndMacro
   
; ======================================================================================================
;- Globals
; ======================================================================================================
  
   Global Dim        gszATR.s( #LJTOKENS )
   Global Dim        gPreTable.stPrec( #LJTOKENS )
   
   Global NewList    llSymbols.stSymbols()
   Global NewList    llTokenList.stToken()
   
   
   Global            gLineNumber
   Global            gStack
   Global            gCol
   Global            gMemSize
   Global            gPos
   Global            gExit
   
   Global            gszFileText.s
   Global            gszlastError.s
   Global            gNextChar.s
   Global            gLastError
   Global            gszInv.s          = Chr( 34 )
   Global            gszEOF.s          = Chr( 255 )
   
; ======================================================================================================
;- Procedure Definitions
; ======================================================================================================
      
   Declare              paren_expr()
   
; ======================================================================================================
;- Module Function
; ======================================================================================================
   
   Procedure       Init()
   
      gszATR(  0 ) = "Identifier"
      gszATR(  1 ) = "Integer"
      gszATR(  2 ) = "If"
      gszATR(  3 ) = "Prts"
      gszATR(  4 ) = "While"
      gszATR(  5 ) = "Negate"
      gszATR(  6 ) = "Multiply"
      gszATR(  7 ) = "Mod"
      gszATR(  8 ) = "Subtract"
      gszATR(  9 ) = "LessEqual"
      gszATR( 10 ) = "GreaterEqual"
      gszATR( 11 ) = "NotEqual"
      gszATR( 12 ) = "Or"
      gszATR( 13 ) = "String"
      gszATR( 14 ) = "Sequence"
      gszATR( 15 ) = "Prtc"
      gszATR( 16 ) = "Prti"
      gszATR( 17 ) = "Assign"
      gszATR( 18 ) = "Not"
      gszATR( 19 ) = "Divide"
      gszATR( 20 ) = "Add"
      gszATR( 21 ) = "Less"
      gszATR( 22 ) = "Greater"
      gszATR( 23 ) = "Equal"
      gszATR( 24 ) = "And"
      gszATR( 25 ) = "Xor"
      
      gszATR( 26 ) = "LeftBrace"
      gszATR( 27 ) = "RightBrace"
      gszATR( 28 ) = "LeftParent"
      gszATR( 29 ) = "RightParent"
      gszATR( 30 ) = "SemiColon"
      gszATR( 31 ) = "Comma"
      gszATR( 32 ) = "Else"
      
      gszATR( 33 ) = "Op"
      gszATR( 34 ) = "Keyword"
      gszATR( 35 ) = "End of file"

      SetPre2( #ljEOF, -1 )
      SetPre( #ljMULTIPLY,     0, 1, 0, 13 )
      SetPre( #ljDIVIDE,       0, 1, 0, 13 )
      SetPre( #ljMOD,          0, 1, 0, 13 )
      SetPre( #ljADD,          0, 1, 0, 12 )
      SetPre( #ljSUBTRACT,     0, 1, 0, 12 )
      SetPre( #ljNEGATE,       0, 0, 1, 14 )
      SetPre( #ljNOT,          0, 0, 1, 14 )
      SetPre( #ljLESS,         0, 1, 0, 10 )
      SetPre( #ljLESSEQUAL,    0, 1, 0, 10 )
      SetPre( #ljGREATER,      0, 1, 0, 10 )
      SetPre( #ljGreaterEqual, 0, 1, 0, 10 )
      SetPre( #ljEQUAL,        0, 1, 0, 9 )
      SetPre( #ljNotEqual,     0, 1, 0, 9 )
      SetPre2( #ljASSIGN,      #ljASSIGN )
      SetPre( #ljAND,          0, 1, 0, 5 )
      SetPre( #ljOr,           0, 1, 0, 4 )
      SetPre2( #ljIF,          #ljIF )
      SetPre2( #ljElse,        -1 )
      SetPre2( #ljWHILE,       #ljWHILE )
      SetPre2( #ljPRTS,        -1 )
      SetPre2( #ljPRTI,        -1 )
      SetPre2( #ljPRTC,        -1 )
      SetPre2( #ljLeftParent,  -1 )
      SetPre2( #ljLeftBrace,   -1 )
      SetPre2( #ljRightParent,  -1 )
      SetPre2( #ljRightBrace, -1 )
      SetPre2( #ljComma,  -1 )
      SetPre2( #ljSemi,  -1 )
      SetPre2( #ljIDENT,  #ljIDENT )
      SetPre2( #ljINT,    #ljINT )
      SetPre2( #ljSTR,    #ljSTR )
      
      ClearList( llTokenList() )
      ClearList( llSymbols() )
      
      gLineNumber = 1
      gCol        = 1
      gStack      = 0
      gExit       = 0
      
      Install( "else", #ljElse )
      install( "if",    #ljIF )
      install( "print", #ljPRTS )
      install( "putc",  #ljPRTC )
      install( "while", #ljWHILE )

   EndProcedure
   
   Procedure       LoadLJ( filename.s )
      Protected    f, *mem
   
      gMemSize = FileSize( filename )
      
      If gMemSize > 0
         
         f = ReadFile( #PB_Any, filename, #PB_UTF8 )
         
         If Not f
            SetError( "Could not open file", -3 )
         EndIf
         
         *Mem = AllocateMemory( gMemSize + 16, #PB_Memory_NoClear )
         ReadData( f, *Mem, gMemSize )
         CloseFile( f )
         
         CompilerIf( #WithEOL = 1 )
            gszFileText = PeekS( *mem, -1, #PB_UTF8 ) + gszEOF
         CompilerElse
            gszFileText = PeekS( *mem, -1, #PB_UTF8 )
         CompilerEndIf   
            
         gMemSize = Len( gszFileText )
         FreeMemory( *mem )
         gpos = 1
         
         ProcedureReturn 0
      EndIf
      
      SetError( "Invalid file", -2 )
   EndProcedure
   
   Procedure.s       Error( *error.Integer )
      Protected      szerror.s
   
      If gLastError
         
         *error\i       = gLastError
         szerror        = gszlastError
         gLastError     = 0
         gszlastError   = ""
   
         ProcedureReturn szerror
      EndIf
      
      *error\i = 0
      ProcedureReturn ""
   EndProcedure
   
   Procedure         IsNumber( init.i = 0 )
      Static         flag
      
      If init
         flag = 0
      Else
         If gNextChar >= "0" And gNextChar <= "9"
            ProcedureReturn 1
         ElseIf Not flag And gNextChar = ".'"
            flag + 1
            ProcedureReturn 1
         EndIf
      EndIf
   
      ProcedureReturn 0
   
   EndProcedure
   
   Procedure         IsAlpha()
      If ( gNextChar >= "a" And gNextChar <= "z" ) Or (gNextChar >= "A" And gNextChar <= "Z"  ) Or (gNextChar >= "0" And gNextChar <= "9" )
         ProcedureReturn 1
      EndIf
      
      ProcedureReturn 0
   EndProcedure
   
   Procedure         Follow( expect.s, ifyes.i, ifno.i, *err.Integer )
      NextCharacter()
      
      If gNextChar = expect
         AddToken( #ljOP, ifyes, "", "" )
      Else
         If ifno = -1
            *err\i = 5
            SetError( "follow unrecognized character", 5 )
         Else
            AddToken( #ljOP, ifno, "", "" )
            gPos - 1
         EndIf
      EndIf
      
      ProcedureReturn 0
   EndProcedure
   
   Procedure         Scanner()
       Protected     err, first, i
       Protected.s   text, temp
      
      While gPos <= gMemSize
         NextCharacter()
         
         Select gNextChar
            Case gszEOF
               AddTokenSimple( #ljEOF )
               Break

            Case " ", #CR$, #TAB$, ""
               Continue
            
            Case "{"
               AddTokenSimple( #ljLeftBrace )
            Case "}"
               AddTokenSimple( #ljRightBrace )
            Case "("
               AddTokenSimple( #ljLeftParent )
            Case ")"
               AddTokenSimple( #ljRightParent )
            Case "+"
               AddToken( #ljOP, #ljADD, "", "" )
            Case "-"
               AddToken( #ljOP, #ljSUBTRACT, "", "" )
            Case "*"
               AddToken( #ljOP, #ljMULTIPLY, "", "" )
            Case "%"    
               AddToken( #ljOP, #ljMOD, "", "" )
            Case ";"
               AddTokenSimple( #ljSemi )
            Case ","
               AddTokenSimple( #ljComma )
            Case "/"
               NextCharacter()
               
               If gNextChar <> "*"
                  AddToken( #ljOP, #ljDIVIDE, "", "" )
                  gPos - 1
               Else        ;Must be a comment
                  
                  NextCharacter()
                  text = ""
                  
                  Repeat
                     NextCharacter()
                     text + gNextChar
                     
                     If gNextChar = "*"
                        NextCharacter()
                        
                        If gNextChar = "/"
                           Break
                        EndIf
                     ElseIf gNextChar = #CR$
                        gLineNumber + 1
                     EndIf

                  Until gPos >= gMemSize
                  
                  If gPos >= gMemSize
                     SetError( "EOF in comment", 1 )
                  EndIf

               EndIf
            Case "'"
               NextCharacter()
               
               If gNextChar = "'"
                  SetError( "Empty character literal", 2 )
               ElseIf gNextChar = "\"
                  NextCharacter()
                  
                  Select gNextChar
                     Case "'"
                        SetError( "Empty escape character literal", 2 )
                     Case "n"
                        first = 10
                     Case "r"
                        first = 13
                     Case "\"
                        first = 92
                     Default
                        SetError( "Invalid escape character", 3 )
                  EndSelect
               Else
                  first = Asc( gNextChar )
               EndIf
               
               NextCharacter()
               
               If gNextChar <> "'"
                  SetError( "Multi-Character literal", 4 )
               Else
                  AddToken( #ljINT, #ljINT, "", Str(first) )
               EndIf
               
            Case "<"
               If Follow( "=", #ljLESSEQUAL, #ljLESS, @err ) : ProcedureReturn err : EndIf
            Case ">"
               If Follow( "=", #ljGreaterEqual, #ljGREATER, @err ) : ProcedureReturn err : EndIf
            Case "!"
               If Follow( "=", #ljNotEqual, #ljNOT, @err ) : ProcedureReturn err : EndIf
            Case "="
               If Follow( "=", #ljEQUAL, #ljASSIGN, @err ) : ProcedureReturn err : EndIf
            Case "&"
               If Follow( "&", #ljAND, -1, @err ) : ProcedureReturn err : EndIf
            Case "|"
               If Follow( "|", #ljOr, -1, @err ) : ProcedureReturn err : EndIf
            Case "%"
               If Follow( "%%", #ljxOr, -1, @err ) : ProcedureReturn err : EndIf
            
            Case gszInv
               text = gNextChar
               
               Repeat
                  NextCharacter()
                  
                  If gNextChar = gszInv
                     text + gNextChar
                     AddToken( #ljStr, #ljSTR, "", text )
                     Break
                  ElseIf gNextChar = #LF$ Or gNextChar = #CR$
                     SetError( "EOL in string", 7 )
                  Else
                     text + gNextChar
                  EndIf
               
               Until gPos >= gMemSize
               
               If gPos >= gMemSize
                  SetError( "EOF in string", 6 )
               EndIf
            Default
               IsNumber( 1 )        ; reset digit flag
               
               first    = IsNumber()
               text     = ""
               
               While gPos < gMemSize And ( IsAlpha() Or gNextChar = "_" )
                  If Not IsNumber() : first = 0 : EndIf
                  text + gNextChar
                  NextCharacter()
               Wend
            
               If gPos >= gMemSize
                  SetError( "EOL in number or variable '" + text + "'", 8 )
               EndIf
               
               If Len( text ) < 1
                  SetError( "Unknown sequence number or variable '" + text + "'", 9 )
               EndIf
               
               gPos - 1
               i = 0
               temp = LCase( text )
               
               ForEach llSymbols()
                  i + 1
                  
                  If llSymbols()\name = temp
                     AddToken( #ljKeyword, llSymbols()\TokenType, "", text )
                     i = -1
                     Break
                  EndIf
               Next
               
               If i > 0
                  If first
                     AddToken( #ljINT, #ljINT, "", text )
                  Else
                     AddToken( #ljIDENT, #ljIDENT, "", text )
                  EndIf
               EndIf
               
         EndSelect
      Wend
      
      ProcedureReturn 0
   
   EndProcedure
   
   Procedure         Expect( function.s, TokenType )
      
      ;Debug "--Expect--"
      ;Debug llTokenList()\name + " --> " + gszATR( TokenType )
      
      If llTokenList()\TokenExtra = TokenType
         NextToken()
         ProcedureReturn 0
      EndIf
      
      SetError( "Expecting " + gszATR( TokenType ) + " but found " + gszATR( llTokenList()\TokenExtra ) + " for " + function, 11 )
   
   EndProcedure
   
   Procedure         MakeNode( NodeType, *left.stTree, *right.stTree )
      Protected      *p.stTree = AllocateStructure( stTree )
      
      *p\NodeType = NodeType
      *p\left     = *left
      *p\right    = *right
      
      ProcedureReturn *p
   EndProcedure
   
   Procedure         Makeleaf( NodeType, value.s )
      Protected      *p.stTree = AllocateStructure( stTree )
      
      *p\NodeType    = NodeType
      *p\value       = value
      
      ProcedureReturn *p
   EndProcedure

   Procedure            expr( var )
      Protected.stTree  *x, *node
      Protected         op, q
      
      Select llTokenList()\TokenExtra
         Case #ljLeftParent
            *x = paren_expr()
            
         Case #ljSUBTRACT, #ljADD
            op = llTokenList()\TokenExtra
            NextToken()
            *node = expr( gPreTable( #ljNEGATE )\Precedence )
            
            If op = #ljSUBTRACT
               *x = MakeNode( #ljNEGATE, *node, 0 )
            Else
               *x = *Node
            EndIf
            
         Case  #ljNOT
            NextToken()
            *x = MakeNode( #ljNOT, expr( gPreTable( #ljNOT )\Precedence ), 0 )
            
         Case #ljIDENT
            *x = Makeleaf( #ljIDENT, llTokenList()\value )
            NextToken()

         Case #ljINT
            *x = Makeleaf( #ljINT, llTokenList()\value )
            NextToken()
            
         Default
            SetError( "Expecting a primary, found " + llTokenList()\name, 12 )
      
      EndSelect
      
      While gPreTable( llTokenList()\TokenExtra )\bBinary And gPreTable( llTokenList()\TokenExtra )\Precedence >= var
         op = llTokenList()\TokenExtra
         NextToken()
      
         q = gPreTable( op )\Precedence
         
         If Not gPreTable( op )\bRightAssociation
            q + 1
         EndIf
      
         *node = expr( q )
         *x = MakeNode( gPreTable( op )\NodeType, *x, *node )
      Wend
      
      ProcedureReturn *x

   EndProcedure
   
   Procedure            paren_expr()
      Protected         *p.stTree
      
      Expect( "paren_expr", #ljLeftParent )
      *p = expr( 0 )
      Expect( "paren_expr", #ljRightParent )
      ProcedureReturn *p
      
   EndProcedure
      
   
   Procedure            stmt()
      Protected.stTree  *p, *v, *e, *s, *s2

      gStack + 1
      
      If gStack > 32
         NextToken()
         SetError( "Stack overflow", 15 )
      EndIf
      
      ;Debug llTokenList()\name + " --> " + gszATR( llTokenList()\TokenExtra )
      
      Select llTokenList()\TokenExtra
      
         Case #ljIF
            NextToken()
            *e    = paren_expr()
            *s    = stmt()
            *s2   = 0 
            
            If llTokenList()\TokenType = #ljElse
               NextToken()
               *s2 = stmt()
            EndIf
            
            *p = MakeNode( #ljIF, *e, MakeNode( #ljIF, *s, *s2 ) )
         
         Case #ljPRTC
            NextToken()
            *e    = paren_expr()
            *p    = MakeNode( #ljPRTC, *e, 0 )
            expect( "putc", #ljSemi )
         
         Case #ljPRTS
            NextToken()
            expect( "print", #ljLeftParent )
            
            Repeat
               If llTokenList()\TokenExtra = #ljSTR
                  *e = MakeNode( #ljPRTS, Makeleaf( #ljSTR, llTokenList()\value ), 0 )
                  NextToken()
               Else
                  *e = MakeNode( #ljPRTI, expr( 0 ), 0 )
               EndIf
               
               *p = MakeNode( #ljSEQ, *p, *e )
               
               If llTokenList()\TokenExtra <> #ljComma
                  Break
               EndIf
               
               expect( "print", #ljComma )
               
            Until llTokenList()\TokenType = #ljEOF
            
            Expect( "Print", #ljRightParent )
            Expect( "Print", #ljSemi )
         Case #ljSemi
            NextToken()
            
         Case #ljIDENT
            *v = Makeleaf( #ljIDENT, llTokenList()\value )
            NextToken()
            Expect( "Assign", #ljASSIGN )
            *e = expr( 0 )
            *p = MakeNode( #ljASSIGN, *v, *e )
            Expect( "Assign", #ljSemi )
            
         Case #ljWHILE
            NextToken()
            *e = paren_expr()
            *s = stmt()
            *p = MakeNode( #ljWHILE, *e, *s )
            
         Case #ljLeftBrace
               
            Expect( "Left Bracket", #ljLeftBrace )
            
            While llTokenList()\TokenExtra <> #ljRightBrace And llTokenList()\TokenExtra <> #ljEOF
               *p = MakeNode( #ljSEQ, *p, stmt() )
            Wend
            
            Expect( "Left Bracket", #ljRightBrace )
            
         Case #ljEOF
            gExit + 1
            
         Default
            SetError( "Expecting beginning of a statament, found " + llTokenList()\name, 14 )
         
      EndSelect
      
      ProcedureReturn *p
      
   EndProcedure
   
   Procedure         DisplayNode( *p.stTree )
      If *p
         If *p\NodeType = #ljIDENT Or *p\NodeType = #ljINT Or *p\NodeType = #ljSTR
            Debug LSet( gszATR( *p\NodeType ), 30 ) + *p\value
         Else
            Debug LSet( gszATR( *p\NodeType ), 30 )
            DisplayNode( *p\left )
            DisplayNode( *p\right )
         EndIf
      Else
         Debug ";"
      EndIf
   EndProcedure
   
   Procedure         Parse()
      Protected      err
      Protected      *p.stTree
      Protected      total
      Protected.s    temp
      
      Init()
   
      If Scanner()
         ProcedureReturn gLastError
      EndIf
   
      DebugParser()
      
      Debug "---[ Syntax Analysis ]--------------"
   
      FirstElement( llTokenList() )
      total = ListSize( llTokenList() ) - 1
   
      Repeat
         gStack = 0
         *p = MakeNode( #ljSEQ, *p, stmt() )
         
         If gLastError
            Debug gszlastError
            Break 
         EndIf
         
      Until ListIndex( llTokenList() ) >= total Or gExit
      
      DisplayNode( *p )
   
   EndProcedure

EndModule

CompilerIf #PB_Compiler_IsMainFile
   ; -- Module demo
   
   Define         err
   Define         *t.LJLang::stTree
 
   If LJLang::LoadLJ( "demo4.lj" )
      Debug "Error: " + LJLang::Error( @err )
   EndIf
   
   If LJLang::Parse()
      Debug "Error: " + LJLang::Error( @err )
   EndIf
   
CompilerEndIf

EDIT1: Added an example

Example 1

Code: Select all


count = 1;
 while (count < 10) {
     print("count is: ", count, "\n");
     count = count + 1;
 }

EDIT2: 28/Feb/2020 - Major changes

- Some bug fixes
- Made output in line with other examples
- Integrated the syntax analysis

EDIT3: 2/Mar/2020
- Bugfix in syntax generator

Personal note

I am impressed how easy it was to translate the C code into Purebasic.
Last edited by kinglestat on Mon Mar 02, 2020 5:41 pm, edited 8 times in total.
I may not help with your coding
Just ask about mental issues!

http://www.lulu.com/spotlight/kingwolf
http://www.sen3.net
Mesa
Enthusiast
Enthusiast
Posts: 349
Joined: Fri Feb 24, 2012 10:19 am

Re: "C language style" parser

Post by Mesa »

An error occures if the file "demo4.lj" has a BOM.

I use PB 5.72 beta 1 x86.

M.
kinglestat
Enthusiast
Enthusiast
Posts: 732
Joined: Fri Jul 14, 2006 8:53 pm
Location: Malta
Contact:

Re: "C language style" parser

Post by kinglestat »

@mesa I do not understand
I may not help with your coding
Just ask about mental issues!

http://www.lulu.com/spotlight/kingwolf
http://www.sen3.net
Fred
Administrator
Administrator
Posts: 16687
Joined: Fri May 17, 2002 4:39 pm
Location: France
Contact:

Re: "C language style" parser

Post by Fred »

Nice code :)
kinglestat
Enthusiast
Enthusiast
Posts: 732
Joined: Fri Jul 14, 2006 8:53 pm
Location: Malta
Contact:

Re: "C language style" parser

Post by kinglestat »

thanks @Fred
I may not help with your coding
Just ask about mental issues!

http://www.lulu.com/spotlight/kingwolf
http://www.sen3.net
User avatar
Derren
Enthusiast
Enthusiast
Posts: 313
Joined: Sat Jul 23, 2011 1:13 am
Location: Germany

Re: "C language style" parser

Post by Derren »

kinglestat wrote:@mesa I do not understand
you don't read the BOM (Byte Order Mark) using ReadStringFormat().
ReadStringFormat() should be called after opening a file and before reading from it.

So your code only works with UTF-8 files that don't start with a BOM.
If the file starts with a BOM, your programm will not work properly, as it doesn't recognize the first few bytes of the filestream, and will not properly read special characters like "Ü" for example.
User avatar
Tenaja
Addict
Addict
Posts: 1949
Joined: Tue Nov 09, 2010 10:15 pm

Re: "C language style" parser

Post by Tenaja »

kinglestat wrote:@mesa I do not understand
Many, if not most, text editors that handle multiple text formats add a code to the beginning of the file. The code is called the bom. It's a way to let the text editors know what format to read.

Those three bytes are tripping up your code, according to others. The fix is to read it, so the first line of "code" read is actually code.
User avatar
chi
Addict
Addict
Posts: 1034
Joined: Sat May 05, 2007 5:31 pm
Location: Linz, Austria

Re: "C language style" parser

Post by chi »

Thx for sharing :)

There is a little typo

Code: Select all

gszATR(  4 ) = "£hile"
[/size]
Et cetera is my worst enemy
User avatar
idle
Always Here
Always Here
Posts: 5097
Joined: Fri Sep 21, 2007 5:52 am
Location: New Zealand

Re: "C language style" parser

Post by idle »

chi wrote:Thx for sharing :)

There is a little typo

Code: Select all

gszATR(  4 ) = "£hile"
[/size]
We can learn from that, easy to spot bugs that we can fix for dollars pounds or euros.. :lol:
Windows 11, Manjaro, Raspberry Pi OS
Image
User avatar
ChrisR
Addict
Addict
Posts: 1154
Joined: Sun Jan 08, 2017 10:27 pm
Location: France

Re: "C language style" parser

Post by ChrisR »

That's great, thanks for sharing :)
kinglestat
Enthusiast
Enthusiast
Posts: 732
Joined: Fri Jul 14, 2006 8:53 pm
Location: Malta
Contact:

Re: "C language style" parser [Updated]

Post by kinglestat »

Example 2

Code: Select all

/*
 Simple prime number generator
 */
count = 1;
n = 1;
limit = 100;
while (n < limit) {
    k=3;
    p=1;
    n=n+2;
    while ((k*k<=n) && (p)) {
        p=n/k*k!=n;
        k=k+2;
    }
    if (p) {
        print(n, " is prime\n");
        count = count + 1;
    }
}
print("Total primes found: ", count, "\n"); 
I may not help with your coding
Just ask about mental issues!

http://www.lulu.com/spotlight/kingwolf
http://www.sen3.net
infratec
Always Here
Always Here
Posts: 6883
Joined: Sun Sep 07, 2008 12:45 pm
Location: Germany

Re: "C language style" parser [Updated]

Post by infratec »

Regarding to your BOM problem...

This should fix it:

Code: Select all

f = ReadFile( #PB_Any, filename, #PB_UTF8 )

If Not f
  SetError( "Could not open file", -3 )
EndIf

ReadStringFormat(f)

*Mem = AllocateMemory( gMemSize + 16, #PB_Memory_NoClear )
Mesa
Enthusiast
Enthusiast
Posts: 349
Joined: Fri Feb 24, 2012 10:19 am

Re: "C language style" parser [Updated]

Post by Mesa »

Change the Procedure LoadLJ( filename.s ) by this and the BOM problem should be resolved.

Code: Select all

Procedure       LoadLJ( filename.s ) 
				Protected    f, *mem 
			 

				gMemSize = FileSize( filename ) 
				 
				If gMemSize > 0 
						 
				  f = ReadFile( #PB_Any, filename, #PB_UTF8 ) 
				  
				  ReadStringFormat(f) ;JUST ADD THIS
				  
						If Not f 
							SetError( "Could not open file", -3 ) 
						EndIf 
						 
						*Mem = AllocateMemory( gMemSize + 16, #PB_Memory_NoClear ) 
						ReadData( f, *Mem, gMemSize ) 
						CloseFile( f ) 
						 
						CompilerIf( #WithEOL = 1 ) 
							gszFileText = PeekS( *mem, -1, #PB_UTF8 ) + gszEOF 
						CompilerElse 
							gszFileText = PeekS( *mem, -1, #PB_UTF8 ) 
						CompilerEndIf    
							 
						gMemSize = Len( gszFileText ) 
						FreeMemory( *mem ) 
						gpos = 1 
						 
						ProcedureReturn 0 
				EndIf 
				 
				SetError( "Invalid file", -2 ) 
			EndProcedure 
M.
kinglestat
Enthusiast
Enthusiast
Posts: 732
Joined: Fri Jul 14, 2006 8:53 pm
Location: Malta
Contact:

Re: "C language style" parser [Updated]

Post by kinglestat »

Thank you @mesa and @infratec. Much appreciated; had no idea.
I may not help with your coding
Just ask about mental issues!

http://www.lulu.com/spotlight/kingwolf
http://www.sen3.net
Post Reply