Code updated 2008-05-23
This code at the top is now
the (hopefully) bug free version
of the ported code found later
************************
I needed a proc like stringfield() except that it had to handle quoted strings. Often a field will have a comma in it so it gets quotes put on it like:
Firstname,lastname,City,country,DisplayName,email
Paul,Dwer,Tokyo,Japan,35,"Dwyer, Paul",PaulsEmail@PaulsDomain.com
StringField will return
Paul
Dwyer
Tokyo
Japan
36
"Dwyer
Paul"
rather than what excel will show and save:
Paul
Dwer
Tokyo
Japan
36
Dwyer, Paul
PaulsEmail@PaulsDomain.com
I was surprised not to find this proc in the tips area already... so here it is. Hopefully bug free and perhaps needing some performance tweaks. Test code added
Code: Select all
;port from http://www.xbeat.net/vbspeed/c_ParseCSV.php
; some minor deliberate variations
Structure MemoryArray
Byte.c[0]
EndStructure
Declare.l ParseCSV(CSVLine.s, OutVals.s(1))
Dim Vals.s(0)
OpenFile(1,"F:\Programming\PureBasicCode\csv.csv") ;change this!!!!!
While Not Eof(1)
CSVString.s = ReadString(1)
ValCount = ParseCSV(CSVString,Vals())
Debug "Column Count: " + Str(ValCount) + " " + CSVString
For i = 0 To valcount -1
Debug vals(i)
Next
Wend
CloseFile(1)
Procedure.l ParseCSV(CSVLine.s, OutVals.s(1)) ; Returns count
#lAscSpace = 32 ; Asc(" ")
#lAscQuote = 34 ; Asc("""")
#lAscSeparator = 44 ; Asc(","), comma
#lValueNone = 0 ; states of the parser
#lValuePlain = 1
#lValueQuoted = 2
Protected ubValues.l
Protected cntValues.l
Protected lCharCode.l
Protected posStart.l
Protected posEnd.l
Protected cntTrim.l
Protected lState .l
Protected i.l
Protected StrLen.l = Len(CSVLine)
If StrLen > 0
*abExpression.MemoryArray = @CSVLine ; to byte array
ReDim OutVals.s(cntValues)
For i = 0 To StrLen -1
lCharCode = *abExpression\byte[i]
Select lCharCode
Case #lAscSpace
If lState = #lValuePlain
; at non-quoted value: trim 2 unicode bytes for each space
cntTrim = cntTrim + 1;2
EndIf
Case #lAscSeparator
If lState = #lValueNone
; ends zero-length value
ReDim OutVals.s(cntValues)
OutVals(cntValues) = ""
cntValues = cntValues + 1
posStart = i + 1;2
ElseIf lState = #lValuePlain
; ends non-quoted value
lState = #lValueNone
posEnd = i - cntTrim
ReDim OutVals.s(cntValues)
OutVals(cntValues) = Mid(CSVLine, posStart + 1, posEnd - posStart)
OutVals(cntValues) = ReplaceString(OutVals(cntValues),Chr(34)+Chr(34),Chr(34))
cntValues = cntValues + 1
posStart = i + 1;2
cntTrim = 0
EndIf
Case #lAscQuote
If lState = #lValueNone
; starts quoted value
lState = #lValueQuoted
; trims the opening quote
posStart = i + 1;2
ElseIf lState = #lValueQuoted
; ends quoted value, or is a quote within
If *abExpression\byte[i+1] = #lAscQuote
i = i + 1
Else
lState = #lValuePlain
cntTrim = 1;2 ; trims the closing quote
EndIf
EndIf
Default
If lState = #lValueNone
; starts non-quoted value
lState = #lValuePlain
posStart = i
EndIf
; reset trimming
cntTrim = 0
EndSelect
Next
; remainder
posEnd = i - cntTrim
If cntValues <> ubValues
ReDim OutVals.s(cntValues)
EndIf
OutVals(cntValues) = Mid(CSVLine, posStart + 1, posEnd - posStart)
OutVals(cntValues) = ReplaceString(OutVals(cntValues),Chr(34)+Chr(34),Chr(34))
ProcedureReturn cntValues + 1
Else
; (Expression = "")
; return single-element array containing a zero-length string
ReDim OutVals.s(0)
OutVals.s(0) = ""
ProcedureReturn 1
EndIf
EndProcedure