You can pass data as string, array, as sqlite database or as text file
Filtering and aggreagation (e.g. only consideration of specific weekdays or time ranges) is easy possible
Currently supported are the following functions:
GETRANGE: Retrieve Start date and End date of contained data
COUNT: Count values
SUM: Summarize values
MIN: Get lowest value
MAX: Get highest value
AVG: Get AVG
MEDIAN: Get median (50 Percentile)
PERC: Get x Percentile; x=[inum]
STDEV: Standard deviation
VARIANCE: Variance
TREND; Get Trend
Parameters to filter / group
a) "interval", Select only specific timerange:
Datefrom|Dateuntil = DDMMYYYYHHMM|DDMMYYYYHHMM or Datefrom|Dateuntil = DDMMYYYY|DDMMYYYY or TimeFrom|TimeUntil = HHMM|HHMM
b) "weekdays', Select ony given weekdays
Example: weekdays="SO,MO,TU,WE,TH,FR,SA"
c) "groupby', return results grouped in slots
"hour" -> group resulty in slots 00-24 h
"weekday" -> group results by weekdays
"interval" -> group results intimeslots, interval given in minutes in groupbyinterval
Would be great if someone would like to contribute other calculations
Edit:
Version 1.0.1 09.06.2023
Should now run also on win64, pb6.02x64, thanx to idle and jassing
Filtering by time now excludes last second, eg. interval=0800|0900 searches all data between 08:00:00 and 08:59:59
improved samples
Code: Select all
; Module to faciliate statistical computations
; Author: oliver13
; SQLite related code is NOT from me, many thanks To infratec, idle, danilo And other
; https://www.purebasic.fr/english/viewtopic.php?p=516159&hilit=percentile+sqlite#p516159
; Version 1.0.1 09.06.2023
; Should now run also on win64, pb6.02x64, thanx to idle and jassing
; Filtering by time now excludes last second, eg. interval=0800|0900 searches all data between 08:00:00 and 08:59:59
; improved samples
; Version 1.0.0
; 08.06.2023
; --------------------------------------------------------------
DeclareModule StatLib
Structure tData
iDate.i
dValue.d
EndStructure
Declare _intern_MakeDatabase()
Declare _intern_MakeArray(Array arr.tData(1),sData$,bUseTimeOffset=1)
Declare _intern_FillDatabase(db,Array arr.d(1))
Declare.s Func(sFunc$,sparams$="",iNum=0)
Declare Finish(bCloseDB=#False)
Declare StartFromFile(sdbfile$,stable$="data", sdatefield$="date",svaluefield$="val", iDigits=1,sDateFormat$="%dd.%mm.%yyyy %hh:%ii",sSeparator$=",")
Declare StartFromArray(Array tdat.tdata(1), iDigits=1)
Declare StartFromString(sdata$, iDigits=1)
Declare.s SaveMem2Database(sdestfile$="")
Declare UseMemDB(bCloseDB=#False)
EndDeclareModule
Module StatLib
UseSQLiteDatabase()
Global iStatLib_DB
Global sdb_table$
Global sdb_date$
Global sdb_value$
Global idb_digits
Global bIsInit=0
Global bUseMemoryDB=#False
ImportC ""
qsort(*base, nitems.l, size.l, *Prog)
EndImport
CompilerSelect #PB_Compiler_OS
CompilerCase #PB_OS_MacOS
ImportC "/Applications/PureBasic/purelibraries/macos/libraries/libpbsqlite3.a"
CompilerDefault
ImportC "sqlite3.lib"
CompilerEndSelect
sqlite3_create_function(DatabaseID, zFunctionName.p-utf8, nArg.i, eTextRep.i, *pApp, *xFunc, *xStep, *xFinal)
sqlite3_aggregate_context(*sqlite3_context, nBytes.l)
sqlite3_result_double(*sqlite3_context, dbl.d)
sqlite3_value_numeric_type.i(*sqlite3_value)
sqlite3_value_double.d(*sqlite3_value)
sqlite3_busy_timeout(sqlite3, ms);
sqlite3_value_type.i(*sqlite3_value)
sqlite3_realloc(*void, int.l)
sqlite3_free(*void)
sqlite3_result_error(*sqlite3_context, char.p-utf8, int.l)
sqlite3_result_error_nomem(*sqlite3_context)
sqlite3_backup_init(pDest, zDestName.p-ascii, pSource, zSourceName.p-ascii)
sqlite3_backup_step(sqlite3_backup, nPage)
sqlite3_backup_finish(sqlite3_backup)
sqlite3_errcode.l(*db)
sqlite3_errmsg16.l(*sqlite3)
EndImport
#SQLITE_NULL = 5
#SQLITE_UTF8 = 1
#SQLITE_OK = 0
#SQLITE_INTEGER = 1
#SQLITE_FLOAT = 2
#SQLITE3_TEXT = 3
#SQLITE_BLOB = 4
#SQLITE_NULL = 5
#SQLITE_STATIC = 0
#SQLITE_TRANSIENT = -1
CompilerIf Defined (utf8,#PB_Procedure)=#False And #PB_Compiler_Version<560
Procedure UTF8(stext$)
If stext$="":stext$=" ":EndIf
*buf=AllocateMemory(StringByteLength(stext$))
PokeS(*buf,stext$,-1,#PB_UTF8)
ProcedureReturn *buf
EndProcedure
CompilerEndIf
Structure StdevCtx
rM.d
rS.d
cnt.q
EndStructure
Structure Percentile
nAlloc.i ; Number of slots allocated for a[]
nUsed.i ; Number of slots actually used in a[]
rPct.d ; 1.0 more than the value for P
*a.Double ; Array of Y values
EndStructure
Procedure.i _intern_perc_sameValue(a.d, b.d)
a - b
ProcedureReturn Bool(a >= -0.001 And a <= 0.001)
EndProcedure
ProcedureC _intern_perc_percentStep(*pCtx, argc.l, *argv)
Protected *ptr, *a
Protected *p.Percentile
Protected.d rPct
Protected.i eType
Protected.d y
Protected.i n
; Requirement 3: P must be a number between 0 And 100
*ptr = PeekI(*argv + 1 * SizeOf(Long))
eType = sqlite3_value_numeric_type(*ptr)
rPct = sqlite3_value_double(*ptr);
If (eType <> #SQLITE_INTEGER And eType <> #SQLITE_FLOAT) Or (rPct < 0.0 Or rPct > 100.0)
sqlite3_result_error(*pCtx, "2nd argument to percentile() is not a number between 0.0 And 100.0", -1)
ProcedureReturn
EndIf
; Allocate the session context.
*p = sqlite3_aggregate_context(*pCtx, SizeOf(Percentile))
If *p = 0
ProcedureReturn
EndIf
; Remember the P value. Throw an error If the P value is different
; from any prior row, per Requirement (2).
If *p\rPct = 0.0
*p\rPct = rPct; + 1.0
ElseIf Not _intern_perc_sameValue(*p\rPct, rPct); + 1.0)
sqlite3_result_error(*pCtx, "2nd argument to percentile() is not the same For all input rows", -1)
ProcedureReturn
EndIf
; Ignore rows For which Y is NULL
*ptr = PeekI(*argv + 0 * SizeOf(Long))
eType = sqlite3_value_type(*ptr)
If eType = #SQLITE_NULL
ProcedureReturn
EndIf
; If Not NULL, then Y must be numeric. Otherwise throw an error.
; Requirement 4
If eType <> #SQLITE_INTEGER And eType <> #SQLITE_FLOAT
sqlite3_result_error(*pCtx, "1st argument to percentile() is not numeric", -1)
ProcedureReturn
EndIf
; Throw an error If the Y value is infinity Or NaN
y = sqlite3_value_double(*ptr)
If IsInfinity(y)
sqlite3_result_error(*pCtx, "Inf input to percentile()", -1)
ProcedureReturn
EndIf
; Allocate And store the Y
If *p\nUsed >= *p\nAlloc
n = *p\nAlloc * 2 + 250
*a = sqlite3_realloc(*p\a, 8 * n)
If *a = 0
sqlite3_free(*p\a)
ClearStructure(*p, Percentile)
sqlite3_result_error_nomem(*pCtx)
ProcedureReturn
EndIf
*p\nAlloc = n
*p\a = *a
EndIf
;*p\a[*p\nUsed] = y
PokeD(*p\a + 8 * *p\nUsed, y)
*p\nUsed + 1
EndProcedure
ProcedureC.l _intern_perc_doubleCmp(*pA.Double, *pB.Double)
Protected Result.l
Result = 1
If *pA\d = *pB\d
Result = 0
ElseIf *pA\d < *pB\d
Result = -1
EndIf
ProcedureReturn Result
EndProcedure
ProcedureC _intern_perc_percentFinal(*pCtx)
Protected *p.Percentile
Protected.i i1, i2
Protected.d v1, v2
Protected.d ix, vx
Protected.i i
Protected.d v11, v22
*p = sqlite3_aggregate_context(*pCtx, 0)
If *p
If *p\a <> 0
If *p\nUsed
If *p\rPct > 0
If *p\rPct = 100
vx = PeekD(*p\a + 8 * (*p\nUsed - 1))
Else
qsort(*p\a, *p\nUsed, SizeOf(double), @_intern_perc_doubleCmp())
; Debug "-----"
; For i = 0 To *p\nUsed - 1
; Debug Str(i) + ": " + StrD(PeekD(*p\a + 8 * i))
; Next i
; Debug "+++++"
i1 = Round(*p\rPct / 100 * *p\nUsed, #PB_Round_Up) - 1
vx = PeekD(*p\a + 8 * i1)
EndIf
Else
vx = 0
EndIf
sqlite3_result_double(*pCtx, vx)
EndIf
sqlite3_free(*p\a)
EndIf
ClearStructure(*p, Percentile)
EndIf
EndProcedure
ProcedureC _intern_stdevFinalize(*context)
Protected *p.StdevCtx
*p = sqlite3_aggregate_context(*context, 0)
If *p And *p\cnt > 1
sqlite3_result_double(*context, Sqr(*p\rS / (*p\cnt-1)))
Else
sqlite3_result_double(*context, 0.0)
EndIf
EndProcedure
;
; Returns the variance value
;
ProcedureC _intern_varianceFinalize(*context)
Protected *p.StdevCtx
*p = sqlite3_aggregate_context(*context, 0)
If *p And *p\cnt > 1
sqlite3_result_double(*context, *p\rS / (*p\cnt-1))
Else
sqlite3_result_double(*context, 0.0)
EndIf
EndProcedure
;
; called for each value received during a calculation of stdev or variance
;
ProcedureC _intern_varianceStep(*context, argc.l, *argv.Integer)
Protected *p.StdevCtx
Protected delta.d, x.d
If argc=1
*p = sqlite3_aggregate_context(*context, SizeOf(StdevCtx))
; only consider non-null values
If #SQLITE_NULL <> sqlite3_value_numeric_type(*argv\i)
*p\cnt + 1
x = sqlite3_value_double(*argv\i)
delta = (x - *p\rM)
*p\rM + delta / *p\cnt
*p\rS + delta * (x - *p\rM)
EndIf
EndIf
EndProcedure
Procedure _intern_RegisterSQLiteFunctions(dataBase)
Protected dataBaseID = DatabaseID(dataBase)
sqlite3_create_function(dataBaseID, "stdev" , 1, #SQLITE_UTF8, 0, 0, @_intern_varianceStep(), @_intern_stdevFinalize())
sqlite3_create_function(dataBaseID, "variance", 1, #SQLITE_UTF8, 0, 0, @_intern_varianceStep(), @_intern_varianceFinalize())
sqlite3_create_function(databaseID, "perc", 2, #SQLITE_UTF8, 0, 0, @_intern_perc_percentStep(), @_intern_perc_percentFinal())
EndProcedure
;-----------------------------
;- Public functions
Procedure _intern_MakeDatabase(); helper function
Protected db=OpenDatabase(#PB_Any, ":memory:", "", "")
If IsDatabase(db)
_intern_RegisterSQLiteFunctions(db)
If DatabaseUpdate(db,"CREATE TABLE data (time INTEGER, val DOUBLE)")
Debug "create table: success"
sdb_table$="data"
sdb_value$="val"
sdb_date$="time"
idb_digits=1
ProcedureReturn db
Else
Debug "create table: error (" + DatabaseError()+")"
EndIf
EndIf
EndProcedure
Procedure _intern_MakeArray(Array arr.tData(1),sData$, bUseTimeOffset=1)
If Right(sData$,1)<>"|"
sData$+"|"
EndIf
Protected iCount=CountString(sData$,"|")-1
Protected iTimeOffset.d=86400/(iCount+1) ; calc even interval between data values, time range 24 hours
Protected itime=0
ReDim arr(iCount)
For i=0 To iCount
srecord$=StringField(sData$,i+1,"|")
sval$=StringField(srecord$+"^",2,"^")
sdat$=StringField(srecord$+"^",1,"^")
If sdat$<>""
arr(i)\iDate=ParseDate("%dd%mm%yyyy%hh%ii",sdat$)
Else
If bUseTimeOffset=1 ; otherwise nodat
arr(i)\iDate=iTime
EndIf
EndIf
arr(i)\dValue=ValD(sval$)
iTime+iTimeOffset
Next
Debug "makearray: "+Str(icount+1 )+" items" ; consider zero index
EndProcedure
Procedure _intern_FillDatabase(db,Array arr.tData(1))
Protected bRet=#False
If IsDatabase(db)
If DatabaseUpdate(db, "DELETE FROM data")
Debug "clear table: success"
Else
Debug "clear table: error ("+DatabaseError()+")"
ProcedureReturn
EndIf
Else
db=_intern_MakeDatabase()
EndIf
Protected iSize=ArraySize(arr())
DatabaseUpdate(db, "BEGIN TRANSACTION")
Protected iTime.d=0
For i=0 To iSize
DatabaseUpdate(db, "INSERT INTO data (time,val) VALUES ("+Str(arr(i)\iDate)+","+StrD(arr(i)\dValue)+")")
Next
DatabaseUpdate(db, "END TRANSACTION")
If DatabaseQuery(db,"SELECT count(time) from data")
If NextDatabaseRow(db)
iFound=GetDatabaseLong(db,0)
If iFound=ArraySize(arr())+1
bRet=#True
EndIf
EndIf
FinishDatabaseQuery(db)
EndIf
If bRet=1
Debug "fill table: success ("+Str(iFound)+" items)"
Else
Debug "fill table: error ("+DatabaseError()+")"
EndIf
ProcedureReturn bRet
EndProcedure
Procedure UseMemDB(bCloseDB=#False)
; Copy database to memory and use memory database
If IsDatabase(iStatLib_DB)=#False
Debug "ERROR: NO VALID DATABASE"
ProcedureReturn
EndIf
dbmem = OpenDatabase(#PB_Any, ":memory:", "", "", #PB_Database_SQLite)
If IsDatabase(dbmem)
sqlite3_busy_timeout(DatabaseID(Dbmem), 120000)
sqlite3_busy_timeout(DatabaseID(iStatLib_DB), 120000)
BackUp = sqlite3_backup_init(DatabaseID(Dbmem), "main", DatabaseID(iStatLib_DB), "main")
If BackUp
; Und Daten von :memory:-DB in die Datei-DB übertragen
sqlite3_backup_step (Backup, -1)
sqlite3_backup_finish(Backup)
EndIf
Debug sqlite3_errcode(DatabaseID(dbmem))
If DatabaseUpdate(dbmem,"CREATE INDEX `i1` ON `"+sdb_table$+"` (`"+sdb_date$+"` ASC,`"+sdb_value$+"` ASC)")=0
Debug DatabaseError()
EndIf
If bCloseDB=#True
CloseDatabase(iStatLib_DB)
EndIf
iStatLib_DB=dbmem
_intern_RegisterSQLiteFunctions(iStatLib_DB)
bUseMemoryDB=#True
ProcedureReturn #True
Else
Debug DatabaseError()
EndIf
EndProcedure
Procedure.s Func(sFunc$,sparams$="",inum=0)
;sInterval$="",sCriteria$="",iNum=0)
;Params:
; sFunc: Function
; [GETRANGE]: Retrieve Start date and End date of contained data
; [COUNT]: Count values
; [SUM]: Summarize values
; [MIN]: Get lowest value
; [MAX]: Get highest value
; [AVG]: Get AVG
; [MEDIAN]: Get median (50 Percentile)
; [PERC]: Get x Percentile; x=[inum]
; [STDEV]: Standard deviation
; [VARIANCE]: Variance
; [TREND]; Get Trend
If bIsInit=#False
Debug "ERROR: NOT INITIALIZED"
ProcedureReturn "ERROR: NOT INITIALIZED"
EndIf
If IsDatabase(iStatLib_DB)=#False
Debug "ERROR: NO VALID DATABASE"
ProcedureReturn "ERROR: NO VALID DATABASE"
EndIf
Protected sInterval$=""
Protected sWeekdays$=""
Protected sCriteria$=""
Protected sgroupby1$=""
Protected sgroupby2$=""
If sparams$<>""
Protected NewMap mp.s()
js=ParseJSON(#PB_Any,sparams$)
If IsJSON(js)
ExtractJSONMap(JSONValue(js), mp())
FreeJSON(js)
If FindMapElement(mp(),"interval")>0
;Datefrom|Dateuntil = DDMMYYYYHHMM|DDMMYYYYHHMM or Datefrom|Dateuntil = DDMMYYYY|DDMMYYYY or TimeFrom|TimeUntil = HHMM|HHMM
sInterval$=mp()
EndIf
If FindMapElement(mp(),"criteria")>0
scriteria$=mp()
EndIf
If FindMapElement(mp(),"weekdays")>0
For i=0 To 6
If FindString(mp(),StringField("SO,MO,TU,WE,TH,FR,SA",i+1,","))
sWeekdays$+Str(i)+","
EndIf
Next
sWeekdays$=Trim(sWeekdays$,",")
EndIf
If FindMapElement(mp(),"groupby")>0
Select mp()
Case "hour"
sgroupby1$=" ,strftime('%H',[TIME],'unixepoch') As 'v_h' "
sgroupby2$=" group by v_h "
Case "interval"
sg$=mp("groupbyinterval")
If sg$="":sg$="1":EndIf
sgroupby1$=" ,((CAST (strftime('%H',[TIME],'unixepoch') AS integer) *60) + (CAST (strftime('%M',[TIME],'unixepoch') AS integer))) /"+sg$+" As 'v_minute' "
sgroupby2$=" group by v_minute"
Case "weekday"
sgroupby1$=" ,strftime('%w',[TIME],'unixepoch') As 'v_weekday' "
sgroupby2$=" group by v_weekday "
EndSelect
EndIf
EndIf
EndIf
Static Dim dData.tData(0)
Static sOldParams$
Protected sret$
Protected iType=-1
Protected sNewParams$=sfunc$+"|"+sInterval$+sCriteria$
If sInterval$<>""
sfrom$=StringField(sInterval$,1,"|")
suntil$=StringField(sInterval$,2,"|")
If sfrom$<>"" And suntil$<>"" And Len(sfrom$)<>Len(suntil$)
Debug "ERROR: INTERVAL IS NOT IN SAME FORMAT"
ProcedureReturn "ERROR: INTERVAL IS NOT IN SAME FORMAT"
EndIf
If sfrom$<>"" And suntil$<>"" ; both data given
Select Len(sfrom$)
Case 12 ; full date DDMMYYYYHHII
sdt$=" (date([TIME],'unixepoch') BETWEEN "+Str(ParseDate("%dd%mm%yyyy%hh%ii",sfrom$))+" AND "+Str(ParseDate("%dd%mm%yyyy%hh%ii",suntil$))+")"
Case 8 ; only date DDMMYYYY
sdt$="([TIME] BETWEEN "+Str(ParseDate("%dd%mm%yyyy",sfrom$))+" AND "+Str(ParseDate("%dd%mm%yyyy",suntil$)+86399)+")"
Case 4 ; only date DDMMYYYY
suntil$=FormatDate("%hh%ii",ParseDate("%hh%ii",suntil$)-1);
sdt$="(time([TIME],'unixepoch') BETWEEN '"+FormatDate("%hh:%ii:00",ParseDate("%hh%ii",sfrom$))+"' AND '"+FormatDate("%hh:%ii:59",ParseDate("%hh%ii",suntil$))+"')"
Default:
Debug "ERROR: INTERVAL IS NOT IN SAME FORMAT "
ProcedureReturn "ERROR: INTERVAL IS NOT IN SAME FORMAT "
EndSelect
ElseIf sfrom$<>""
Select Len(sfrom$)
Case 12 ; full date DDMMYYYYHHII
sdt$=" (date([TIME],'unixepoch') >= "+Str(ParseDate("%dd%mm%yyyy%hh%ii",sfrom$))+")"
Case 8 ; only date DDMMYYYY
sdt$="([TIME] >= "+Str(ParseDate("%dd%mm%yyyy",sfrom$))+")"
Case 4 ; only date DDMMYYYY
sdt$="(time([TIME],'unixepoch') BETWEEN '"+FormatDate("%hh:%ii:00",ParseDate("%hh%ii",sfrom$))+"' AND '23:59:59')"
Default:
Debug "ERROR: INTERVAL (STARTDATE) IS NOT IN PROPER FORMAT "
ProcedureReturn "ERROR: INTERVAL (STARTDATE) IS NOT IN PROPER FORMAT "
EndSelect
ElseIf suntil$<>""
Select Len(suntil$)
Case 12 ; full date DDMMYYYYHHII
sdt$=" (date([TIME],'unixepoch') < "+Str(ParseDate("%dd%mm%yyyy%hh%ii",suntil$))+")"
Case 8 ; only date DDMMYYYY
sdt$="([TIME] <= "+Str(ParseDate("%dd%mm%yyyy",suntil$)+86399)+")"
Case 4 ; only date DDMMYYYY
suntil$=FormatDate("%hh:%ii",ParseDate("%hh%ii",suntil$)-1);
sdt$="(time([TIME],'unixepoch') BETWEEN '00:00:00' AND '"+FormatDate("%hh:%ii:59",ParseDate("%hh%ii",suntil$))+"')"
Default
Debug "ERROR: INTERVAL (ENDDATE) IS NOT IN PROPER FORMAT "
ProcedureReturn "ERROR: INTERVAL ENDDATE) IS NOT IN PROPER FORMAT "
EndSelect
EndIf
scrit$=" WHERE "+sdt$
EndIf
If sWeekdays$<>""
If scrit$=""
scrit$=" WHERE (CAST (strftime('%w',[TIME],'unixepoch') as integer) IN ("+sWeekdays$+"))"
Else
scrit$+ " AND ( CAST (strftime('%w',[TIME],'unixepoch') as integer) IN ("+sWeekdays$+"))"
EndIf
EndIf
If sCriteria$<>""
If scrit$=""
scrit$=" WHERE "+sCriteria$
Else
scrit$+ " AND ("+sCriteria$+")"
EndIf
EndIf
Select UCase(sFunc$)
Case "GETRANGE"
sq$="select min ([TIME]), max([TIME]) from [TABLE]"
iType=1
Case "AVG","STDEV","VARIANCE","COUNT","SUM","MIN","MAX"
sq$="select "+LCase(sFunc$)+"([VAL]) [GROUPBY1] from [TABLE] [CRIT] [GROUPBY2]"
iType=1
Case "PERC","MEDIAN"; Percentile nearest rank methode
If sFunc$="MEDIAN"
sFunc$="PERC"
iNum=50
EndIf
sq$="select "+LCase(sFunc$)+"([VAL],"+Str(inum)+") [GROUPBY1] from [TABLE] [CRIT] [GROUPBY2]"
iType=1
Case "CV"
sq$="select stdev([VAL]),avg([VAL]) [GROUPBY1] from [TABLE] [CRIT] [GROUPBY2]"
iType=4
Case "TREND"
sq$="Select [TIME],[VAL] from [TABLE] [CRIT] order by [TIME] ASC"
If Len(sfrom$)=12 Or Len (sfrom$)=4
sq$="select datetime([TIME],'unixepoch') As 'v_datetime',time([TIME],'unixepoch') As 'v_time',* from [TABLE] [CRIT] order by v_time asc, v_datetime asc"
EndIf
iType=3
EndSelect
sq$=ReplaceString(sq$,"[CRIT]",scrit$)
sq$=ReplaceString(sq$,"[GROUPBY1]",sgroupby1$)
sq$=ReplaceString(sq$,"[GROUPBY2]",sgroupby2$)
sq$=ReplaceString(sq$,"[VAL]",sdb_value$)
sq$=ReplaceString(sq$,"[TABLE]",sdb_table$)
sq$=ReplaceString(sq$,"[TIME]",sdb_date$)
Debug sq$
Protected vd=Date(1970,1,1,0,0,0)
Protected sdat$
Select itype
Case 0
;
Case 1
If DatabaseQuery(iStatLib_DB,sq$)
While NextDatabaseRow(iStatLib_DB)
sret$+StrD(GetDatabaseDouble(iStatLib_DB,0),idb_digits)+"^"+ GetDatabaseString(iStatLib_DB,1)+"|"
Wend
FinishDatabaseQuery(iStatLib_DB)
Else
Debug DatabaseError()
EndIf
Case 2,3;- perc
Protected ic=0
If sOldParams$<>sNewParams$ Or StringField(sOldParams$,1,"|")<>sFunc$
ReDim dData.tData(1000)
If DatabaseQuery(iStatLib_DB,sq$)
While NextDatabaseRow(iStatLib_DB)
dData(ic)\iDate=GetDatabaseDouble(iStatLib_DB,0)
dData(ic)\dValue=GetDatabaseDouble(iStatLib_DB,1)
If ic%1000
ReDim dData(ic+1000)
EndIf
ic+1
Wend
FinishDatabaseQuery(iStatLib_DB)
ReDim dData(ic)
Else
Debug DatabaseError()
EndIf
Else
ic=ArraySize(dData())
EndIf
If ic>0
Select itype
Case 2: ; percentile
dRank.d=ic*(iNum/100)
If drank=Int(dRank)
irank.i=drank
dPercentile.d=(ddata(iRank)\dValue+dData(irank+1)\dValue)/2
Else
iRank=Round(dRank,#PB_Round_Up)
dPercentile.d=ddata(iRank)\dValue
EndIf
sret$=StrD(dPercentile,idb_digits);+"^"+ Str(ic)
Case 3: ; Trend
; calculate 2 data rows
Protected ddatarow1.d =0
Protected dDatarow2.d =0
For i=0 To ic/2
dDatarow1+dData(i)\dValue
;Debug dData(i)\dValue
Next
dDatarow1=dDatarow1/(ic/2)
For i=ic/2 To ic
dDatarow2+dData(i)\dValue
Next
dDatarow2=dDatarow2/(ic/2)
;iPercent=(dDatarow2-dDatarow1)/(dData(ic)\dValue-dData(0)\dValue)
sret$=StrD(dDatarow2-dDatarow1,idb_digits+1)+"|"+ StrD(dDatarow1,idb_digits+1)+"^"+ StrD(dDatarow2,idb_digits+1)
EndSelect
EndIf
Case 4;- CV
If DatabaseQuery(iStatLib_DB,sq$)
While NextDatabaseRow(iStatLib_DB)
sret$+StrD((GetDatabaseDouble(iStatLib_DB,0)/GetDatabaseDouble(iStatLib_DB,1))*100,1)+"%"+"^"+ GetDatabaseString(iStatLib_DB,2)+"|"
Wend
FinishDatabaseQuery(iStatLib_DB)
Else
Debug DatabaseError()
EndIf
EndSelect
If iNum<>0
sparam$="("+Str(inum)+")"
EndIf
Debug sFunc$+sparam$+": "+ Trim(sret$,"|")+#CRLF$
sOldParams$=sNewParams$
ProcedureReturn Trim(Trim(sret$,"|"),"^")
EndProcedure
Procedure.s SaveMem2Database(sdestfile$="")
If bIsInit=#False
Debug "ERROR: NOT INITIALIZED"
ProcedureReturn "ERROR: NOT INITIALIZED"
EndIf
If IsDatabase(iStatLib_DB)=#False
Debug "ERROR: NO VALID DATABASE"
ProcedureReturn "ERROR: NO VALID DATABASE"
EndIf
Protected db, ff
dbmem=iStatLib_DB
If sdestfile$=""
sdestfile$="data.db"
EndIf
If IsDatabase(dbmem)
FF = CreateFile(#PB_Any,sdestfile$)
If FF
CloseFile(FF)
Db = OpenDatabase(#PB_Any, sdestfile$, "", "", #PB_Database_SQLite)
BackUp = sqlite3_backup_init(DatabaseID(Db), "main", DatabaseID(Dbmem), "main")
If BackUp
; Und Daten von :memory:-DB in die Datei-DB übertragen
sqlite3_backup_step (Backup, -1)
sqlite3_backup_finish(Backup)
EndIf
Debug sqlite3_errcode(DatabaseID(db))
CloseDatabase(Db)
EndIf
EndIf
ProcedureReturn sdestfile$
EndProcedure
Procedure Finish(bCloseDB=#False)
If bUseMemoryDB=#True
CloseDatabase(iStatLib_DB)
EndIf
If bCloseDB=#True
If IsDatabase(iStatLib_DB)
CloseDatabase(iStatLib_DB)
EndIf
EndIf
bUseMemoryDB=0
iStatLib_DB=0
bIsInit=#False
EndProcedure
Procedure StartFromFile(sdbfile$,stable$="data", sdatefield$="date",svaluefield$="val", iDigits=1,sDateFormat$="%dd.%mm.%yyyy %hh:%ii",sSeparator$=",")
If bIsInit=#False
sdb_table$=stable$
sdb_value$=svaluefield$
sdb_date$=sdatefield$
If Val(sdbfile$)<>0
db=Val(sdbfile$)
If IsDatabase(db)
iStatLib_DB= db
_intern_RegisterSQLiteFunctions(iStatLib_DB)
Else
Debug "ERROR: INVALID DATABASE"
ProcedureReturn
EndIf
Else
Select GetExtensionPart(sdbfile$)
Case "db"
db=OpenDatabase(#PB_Any,sdbfile$,"","")
If IsDatabase(db)
iStatLib_DB= db
UseMemDB(#True)
Else
Debug "ERROR: INVALID DATABASE"
ProcedureReturn
EndIf
Case "csv","txt"
Protected Dim dArr.tdata(10000)
Protected ic=0
Protected iDateField, iValField
of=ReadFile(#PB_Any,sdbfile$,#PB_File_SharedRead|#PB_File_NoBuffering)
If IsFile(of)
sh$=ReadString(of)
For i=1 To CountString(sh$+sSeparator$,sSeparator$)
If StringField(sh$,i,sSeparator$)=sdatefield$
iDateField=i
EndIf
If StringField(sh$,i,sSeparator$)=svaluefield$
iValField=i
EndIf
Next
If iDateField<>0 And iValField<>0
While Not Eof(of)
sg$=ReadString(of)
sv$=ReplaceString(StringField(sg$,iValField,sSeparator$),",",".")
dArr(ic)\dValue=ValD(sv$)
dArr(ic)\iDate=ParseDate(sDateFormat$,StringField(sg$,iDateField,sSeparator$))
If ic%10000=0
Debug "read:"+Str(ic)
ReDim darr(ic+10000)
EndIf
ic+1
Wend
If ic>0
ReDim darr(ic-1)
EndIf
CloseFile(of)
iStatLib_DB=_intern_MakeDatabase()
_intern_FillDatabase(iStatLib_DB,darr())
Else
Debug "ERROR: ILLEGAL FILE FORMAT"
CloseFile(of)
ProcedureReturn
EndIf
EndIf
EndSelect
EndIf
idb_digits=iDigits
bIsInit=#True
EndIf
ProcedureReturn bIsInit
EndProcedure
Procedure StartFromArray(Array ddata.tdata(1), iDigits=1)
If bIsInit=#False
iStatLib_DB=_intern_MakeDatabase()
_intern_FillDatabase(iStatLib_DB,ddata())
bIsInit=#True
EndIf
ProcedureReturn bIsInit
EndProcedure
Procedure StartFromString(sdata$, iDigits=1)
If bIsInit=#False
Dim dArr.tdata(0)
iStatLib_DB=_intern_MakeDatabase()
_intern_MakeArray(darr(),sdata$)
_intern_FillDatabase(iStatLib_DB,darr())
bIsInit=#True
EndIf
ProcedureReturn bIsInit
EndProcedure
EndModule
CompilerIf #PB_Compiler_IsIncludeFile=#False;- Test ---------------------------------
UseModule StatLib
; Sample 1: analyse data string
Dim dArr.tdata(0)
sg$="010520230122^2|010520230546^4|010520230600^4|010520230710^4|010520230820^5|010520231100^5|010520231200^7|010520231400^9|010520231600^8|010520231800^9|010520232100^10|"
StatLib::_intern_MakeArray(darr(),sg$)
If StatLib::StartFromArray(darr())
StatLib::Func("AVG") ;get Overall Average
StatLib::Func("COUNT"); Get Overall Count
StatLib::Func("STDEV") ;get Standard Deviation
StatLib::Func("CV") ; get variation coeffezient
StatLib::Func("MIN"); get lowest Value
StatLib::Func("MAX"); get highest value
StatLib::Finish()
Debug "Duration:"+StrD((ElapsedMilliseconds()-iStart)/1000,1)
EndIf
; Sample 2: analyse database (from handle)
stable$="data"
stime$="time"
sval$="val"
db=StatLib::_intern_MakeDatabase()
If IsDatabase(db)
vd=Date(2023,1,1,0,0,0)
vd2=Date()
iStep=900 ; create data in interval of 15 minutes (=each 900 seconds)
DatabaseUpdate(db,"BEGIN TRANSACTION")
For i=vd To vd2
If i%istep=0
ddata.d= 100*Cos(i/10)*Sin(i/100)
If DatabaseUpdate(db,"INSERT INTO data (time,val) VALUES("+Str(i)+","+StrD(ddata,0)+");")=1
Else
Debug DatabaseError()
EndIf
EndIf
Next
DatabaseUpdate(db,"END TRANSACTION")
NewMap mp.s();
mp("interval")="25022023|26052023"; consider only values with timestamp between feb 25th,23 and May 26th, 23
mp("weekdays")="MO,TU,FR" ; consider only Monday,Tuedasy and Fridays
mp("criteria")="[VAL]>0"
mp("groupby")="weekdays"
If StatLib::StartfromFile(Str(db),stable$,stime$,sval$,0)
js= CreateJSON(#PB_Any)
InsertJSONMap(JSONValue(js), mp())
sparam$=ComposeJSON(js)
iStart=ElapsedMilliseconds()
StatLib::Func("AVG") ;get Overall Average
StatLib::Func("COUNT"); Get Overall Count
; now get filtered data
StatLib::Func("COUNT",sparam$) ;get Median
StatLib::Func("AVG",sparam$) ;get Average
StatLib::Func("MEDIAN",sparam$) ;get Median
StatLib::Func("PERC",sparam$,10);get 10. Percentile
StatLib::Func("STDEV",sparam$) ;get Standard Deviation
StatLib::Func("CV",sparam$) ; get variation coeffezient
StatLib::Func("MIN",sparam$); get lowest Value
StatLib::Func("MAX",sparam$); get highest value
StatLib::Func("SUM",sparam$) ;get Median
StatLib::Func("VARIANCE",sparam$);get Variance
DeleteFile ("sample.db")
StatLib::SaveMem2Database("sample.db")
FreeJSON(js)
StatLib::Finish()
Debug "Duration:"+StrD((ElapsedMilliseconds()-iStart)/1000,1)
EndIf
CloseDatabase(db)
EndIf
; ; Sample 3: analyse database (from file)
iStart=ElapsedMilliseconds()
ClearMap (mp())
mp("interval")="0830|1200" ; consider only values with timestamp between 08:30am and 1200pm
mp("groupby")="interval"
mp("groupbyinterval")="15" ; ; group in sections of 15 minutes
js= CreateJSON(#PB_Any)
InsertJSONMap(JSONValue(js), mp())
sparam$=ComposeJSON(js)
If StatLib::StartfromFile("sample.db","data","time","val"): StatLib::UseMemDB(#True)
StatLib::Func("COUNT"); Get Overall Count
StatLib::Func("Min") ;Mininum
StatLib::Func("Max") ;max
; now get filtered data
vd=Date(2020,1,1,0,0,0)
Debug "Now AVG grouped by timeslots"
sret$=StatLib::Func("AVG",sparam$) ;get Average
For i=1 To CountString(sret$,"|")+1
sslot$=StringField(sret$,i,"|")
Debug FormatDate("%hh:%ii:%ss",AddDate(vd,#PB_Date_Minute,Val(StringField(sslot$,2,"^"))*Val(mp("groupbyinterval")))) +" - "+ FormatDate("%hh:%ii:%ss",AddDate(vd,#PB_Date_Minute,((Val(StringField(sslot$,2,"^"))+1)*Val(mp("groupbyinterval"))))-1) +" -->"+ StringField(sslot$,1,"^")
Next
StatLib::Finish()
Debug "Duration:"+StrD((ElapsedMilliseconds()-iStart)/1000,1)
EndIf
End
CompilerEndIf