Do statistical calculations with the help of R

Share your advanced PureBasic knowledge/code with the community.
Little John
Addict
Addict
Posts: 4519
Joined: Thu Jun 07, 2007 3:25 pm
Location: Berlin, Germany

Do statistical calculations with the help of R

Post by Little John »

The following code shows how the free programming language for statistical computing R can be called from PureBasic.

Code: Select all

; -- Wrapper for using the R language from PureBasic
;    Version 0.70
; tested with PB 6.02 LTS and R version 4.2.2
; <https://www.purebasic.fr/english/viewtopic.php?t=80250>

DeclareModule R
   ; Error codes
   Enumeration -1 Step -1
      #Err_SettingPath
      #Err_CreatingScript
      #Err_RunningScript
      #Err_ExitCode
      #Err_UnknownMethod
      #Err_UnbalancedValues
   EndEnumeration
   
   ; Structures
   Structure LoUpD
      lo.d
      up.d
   EndStructure
   
   ; -- General functions
   Declare.i SetPath (path$)
   Declare.s Raw (commands$)
   Declare.i LastError()
   
   ; -- Examples of special functions
   Declare.d cor (x$, y$, *cor.Double, method$="pearson")
   Declare.d lm (x$, y$, *slope.Double, *ic.Double)
   Declare.i binom_confint (k.i, n.i, level.d, *bounds.LoUpD)
EndDeclareModule


Module R
   EnableExplicit
   
   CompilerIf #PB_Compiler_OS = #PB_OS_Windows
      #NL$ = #CRLF$
   CompilerElse
      #NL$ = #LF$
   CompilerEndIf
   
   Global g_LastError.i = 0
   Define s_Rscript$
   
   
   Procedure.i LastError()
      ; out: return value: code of the most recent error,
      ;                    or 0 on success
      Protected temp.i=0
      
      Swap temp, g_LastError   ; set g_LastError to 0
      ProcedureReturn temp
   EndProcedure
   
   
   Procedure.i SetPath (path$)
      ; in : path$: absolute or relative (in relation to the main program) path to the program 'Rscript'
      ; out: return value: 0 on success,
      ;                    or < 0 on error
      Shared s_Rscript$
      
      If FileSize(path$) = -1
         g_LastError = #Err_SettingPath
         ProcedureReturn #Err_SettingPath
      EndIf
      
      s_Rscript$ = path$
      ProcedureReturn 0
   EndProcedure
   
   
   Procedure.i _RunScript (commands$, List rOut$())
      ; in : commands$: commands to be executed with Rscript, separated by #LF$
      ; out: rOut$()     : output of Rscript
      ;      return value: 0 on success,
      ;                    or <> 0 on error
      Shared s_Rscript$
      Protected script$, ofn.i, prog.i, ret.i
      
      ClearList(rOut$())
      script$ = GetTemporaryDirectory() + "script.r"
      
      ofn = CreateFile(#PB_Any, script$)
      If ofn = 0
         g_LastError = #Err_CreatingScript
         ProcedureReturn #Err_CreatingScript
      EndIf
      
      WriteStringN(ofn, commands$)
      CloseFile(ofn)
      
      ; -- Execute 'Rscript' and read its output
      prog = RunProgram(s_Rscript$, script$, "", #PB_Program_Open | #PB_Program_Read | #PB_Program_Hide)
      If prog = 0
         g_LastError = #Err_RunningScript
         ProcedureReturn #Err_RunningScript
      EndIf
      
      While ProgramRunning(prog)
         If AvailableProgramOutput(prog)
            AddElement(rOut$())
            rOut$() = ReadProgramString(prog)
         EndIf
      Wend
      ret = ProgramExitCode(prog)
      CloseProgram(prog)
      DeleteFile(script$)
      
      ProcedureReturn ret
   EndProcedure
   
   
   Procedure.s Raw (commands$)
      ; in : commands$: commands to be executed with Rscript, separated by #LF$
      ; out: return value: commands prepended with "> " plus output of Rscript
      Protected err.i, ret$
      Protected NewList rOut$()
      
      err = _RunScript(commands$, rOut$())
      If err <> 0
         g_LastError = err
      EndIf
      
      ret$ = "> " + ReplaceString(commands$, #LF$, #NL$ + "> ")
      ForEach rOut$()
         ret$ + #NL$ + rOut$()
      Next
      
      ProcedureReturn ret$
   EndProcedure
   
   
   Procedure.d cor (x$, y$, *cor.Double, method$="pearson")
      ; -- Correlation
      ; in : x$, y$ : "parallel lists" of numeric values, separated by ',' (e.g. "1,2,3")
      ;      method$: 'pearson', 'kendall', or 'spearman' (can be abbreviated)
      ; out: cor\d       : correlation coefficient (from the interval [-1, 1])
      ;      return value: p-value (from the interval [0, 1]),
      ;                    or < 0 on error
      Protected commands$, err.i, p.i, ret.d
      Protected NewList rOut$()
      
      If CountString(x$, ",") <> CountString(y$, ",")
         g_LastError = #Err_UnbalancedValues
         ProcedureReturn #Err_UnbalancedValues
      EndIf
      
      If FindString(" pearson kendall spearman", " "+method$) = 0
         g_LastError = #Err_UnknownMethod
         ProcedureReturn #Err_UnknownMethod
      EndIf
      
      commands$ = "x <- c(" + x$ + ")" + #LF$ +
                  "y <- c(" + y$ + ")" + #LF$ +
                  "cor.test(x, y, method='" + method$ + "')"
      
      err = _RunScript(commands$, rOut$())
      If err <> 0
         g_LastError = err
         If err > 0
            err = #Err_ExitCode
         EndIf
         ProcedureReturn err
      EndIf
      
      ResetList(rOut$())
      While NextElement(rOut$())
         p = FindString(rOut$(), "p-value")
         If p
            ret = ValD(Mid(rOut$(), p+10))
         ElseIf FindString(rOut$(), "sample estimates:") = 1
            NextElement(rOut$())
            NextElement(rOut$())
            *cor\d = ValD(rOut$())
         EndIf
      Wend
      
      ProcedureReturn ret
   EndProcedure
   
   
   Procedure.d lm (x$, y$, *slope.Double, *ic.Double)
      ; -- Linear regression ("linear model")
      ; in : x$: values of the independent variable, separated by ',' (e.g. "1,2,3")
      ;      y$: values of the   dependent variable, separated by ','
      ; out: slope\d     : regression coefficient
      ;      ic\d        : intercept
      ;      return value: p-value of the regression coefficient (from the interval [0, 1]),
      ;                    or < 0 on error
      Protected commands$, err.i, ret.d
      Protected NewList rOut$()
      
      If CountString(x$, ",") <> CountString(y$, ",")
         g_LastError = #Err_UnbalancedValues
         ProcedureReturn #Err_UnbalancedValues
      EndIf
      
      commands$ = "x <- c(" + x$ + ")" + #LF$ +
                  "y <- c(" + y$ + ")" + #LF$ +
                  "summary(lm(y ~ x))"
      
      err = _RunScript(commands$, rOut$())
      If err <> 0
         g_LastError = err
         If err > 0
            err = #Err_ExitCode
         EndIf
         ProcedureReturn err
      EndIf
      
      ResetList(rOut$())
      While NextElement(rOut$())
         If FindString(rOut$(), "(Intercept)") = 1
            *ic\d = ValD(Mid(rOut$(), 13, 8))
         ElseIf FindString(rOut$(), "x") = 1
            *slope\d = ValD(Mid(rOut$(), 13, 8))
            ret = ValD(Mid(rOut$(), 41, 8))
            Break
         EndIf
      Wend
      
      ProcedureReturn ret
   EndProcedure
   
   
   Procedure.i binom_confint (k.i, n.i, level.d, *bounds.LoUpD)
      ; -- 2-sided confidence interval of the binomial distribution
      ; in : k    : number of successes in the binomial experiment
      ;      n    : number of independent trials in the binomial experiment
      ;      level: level of confidence to be applied (e.g. 0.99)
      ; out: *bounds     : lower and upper bound of the wanted confidence interval
      ;      return value: 0 on success,
      ;                    or < 0 on error
      Protected commands$, err.i
      Protected NewList rOut$()
      
      ; The package "binom" must be installed!
      commands$ = "library(binom)" + #LF$ +
                  "binom.confint(" + k + ", " + n + ", " + level + ", method='exact')"
      
      err = _RunScript(commands$, rOut$())
      If err <> 0
         g_LastError = err
         If err > 0
            err = #Err_ExitCode
         EndIf
         ProcedureReturn err
      EndIf
      
      ResetList(rOut$())
      While NextElement(rOut$())
         If FindString(rOut$(), "exact")
            *bounds\lo = ValD(Mid(rOut$(), 25, 10))
            *bounds\up = ValD(Mid(rOut$(), 36, 10))
         EndIf
      Wend
      
      ProcedureReturn 0  ; success
   EndProcedure
EndModule


CompilerIf #PB_Compiler_IsMainFile
   ;-- Module demo
   EnableExplicit
   
   Define commands$, x$, y$, method$
   Define.i k, n, err
   Define.d p, cor, slope, ic
   Define bounds.R::LoUpD
   
   If R::SetPath("..\..\R-4.2.2\bin\Rscript.exe") < 0
      MessageRequester("R.pbi", ~"Error:\n'Rscript' not found.", #PB_MessageRequester_Error)
      End
   EndIf
   
   ; -----------------------------------------------
   
   x$ = "1, 2, 3, 4, 5"
   y$ = "4.9, 5.1, 5.5, 5.9, 6.4"
   method$ = "pearson"
   
   commands$ = "x <- c(" + x$ + ")" + #LF$ +
               "y <- c(" + y$ + ")" + #LF$ +
               "cor.test(x, y, method='" + method$ + "')"
   
   Debug R::Raw(commands$)
   err = R::LastError()
   If err <> 0
      Debug "Raw() -> error " + err
   EndIf
   
   Debug "-----------------------------------------------"
   
   p = R::cor(x$, y$, @cor, "pearson")
   If p >= 0
      Debug "Pearson : cor = " + StrD(cor, 7) + "   //   p = " + p
   Else
      Debug "cor(pearson) -> error " + R::LastError()
   EndIf
   
   p = R::cor(x$, y$, @cor, "kendall")
   If p >= 0
      Debug "Kendall : cor = " + StrD(cor, 7) + "   //   p = " + p
   Else
      Debug "cor(kendall) -> error " + R::LastError()
   EndIf
   
   p = R::cor(x$, y$, @cor, "spearman")
   If p >= 0
      Debug "Spearman: cor = " + StrD(cor, 7) + "   //   p = " + p
   Else
      Debug "cor(spearman) -> error " + R::LastError()
   EndIf
   
   Debug "-----------------------------------------------"
   
   p = R::lm(x$, y$, @slope, @ic)
   If p >= 0
      Debug "y = " + StrD(slope, 2) + "*x + " + StrD(ic, 2) + "     //   p = " + p
   Else
      Debug "lm() -> error " + R::LastError()
   EndIf
   
   Debug "-----------------------------------------------"
   
   k =  3
   n = 19
   If R::binom_confint(k, n, 0.95, @bounds) = 0
      Debug "lower bound = " + bounds\lo
      Debug "upper bound = " + bounds\up
   Else
      Debug "binom_confint() -> error " + R::LastError()
   EndIf
CompilerEndIf

-------------------------------------------------

My best tricks & tips from 15+ years
Create arrays elegantly
Extended date library
Save JSON data with object members well-arranged
Evaluate and process math expressions
Functions for sets
Statistics with R
Thue-Morse sequence
Natural sorting
Sort array indexes and parallel arrays
Time profiling
VectorIcons