How to perform a kfino outlier detection on multiple individuals

library(kfino)
library(dplyr)
library(foreach)
library(parallel)
library(doParallel)
#> Loading required package: iterators

This vignette shows how to use parallelization on a data set containing several animals weighed over time with a walk-over-weighing system. The lambs data set is included in the kfino package and can be loaded using the data() function.

We use the foreach, parallel and doParallel libraries to reduce the computing time.

# Load the example data set and collect the distinct individual identifiers
# found in its IDE column.
data("lambs")
myIDE <- unique(lambs[["IDE"]])

print(myIDE)
#> [1] "250017033503030" "250017033503074" "250017033503092" "250017033503096"

Without parallelization

# Settings handed to kfino_fit() through its `param` argument.
# The meaning of each entry is not described in this file -- see ?kfino_fit.
param <- list(
  # No value is supplied for these three entries.
  m0 = NULL, mm = NULL, pp = NULL,
  aa = 0.001,
  expertMin = 10, expertMax = 45,
  sigma2_m0 = 1, sigma2_mm = 0.05, sigma2_pp = 5,
  K = 15,
  # Grid running from 0.4 to 0.7 in steps of 0.1.
  seqp = seq(from = 0.4, to = 0.7, by = 0.1)
)

# Sequential reference run: fit kfino on each individual in turn and time the
# whole loop. Relies on `lambs`, `myIDE` and `param` created earlier.
t0 <- Sys.time()
# One slot per individual, allocated up front rather than grown in the loop.
resu1 <- vector("list", length(myIDE))

for (i in seq_along(myIDE)) {
  print(myIDE[i])
  # Namespace-qualified so stats::filter() can never be picked up by mistake.
  tp.test <- dplyr::filter(lambs, IDE == myIDE[i])
  print(dim(tp.test))
  resu1[[i]] <- kfino_fit(datain = tp.test,
                          Tvar = "dateNum", Yvar = "Poids",
                          param = param,
                          doOptim = TRUE)
}
#> [1] "250017033503030"
#> [1] 101   5
#> [1] "250017033503074"
#> [1] 416   5
#> [1] "250017033503092"
#> [1] 213   5
#> [1] "250017033503096"
#> [1] 566   5
Sys.time() - t0
#> Time difference of 21.97678 secs

print(length(resu1))
#> [1] 4

Parallel execution

The following example reduces the computation time of a run on the complete data set by fitting the individuals in parallel.


# Settings handed to kfino_fit() through its `param` argument; the values are
# the same as in the sequential run.
# The meaning of each entry is not described in this file -- see ?kfino_fit.
param <- list(
  # No value is supplied for these three entries.
  m0 = NULL, mm = NULL, pp = NULL,
  aa = 0.001,
  expertMin = 10, expertMax = 45,
  sigma2_m0 = 1, sigma2_mm = 0.05, sigma2_pp = 5,
  K = 15,
  # Grid running from 0.4 to 0.7 in steps of 0.1.
  seqp = seq(from = 0.4, to = 0.7, by = 0.1)
)

# Start of the timed section for the parallel run.
t0 <- Sys.time()

# Fit kfino on a single individual of a multi-individual data set.
#
# Arguments:
#   datain  data set with an "IDE" column identifying the individuals; it is
#           coerced with as.data.frame().
#   Index   position of the individual to fit within unique(datain$IDE).
#   Tvar, Yvar, param
#           forwarded unchanged to kfino::kfino_fit().
#
# Returns the value of kfino::kfino_fit() (called with doOptim = TRUE) for the
# rows belonging to the selected individual.
#
# Stops with an error when the "IDE" column is missing or when Index does not
# designate exactly one of the individuals.
simpleCall<-function(datain,Index,Tvar,Yvar,param){
  datain <- as.data.frame(datain)
  if (!("IDE" %in% names(datain))) {
    stop("`datain` must contain an \"IDE\" column", call. = FALSE)
  }

  ids <- unique(datain[["IDE"]])
  index_ok <- is.numeric(Index) && length(Index) == 1L && !is.na(Index) &&
    Index >= 1 && Index <= length(ids)
  if (!index_ok) {
    # An out-of-range index would otherwise compare against NA and hand rows
    # made only of NA to the fit.
    stop("`Index` must be a single number between 1 and ", length(ids),
         call. = FALSE)
  }

  # which() discards NA comparisons, so rows with a missing IDE are left out
  # instead of coming back as all-NA rows.
  keep <- which(datain[["IDE"]] == ids[Index])
  tp.data <- datain[keep, , drop = FALSE]

  kfino::kfino_fit(datain = tp.data,
                   Tvar = Tvar, Yvar = Yvar,
                   param = param,
                   doOptim = TRUE)
}

# Keep one core free for the session, but always start at least one worker:
# detectCores() can return NA, and it returns 1 on a single-core machine.
ncores <- parallel::detectCores()
nworkers <- if (is.na(ncores)) 1L else max(1L, ncores - 1L)
myCluster <- parallel::makeCluster(nworkers)
doParallel::registerDoParallel(myCluster)

# One task per individual. The cluster is stopped in `finally`, so the worker
# processes are released even when one of the fits fails.
resu2 <- tryCatch(
  foreach(i = seq_along(myIDE), .packages = "kfino") %dopar%
    simpleCall(datain = lambs,
               Index = i,
               Tvar = "dateNum",
               Yvar = "Poids",
               param = param),
  finally = parallel::stopCluster(myCluster)
)
Sys.time() - t0
#> Time difference of 13.23899 secs

print(length(resu2))
#> [1] 4
identical(resu1,resu2)
#> [1] TRUE

References

  1. E. González-García et al. (2018) A mobile and automated walk-over-weighing system for a close and remote monitoring of liveweight in sheep. Computers and Electronics in Agriculture, vol 153: 226-238. https://doi.org/10.1016/j.compag.2018.08.022
  2. Microsoft Corporation, Weston S (2022). doParallel: Foreach Parallel Adaptor for the ‘parallel’ Package. R package version 1.0.17, https://CRAN.R-project.org/package=doParallel.

Session info

sessionInfo()
#> R version 4.4.2 (2024-10-31)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.1 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=C              
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] parallel  stats     graphics  grDevices utils     datasets  methods  
#> [8] base     
#> 
#> other attached packages:
#> [1] doParallel_1.0.17 iterators_1.0.14  foreach_1.5.2     ggplot2_3.5.1    
#> [5] dplyr_1.1.4       kfino_1.0.0      
#> 
#> loaded via a namespace (and not attached):
#>  [1] gtable_0.3.6      jsonlite_1.8.9    compiler_4.4.2    tidyselect_1.2.1 
#>  [5] jquerylib_0.1.4   scales_1.3.0      yaml_2.3.10       fastmap_1.2.0    
#>  [9] R6_2.5.1          labeling_0.4.3    generics_0.1.3    knitr_1.49       
#> [13] tibble_3.2.1      maketools_1.3.1   munsell_0.5.1     bslib_0.8.0      
#> [17] pillar_1.9.0      rlang_1.1.4       utf8_1.2.4        cachem_1.1.0     
#> [21] xfun_0.49         sass_0.4.9        sys_3.4.3         cli_3.6.3        
#> [25] withr_3.0.2       magrittr_2.0.3    digest_0.6.37     grid_4.4.2       
#> [29] lifecycle_1.0.4   vctrs_0.6.5       evaluate_1.0.1    glue_1.8.0       
#> [33] farver_2.1.2      codetools_0.2-20  buildtools_1.0.0  fansi_1.0.6      
#> [37] colorspace_2.1-1  rmarkdown_2.29    tools_4.4.2       pkgconfig_2.0.3  
#> [41] htmltools_0.5.8.1