I am in the process of updating old R projects. In this process I need executed old projects and new projects.
I have manage to run old projects sequentially (one after another) and I am trying to do the same thing in parallel.
These scripts do not return any value because they write into a database.
This is the working code
execute_script <- function(script_name, folder_path, required_data){
#' This function prepares the input for a folder and a given script
#' @param script_name: Contains the name of the executable .R script to be executed
#' @param folder_path: Contains the folder where the script to executed is stored
#' @param required_data: This contains the data required to run the script.
#' @return when finished returns TRUE to ensure it has ended
# I need to do this to join path on server with filename (it depends on user selection)
script_to_execute <- paste0(folder_path,"\\", script_name)
script_to_execute <- paste0((unlist(strsplit(script_to_execute, split='\\', fixed = TRUE))), collapse = "/")
tryCatch(
expr = {
source(script_to_execute, local = TRUE, echo = TRUE, print.eval = TRUE, spaced = TRUE, chdir = TRUE)
},
error = function(e){
print(paste(Sys.time(),"Excepcion execute_script: ",script_name))
print(paste(Sys.time(),"Excepcion execute_script: ",e))
return(FALSE)
}
)
return(TRUE) # TODO chech
}
model_execution_sequence <- function(script_required_data, selected_scripts){
#' Executes the scripts received in selected_projects using script_required_data
#' @param script_required_data contains a dataframe with the input
#'
#' @param selected_scripts is a datatable that contains all the information required to execute a script
#'
#' @return a boolean information to determine if execution was success or not
TYPE_1_MODEL <- selected_scripts[APPROACH == '1']
if (nrow(TYPE_1_MODEL) > 0) {
my_iters <- 1:dim(TYPE_1_MODEL)[1]
for (x in my_iters) {
cat("\n\n")
a <- execute_script(TYPE_1_MODEL$FILE[x],
TYPE_1_MODEL$PATH[x],
script_required_data)
print(paste(Sys.time(),"Executed",TYPE_1_MODEL$FILE[x],"result:",a))
}
}else{
print(paste("Error: Unexpected approach named: ",selected_scripts," "))
}
return(TRUE)
}
This is the code I have tested with parallelization
model_execution_foreach <- function(script_required_data, selected_models){
#' Executes the scripts received in selected_projects using script_required_data
#' @param script_required_data contains a dataframe with the input
#'
#' @param selected_scripts is a datatable that contains all the information required to execute a script
#'
#' @return a boolean information to determine if execution was success or not
TYPE_1_MODEL <- selected_scripts[TYPE == '1']
if (nrow(TYPE_1_MODEL) > 0) {
TYPE_1_MODEL <- distinct(TYPE_1_MODEL)
no_cores <- detectCores() - 1
cl <- makeCluster(no_cores, type = "PSOCK")
foreach (i = 1:dim(TYPE_1_MODEL)[1] ) %dopar%{
ml<- data.frame(execute_script(TYPE_1_MODEL$FILE[i], TYPE_1_MODEL$PATH[i], script_required_data))
}
stopCluster(cl)
}else{
print(paste("Error: Unexpected TYPE named: ",selected_scripts))
}
return(TRUE)
}
Reason to say it does not work
- The script doesn't break (I put it inside a tryCatch)
- Each script should write data into the database and they are not
I have followed links like: parallelize function
Thanks in advance