Title: | Running Commands Remotely on 'Gridengine' Clusters |
---|---|
Description: | Run lapply() calls in parallel by submitting them to 'gridengine' clusters using the 'qsub' command. |
Authors: | Robrecht Cannoodt [aut, cre] (<https://orcid.org/0000-0003-3641-729X>, rcannood), Wouter Saelens [aut] (<https://orcid.org/0000-0002-7114-6248>, zouter) |
Maintainer: | Robrecht Cannoodt <[email protected]> |
License: | GPL-3 |
Version: | 1.1.3 |
Built: | 2024-10-25 02:44:20 UTC |
Source: | https://github.com/rcannood/qsub |
Read from a file remotely
cat_remote(path, remote = FALSE, verbose = FALSE)
cat_remote(path, remote = FALSE, verbose = FALSE)
path |
Path of the file. |
remote |
Remote machine specification for ssh, in format such as |
verbose |
If |
A wrapper around the scp shell command that handles local/remote files and allows copying between remote hosts via the local machine.
cp_remote( remote_src, path_src, remote_dest, path_dest, verbose = FALSE, recursively = FALSE )
cp_remote( remote_src, path_src, remote_dest, path_dest, verbose = FALSE, recursively = FALSE )
remote_src |
Remote machine for the source file in the format |
path_src |
Path of the source file. |
remote_dest |
Remote machine for the destination file in the format |
path_dest |
Path for the destination file; can be a directory. |
verbose |
Prints elapsed time if TRUE |
recursively |
Copy a directory recursively? |
## Not run: ## Copy file myfile.csv from the home directory on the remote server to ## the local working directory. ## on remote server in bash shell: # cat myfile.csv # [me@myserver ~]$ cat myfile.csv # "val","ts" # 1, # 2, # 3, # 4, # 5, # 6, # 7, # 8, # 9, # 10, ## on local server in R: cp_remote(remote_src = "me@myserver", path_src = "~/myfile.csv", remote_dest = FALSE, path_dest = getwd(), verbose = TRUE) # [1] "Elapsed: 1.672 sec" df <- read.csv("myfile.csv") df # val ts # 1 1 NA # 2 2 NA # 3 3 NA # 4 4 NA # 5 5 NA # 6 6 NA # 7 7 NA # 8 8 NA # 9 9 NA # 10 10 NA ## End(Not run)
## Not run: ## Copy file myfile.csv from the home directory on the remote server to ## the local working directory. ## on remote server in bash shell: # cat myfile.csv # [me@myserver ~]$ cat myfile.csv # "val","ts" # 1, # 2, # 3, # 4, # 5, # 6, # 7, # 8, # 9, # 10, ## on local server in R: cp_remote(remote_src = "me@myserver", path_src = "~/myfile.csv", remote_dest = FALSE, path_dest = getwd(), verbose = TRUE) # [1] "Elapsed: 1.672 sec" df <- read.csv("myfile.csv") df # val ts # 1 1 NA # 2 2 NA # 3 3 NA # 4 4 NA # 5 5 NA # 6 6 NA # 7 7 NA # 8 8 NA # 9 9 NA # 10 10 NA ## End(Not run)
Create a qsub configuration object.
create_qsub_config( remote, local_tmp_path, remote_tmp_path, name = "r2qsub", num_cores = 1, memory = "4G", max_running_tasks = NULL, max_wall_time = "01:00:00", batch_tasks = 1, compress = c("gz", "bz2", "xz", "none"), modules = "R", execute_before = NULL, verbose = FALSE, wait = TRUE, remove_tmp_folder = TRUE, stop_on_error = TRUE ) override_qsub_config( qsub_config = get_default_qsub_config(), remote = qsub_config$remote, local_tmp_path = qsub_config$local_tmp_path, remote_tmp_path = qsub_config$remote_tmp_path, name = qsub_config$name, num_cores = qsub_config$num_cores, memory = qsub_config$memory, max_running_tasks = qsub_config$max_running_tasks, max_wall_time = qsub_config$max_wall_time, batch_tasks = qsub_config$batch_tasks, compress = qsub_config$compress, modules = qsub_config$modules, execute_before = qsub_config$execute_before, verbose = qsub_config$verbose, wait = qsub_config$wait, remove_tmp_folder = qsub_config$remove_tmp_folder, stop_on_error = qsub_config$stop_on_error )
create_qsub_config( remote, local_tmp_path, remote_tmp_path, name = "r2qsub", num_cores = 1, memory = "4G", max_running_tasks = NULL, max_wall_time = "01:00:00", batch_tasks = 1, compress = c("gz", "bz2", "xz", "none"), modules = "R", execute_before = NULL, verbose = FALSE, wait = TRUE, remove_tmp_folder = TRUE, stop_on_error = TRUE ) override_qsub_config( qsub_config = get_default_qsub_config(), remote = qsub_config$remote, local_tmp_path = qsub_config$local_tmp_path, remote_tmp_path = qsub_config$remote_tmp_path, name = qsub_config$name, num_cores = qsub_config$num_cores, memory = qsub_config$memory, max_running_tasks = qsub_config$max_running_tasks, max_wall_time = qsub_config$max_wall_time, batch_tasks = qsub_config$batch_tasks, compress = qsub_config$compress, modules = qsub_config$modules, execute_before = qsub_config$execute_before, verbose = qsub_config$verbose, wait = qsub_config$wait, remove_tmp_folder = qsub_config$remove_tmp_folder, stop_on_error = qsub_config$stop_on_error )
remote |
Remote machine specification for ssh, in format such as |
local_tmp_path |
A directory on the local machine in which to store temporary files. Should not contain a tilde ('~'). |
remote_tmp_path |
A directory on the remote machine in which to store temporary files. Should not contain a tilde ('~'). |
name |
The name of the execution. This will show up, for instance, in |
num_cores |
The number of cores to allocate per element in |
memory |
The memory to allocate per core (default: "4G"). |
max_running_tasks |
Limit concurrent array job task execution (default: NULL). |
max_wall_time |
The maximum time each task is allowed to run (default: "01:00:00"). |
batch_tasks |
How many values in |
compress |
Compression method to use: "gz", "bz2", "xz", or "none". |
modules |
Which modules to load (default: "R"). |
execute_before |
Commands to execute in the bash shell before running R. |
verbose |
Whether or not to print out any ssh commands. |
wait |
If |
remove_tmp_folder |
If |
stop_on_error |
If |
qsub_config |
A qsub_config to be overridden |
A qsub configuration object.
qsub_lapply
, set_default_qsub_config
## Not run: qsub_config <- create_qsub_config( remote = "[email protected]:22", local_tmp_path = "/home/myuser/workspace/.r2gridengine", remote_tmp_path = "/scratch/myuser/.r2gridengine" ) qsub_lapply(1:10, function(x) x + 1, qsub_config = qsub_config) set_default_qsub_config(qsub_config, permanent = TRUE) qsub_lapply(1:10, function(x) x + 1) qsub_lapply( X = 1:10, FUN = function(x) x + 1, qsub_config = override_qsub_config(verbose = TRUE) ) ## End(Not run)
## Not run: qsub_config <- create_qsub_config( remote = "[email protected]:22", local_tmp_path = "/home/myuser/workspace/.r2gridengine", remote_tmp_path = "/scratch/myuser/.r2gridengine" ) qsub_lapply(1:10, function(x) x + 1, qsub_config = qsub_config) set_default_qsub_config(qsub_config, permanent = TRUE) qsub_lapply(1:10, function(x) x + 1) qsub_lapply( X = 1:10, FUN = function(x) x + 1, qsub_config = override_qsub_config(verbose = TRUE) ) ## End(Not run)
Create an SSH connection with remote
create_ssh_connection(remote)
create_ssh_connection(remote)
remote |
Remote machine specification for ssh, in format such as |
Checks if a local or remote file exists.
file_exists_remote(file, remote = FALSE, verbose = FALSE)
file_exists_remote(file, remote = FALSE, verbose = FALSE)
file |
File path. |
remote |
Remote machine specification for ssh, in format such as |
verbose |
If |
TRUE
or FALSE
indicating whether the file exists.
## Not run: file_exists_remote("~/myfile.csv", remote = "me@myserver") # [1] TRUE ## End(Not run)
## Not run: file_exists_remote("~/myfile.csv", remote = "me@myserver") # [1] TRUE ## End(Not run)
Retrieves the default qsub_config. Will prefer the temporary default over the permanent default. You should typically not require this function.
get_default_qsub_config(config_file = config_file_location())
get_default_qsub_config(config_file = config_file_location())
config_file |
The file in which a permanent default config is stored. |
This function generates the paths for the temporary files.
instantiate_qsub_config(qsub_config)
instantiate_qsub_config(qsub_config)
qsub_config |
A valid qsub_config object. You should typically not need to call this function manually. |
Check whether a job is running.
is_job_running(qsub_config)
is_job_running(qsub_config)
qsub_config |
The qsub configuration of class |
Returns whether the passed object is a qsub_config object.
is_qsub_config(object)
is_qsub_config(object)
object |
The object to be tested |
Tests whether the remote is a local host or not.
is_remote_local(remote)
is_remote_local(remote)
remote |
A putative remote machine. This function will return true if |
View the contents of a directory remotely
ls_remote(path, remote = FALSE, verbose = FALSE)
ls_remote(path, remote = FALSE, verbose = FALSE)
path |
Path of the directory. |
remote |
Remote machine specification for ssh, in format such as |
verbose |
If |
Creates a remote directory with the specified group ownership and permissions.
mkdir_remote(path, remote = FALSE, verbose = FALSE)
mkdir_remote(path, remote = FALSE, verbose = FALSE)
path |
Directory path. If using |
remote |
Remote machine specification for ssh, in format such as |
verbose |
If |
Run qacct on remote
qacct(qsub_config)
qacct(qsub_config)
qsub_config |
The config |
Run qacct on remote
qacct_remote(job_id, remote = FALSE)
qacct_remote(job_id, remote = FALSE)
job_id |
The job_id of the job |
remote |
Remote machine specification for ssh, in format such as |
Run qstat on remote
qstat_j(qsub_config)
qstat_j(qsub_config)
qsub_config |
The config |
Run qstat on remote
qstat_j_remote(job_id, remote = FALSE)
qstat_j_remote(job_id, remote = FALSE)
job_id |
The job_id of the job |
remote |
Remote machine specification for ssh, in format such as |
Show the status of Grid Engine jobs and queues
qstat_remote(remote = NULL, verbose = FALSE)
qstat_remote(remote = NULL, verbose = FALSE)
remote |
Remote machine specification for ssh, in format such as |
verbose |
If |
Run 'lapply()' calls in parallel by submitting them to 'gridengine' clusters using the 'qsub' command.
Apply a Function over a List or Vector on a gridengine system!
qsub_lapply( X, FUN, object_envir = environment(FUN), qsub_config = NULL, qsub_environment = NULL, qsub_packages = NULL, ... )
qsub_lapply( X, FUN, object_envir = environment(FUN), qsub_config = NULL, qsub_environment = NULL, qsub_packages = NULL, ... )
X |
A vector (atomic or list) or an expression object. Other objects (including classed objects) will be coerced by base::as.list. |
FUN |
The function to be applied to each element of X. |
object_envir |
The environment in which to go looking for the qsub_environment variables, if these are characters. |
qsub_config |
The configuration to use for this execution. |
qsub_environment |
|
qsub_packages |
The packages to be loaded on the cluster. |
... |
optional arguments to FUN. |
create_qsub_config
, set_default_qsub_config
## Not run: # Initial configuration and execution qsub_config <- create_qsub_config( remote = "myserver", local_tmp_path = "/home/myuser/workspace/.r2gridengine", remote_tmp_path = "/scratch/myuser/.r2gridengine" ) qsub_lapply( X = seq_len(3), FUN = function(i) { Sys.sleep(1); i+1 }, qsub_config = qsub_config ) # Setting a default configuration and short hand notation for execution set_default_qsub_config(qsub_config, permanent = T) qsub_lapply(seq_len(3), function(i) { Sys.sleep(1); i+1 }) # Overriding a default qsub_config qsub_lapply(seq_len(3), function(i) i + 1, qsub_config = override_qsub_config(name = "MyJob")) # Don't wait for results, get a handle instead and retrieve later. handle <- qsub_lapply(seq_len(3), function(i) i + 1, qsub_config = override_qsub_config(wait = F)) # Wait until results have been generated on the remote # Retrieve results qsub_retrieve(handle) ## End(Not run)
## Not run: # Initial configuration and execution qsub_config <- create_qsub_config( remote = "myserver", local_tmp_path = "/home/myuser/workspace/.r2gridengine", remote_tmp_path = "/scratch/myuser/.r2gridengine" ) qsub_lapply( X = seq_len(3), FUN = function(i) { Sys.sleep(1); i+1 }, qsub_config = qsub_config ) # Setting a default configuration and short hand notation for execution set_default_qsub_config(qsub_config, permanent = T) qsub_lapply(seq_len(3), function(i) { Sys.sleep(1); i+1 }) # Overriding a default qsub_config qsub_lapply(seq_len(3), function(i) i + 1, qsub_config = override_qsub_config(name = "MyJob")) # Don't wait for results, get a handle instead and retrieve later. handle <- qsub_lapply(seq_len(3), function(i) i + 1, qsub_config = override_qsub_config(wait = F)) # Wait until results have been generated on the remote # Retrieve results qsub_retrieve(handle) ## End(Not run)
Retrieve the results of a qsub execution.
qsub_retrieve(qsub_config, wait = TRUE, post_fun = NULL)
qsub_retrieve(qsub_config, wait = TRUE, post_fun = NULL)
qsub_config |
The qsub configuration of class |
wait |
If |
post_fun |
Apply a function to the output after execution. Interface: |
Run a Function on a gridengine system!
qsub_run(FUN, qsub_config = NULL, qsub_environment = NULL, ...)
qsub_run(FUN, qsub_config = NULL, qsub_environment = NULL, ...)
FUN |
the function to be executed. |
qsub_config |
The configuration to use for this execution. |
qsub_environment |
|
... |
optional arguments to FUN. |
create_qsub_config
, set_default_qsub_config
Remove a file or folder
rm_remote(path, remote, recursive = FALSE, force = FALSE, verbose = FALSE)
rm_remote(path, remote, recursive = FALSE, force = FALSE, verbose = FALSE)
path |
Path of the file/folder |
remote |
Remote machine specification for ssh, in format such as |
recursive |
Whether to work recursively |
force |
Whether to force removal |
verbose |
If |
A wrapper around the rsync shell command that allows copying between remote hosts via the local machine.
rsync_remote( remote_src, path_src, remote_dest, path_dest, compress = TRUE, delete = "no", exclude = NULL, verbose = FALSE )
rsync_remote( remote_src, path_src, remote_dest, path_dest, compress = TRUE, delete = "no", exclude = NULL, verbose = FALSE )
remote_src |
Remote machine for the source, see the section below 'Specifying a remote'. |
path_src |
Path of the source file. |
remote_dest |
Remote machine for the destination, see the section below 'Specifying a remote'. |
path_dest |
Path for the destination file; can be a directory. |
compress |
Whether or not to compress the data being transferred. |
delete |
Whether or not to delete files at the target remote. Use |
exclude |
A vector of files / regular expressions to be excluded. |
verbose |
Prints elapsed time if TRUE. |
A remote can be specified in one of the following ways:
A character vector in format user@ipaddress:port
,
The name of a Host in the ~/.ssh/config
file,
FALSE
for the local machine,
TRUE
for the default remote specified as get_default_qsub_config()$remote
.
run_remote
- Runs the command locally or remotely using ssh. In run_remote
the remote commands are enclosed in wrappers that allow the output to be captured.
By default stderr is redirected to stdout.
If there's a genuine error, e.g., the remote command does not exist, the output is not captured. In this case, one can
see the output by setting intern
to FALSE
. However, when the command is run but exits with non-zero code,
run_remote
intercepts the generated warning and saves the output.
run_remote( command, remote = FALSE, args = character(), verbose = FALSE, shell = FALSE )
run_remote( command, remote = FALSE, args = character(), verbose = FALSE, shell = FALSE )
command |
Command to run. If run locally, quotes should be escaped once. If run remotely, quotes should be escaped twice. |
remote |
Remote machine specification for ssh, in format such as |
args |
Character vector, arguments to the command. |
verbose |
If |
shell |
Whether to execute the command in a shell |
The remote command will be put inside double quotes twice, so all quotes in command must be escaped twice: \\"
.
However, if the command is not remote, i.e., remote
is NULL
or empty string, quotes should be escaped
only once.
If the command itself redirects output, the stderr_redirect
flag should be set to FALSE
.
A list with components:
status
The exit status of the process. If this is NA, then the process was killed and had no exit status.
stdout
The standard output of the command, in a character scalar.
stderr
The standard error of the command, in a character scalar.
elapsed_time
The number of seconds required before this function returned an output.
Warnings are really errors here so the error flag is set if there are warnings.
If permanent, the qsub_config will be written to the specified path. Otherwise, it will be saved in the current environment.
set_default_qsub_config( qsub_config, permanent = TRUE, config_file = config_file_location() )
set_default_qsub_config( qsub_config, permanent = TRUE, config_file = config_file_location() )
qsub_config |
The qsub_config to use as default. |
permanent |
Whether or not to make this the permanent default qsub_config, i.e. write it to config_file rather than keep it only in the current environment. |
config_file |
The location to which to save the permanent qsub_config. |
qsub_lapply
, create_qsub_config
## Not run: qsub_config <- create_qsub_config( remote = "myserver", local_tmp_path = "/home/myuser/workspace/.r2gridengine", remote_tmp_path = "/scratch/myuser/.r2gridengine" ) set_default_qsub_config(qsub_config, permanent = T) qsub_lapply(1:10, function(x) x + 1) ## End(Not run)
## Not run: qsub_config <- create_qsub_config( remote = "myserver", local_tmp_path = "/home/myuser/workspace/.r2gridengine", remote_tmp_path = "/scratch/myuser/.r2gridengine" ) set_default_qsub_config(qsub_config, permanent = T) qsub_lapply(1:10, function(x) x + 1) ## End(Not run)
Tests whether the passed object is a qsub_config object.
test_qsub_config(object)
test_qsub_config(object)
object |
The object to be tested |
Write to a file remotely
write_remote(x, path, remote = FALSE, verbose = FALSE)
write_remote(x, path, remote = FALSE, verbose = FALSE)
x |
The text to write to the file. |
path |
Path of the file. |
remote |
Remote machine specification for ssh, in format such as |
verbose |
If |