Difference between revisions of "Rocky R Prime"

From NIMBioS
 
(14 intermediate revisions by 2 users not shown)
Line 1: Line 1:
Insert example here
== Prime Number example code ==
 
=== R serial script ===
<pre>
# Serial prime search: list every prime below 1000 and write the result to
# R_prime_serial.txt, one number per line.
library(foreach)
registerDoSEQ()

# Any composite below 1000 has a prime factor <= 31 (37^2 = 1369 > 999), so
# trial division by these primes is a complete test for the range searched.
known_primes <- c(2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31)

# 32 and 33 are composite, so starting at 34 skips no prime.
search_space <- 34:999

# Each iteration yields the candidate when no known prime divides it and an
# empty vector otherwise; .combine = c concatenates, dropping the empties.
found_primes <- foreach(check_i = search_space, .combine = c) %do% {
  if (any(check_i %% known_primes == 0)) numeric() else check_i
}

write.table(
  sort(c(known_primes, found_primes)),
  file = "R_prime_serial.txt",
  row.names = FALSE,
  col.names = FALSE
)
</pre>
 
=== R parallel script ===
Naive version
<pre>
# Parallel prime search (naive): test every integer in 34..999 across
# parallel workers and write the primes below 1000 to R_prime_parallel.txt.
library(foreach)
library(doParallel)

# Use the CPU count Slurm allocated to this task when it is available;
# otherwise keep the original default of 40 workers.
n_workers <- as.integer(Sys.getenv("SLURM_CPUS_PER_TASK", unset = "40"))
if (is.na(n_workers) || n_workers < 1L) {
  n_workers <- 40L
}
registerDoParallel(n_workers)

# Any composite below 1000 has a prime factor <= 31 (37^2 = 1369 > 999), so
# trial division by these primes is a complete test for the range searched.
known_primes <- c(2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31)

# 32 and 33 are composite, so starting at 34 skips no prime.
search_space <- 34:999

# Each iteration yields the candidate when no known prime divides it and an
# empty vector otherwise; .combine = c concatenates, dropping the empties.
found_primes <- foreach(check_i = search_space, .combine = c) %dopar% {
  if (any(check_i %% known_primes == 0)) numeric() else check_i
}

# sort() makes the output independent of how results were combined.
write.table(
  sort(c(known_primes, found_primes)),
  file = "R_prime_parallel.txt",
  row.names = FALSE,
  col.names = FALSE
)
</pre>
Only check numbers ending in 1, 3, 7 and 9
<pre>
# Parallel prime search (reduced search space): only integers ending in
# 1, 3, 7 or 9 are tested, and the primes below 1000 are written to
# R_prime_parallel_fast.txt.
library(foreach)
library(doParallel)

# Use the CPU count Slurm allocated to this task when it is available;
# otherwise keep the original default of 40 workers.
n_workers <- as.integer(Sys.getenv("SLURM_CPUS_PER_TASK", unset = "40"))
if (is.na(n_workers) || n_workers < 1L) {
  n_workers <- 40L
}
registerDoParallel(n_workers)

# Any composite below 1000 has a prime factor <= 31 (37^2 = 1369 > 999), so
# trial division by these primes is a complete test for the range searched.
known_primes <- c(2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31)

# Numbers ending in 0, 2, 4, 5, 6 or 8 are divisible by 2 or 5, so only the
# remaining last digits can belong to a prime above 5. Filtering the range
# by last digit avoids a hard-coded repeat count tied to the range bounds.
candidates <- 34:999
search_space <- candidates[candidates %% 10 %in% c(1, 3, 7, 9)]

# Each iteration yields the candidate when no known prime divides it and an
# empty vector otherwise; .combine = c concatenates, dropping the empties.
found_primes <- foreach(check_i = search_space, .combine = c) %dopar% {
  if (any(check_i %% known_primes == 0)) numeric() else check_i
}

# sort() makes the output independent of how results were combined.
write.table(
  sort(c(known_primes, found_primes)),
  file = "R_prime_parallel_fast.txt",
  row.names = FALSE,
  col.names = FALSE
)
</pre>
 
=== Slurm submission script ===
<pre>
#!/bin/bash
# Slurm batch job: run and time the three R prime-number scripts in turn.
#SBATCH --job-name=R_PRIME        ### Name shown in the job queue
#SBATCH --output=R_prime_%j.out   ### Job output file (%j expands to the job ID)
#SBATCH --time=00:10:00           ### Wall-clock limit, HH:MM:SS (10 minutes)
#SBATCH --nodes=1                 ### Number of nodes requested
#SBATCH --ntasks-per-node=1       ### Tasks launched on each node
#SBATCH --cpus-per-task=40        ### CPU cores given to the single task
#SBATCH --mem-per-cpu=2G          ### Memory requested per allocated CPU

module load R/4.2.1-foss-2022a
date
R --version

# Time each script and print a timestamp after every run.
for script in R_prime_serial.R R_prime_parallel.R R_prime_parallel_fast.R; do
  time Rscript "$script"
  date
done
</pre>
 
== Running the Prime Number example ==
=== Uploading the code ===
The simplest way to get this code is to clone the GitHub repository:
 
<pre>
# Create ~/git if it is missing and enter it; cd is skipped if creation fails
mkdir -p ~/git && cd ~/git
# Clone only when no local copy exists yet
[[ -e r-prime-cluster ]] || git clone git@github.com:rocky-cluster/r-prime-cluster.git
cd r-prime-cluster
</pre>
 
Sanity check:
<pre>
# NOTE(review): BLAH looks like an unfilled placeholder; replace it with the file to inspect
cat BLAH
 
</pre>
 
=== Submitting the script ===
<pre>
sbatch R-prime.srun
</pre>
 
=== Looking at the results ===
Check the job queue and wait for the job to complete:
 
<pre>
squeue
</pre>
 
Check log file:
<pre>
ls -l R_prime_*.out
 
# just look at the latest output log
cat $( ls -t R_prime_*.out | head -n 1)
</pre>
 
Check results file:
<pre>
# Quick hash check verifying prime number sets are identical
md5sum $( ls -t R_prime_*.txt | head -n 3)
</pre>

Latest revision as of 17:54, 10 April 2023

Prime Number example code

R serial script

# Serial prime search: list every prime below 1000 and write the result to
# R_prime_serial.txt, one number per line.
library(foreach)
registerDoSEQ()

# Any composite below 1000 has a prime factor <= 31 (37^2 = 1369 > 999), so
# trial division by these primes is a complete test for the range searched.
known_primes <- c(2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31)

# 32 and 33 are composite, so starting at 34 skips no prime.
search_space <- 34:999

# Each iteration yields the candidate when no known prime divides it and an
# empty vector otherwise; .combine = c concatenates, dropping the empties.
found_primes <- foreach(check_i = search_space, .combine = c) %do% {
  if (any(check_i %% known_primes == 0)) numeric() else check_i
}

write.table(
  sort(c(known_primes, found_primes)),
  file = "R_prime_serial.txt",
  row.names = FALSE,
  col.names = FALSE
)

R parallel script

Naive version

# Parallel prime search (naive): test every integer in 34..999 across
# parallel workers and write the primes below 1000 to R_prime_parallel.txt.
library(foreach)
library(doParallel)

# Use the CPU count Slurm allocated to this task when it is available;
# otherwise keep the original default of 40 workers.
n_workers <- as.integer(Sys.getenv("SLURM_CPUS_PER_TASK", unset = "40"))
if (is.na(n_workers) || n_workers < 1L) {
  n_workers <- 40L
}
registerDoParallel(n_workers)

# Any composite below 1000 has a prime factor <= 31 (37^2 = 1369 > 999), so
# trial division by these primes is a complete test for the range searched.
known_primes <- c(2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31)

# 32 and 33 are composite, so starting at 34 skips no prime.
search_space <- 34:999

# Each iteration yields the candidate when no known prime divides it and an
# empty vector otherwise; .combine = c concatenates, dropping the empties.
found_primes <- foreach(check_i = search_space, .combine = c) %dopar% {
  if (any(check_i %% known_primes == 0)) numeric() else check_i
}

# sort() makes the output independent of how results were combined.
write.table(
  sort(c(known_primes, found_primes)),
  file = "R_prime_parallel.txt",
  row.names = FALSE,
  col.names = FALSE
)

Only check numbers ending in 1, 3, 7 and 9

# Parallel prime search (reduced search space): only integers ending in
# 1, 3, 7 or 9 are tested, and the primes below 1000 are written to
# R_prime_parallel_fast.txt.
library(foreach)
library(doParallel)

# Use the CPU count Slurm allocated to this task when it is available;
# otherwise keep the original default of 40 workers.
n_workers <- as.integer(Sys.getenv("SLURM_CPUS_PER_TASK", unset = "40"))
if (is.na(n_workers) || n_workers < 1L) {
  n_workers <- 40L
}
registerDoParallel(n_workers)

# Any composite below 1000 has a prime factor <= 31 (37^2 = 1369 > 999), so
# trial division by these primes is a complete test for the range searched.
known_primes <- c(2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31)

# Numbers ending in 0, 2, 4, 5, 6 or 8 are divisible by 2 or 5, so only the
# remaining last digits can belong to a prime above 5. Filtering the range
# by last digit avoids a hard-coded repeat count tied to the range bounds.
candidates <- 34:999
search_space <- candidates[candidates %% 10 %in% c(1, 3, 7, 9)]

# Each iteration yields the candidate when no known prime divides it and an
# empty vector otherwise; .combine = c concatenates, dropping the empties.
found_primes <- foreach(check_i = search_space, .combine = c) %dopar% {
  if (any(check_i %% known_primes == 0)) numeric() else check_i
}

# sort() makes the output independent of how results were combined.
write.table(
  sort(c(known_primes, found_primes)),
  file = "R_prime_parallel_fast.txt",
  row.names = FALSE,
  col.names = FALSE
)

Slurm submission script

#!/bin/bash
# Slurm batch job: run and time the three R prime-number scripts in turn.
#SBATCH --job-name=R_PRIME        ### Name shown in the job queue
#SBATCH --output=R_prime_%j.out   ### Job output file (%j expands to the job ID)
#SBATCH --time=00:10:00           ### Wall-clock limit, HH:MM:SS (10 minutes)
#SBATCH --nodes=1                 ### Number of nodes requested
#SBATCH --ntasks-per-node=1       ### Tasks launched on each node
#SBATCH --cpus-per-task=40        ### CPU cores given to the single task
#SBATCH --mem-per-cpu=2G          ### Memory requested per allocated CPU

module load R/4.2.1-foss-2022a
date
R --version

# Time each script and print a timestamp after every run.
for script in R_prime_serial.R R_prime_parallel.R R_prime_parallel_fast.R; do
  time Rscript "$script"
  date
done

Running the Prime Number example

Uploading the code

The simplest way to get this code is to clone the GitHub repository:

# Create ~/git if it is missing and enter it; cd is skipped if creation fails
mkdir -p ~/git && cd ~/git
# Clone only when no local copy exists yet
[[ -e r-prime-cluster ]] || git clone git@github.com:rocky-cluster/r-prime-cluster.git
cd r-prime-cluster

Sanity check:

# NOTE(review): BLAH looks like an unfilled placeholder; replace it with the file to inspect
cat BLAH

Submitting the script

sbatch R-prime.srun

Looking at the results

Check the job queue and wait for the job to complete:

squeue

Check log file:

ls -l R_prime_*.out

# just look at the latest output log
cat $( ls -t R_prime_*.out | head -n 1)

Check results file:

# Quick hash check verifying prime number sets are identical
md5sum $( ls -t R_prime_*.txt | head -n 3)