Running multiple conditions at once in R
I wrote a code to apply a function to a data frame input:
set.seed(1234)
n = 5000000
input <- as.matrix(data.frame(c1 = sample(1:10, n, replace = T), c2 = sample(1:10, n, replace = T), c3 = sample(1:10, n, replace = T), c4 = sample(1:10, n, replace = T)))
system.time(
test <- input %>%
split(1:nrow(input)) %>%
map(~ func1(.x, 2, 2, "test_1")) %>%
do.call("rbind", .))
## Here is the function used:
func1 <- function(dataC, PR, DB, MT){
c1 <- as.vector(dataC[1])
c2 <- as.vector(dataC[2])
c3 <- as.vector(dataC[3])
c4 <- as.vector(dataC[4])
newc1 <- -999
newc2 <- -999
if(MT=="test_1"){
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR)))
V1 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 + 1) : (c2 + PR)))
V2 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 + 1) : (c2 + PR)))
V3 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - 1) : (c2 + 1)))
V4 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
V5 <- 0
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - 1) : (c2 + 1)))
V6 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - PR) : (c2 - 1)))
V7 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 - PR) : (c2 - 1)))
V8 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - PR) : (c2 - 1)))
V9 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
} else if(MT=="test_2"){
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR)))
V1 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 + 1) : (c2 + PR)))
V2 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 + 1) : (c2 + PR)))
V3 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - 1) : (c2 + 1)))
V4 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
V5 <- 0
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - 1) : (c2 + 1)))
V6 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - PR) : (c2 - 1)))
V7 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 - PR) : (c2 - 1)))
V8 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - PR) : (c2 - 1)))
V9 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
}
tot <- sum(c(1/V1, 1/V2, 1/V3, 1/V4, 1/V6, 1/V7, 1/V8, 1/V9), na.rm = TRUE)
mat_V <- matrix(data = c((1/V1)/tot, (1/V2)/tot, (1/V3)/tot, (1/V4)/tot, V5,
(1/V6)/tot, (1/V7)/tot, (1/V8)/tot, (1/V9)/tot), nrow = 3, ncol = 3, byrow = TRUE)
while((newc1 == -999 && newc2 == -999) || (c3 == newc1 && c4 == newc2)){
if(c3 == newc1 && c4 == newc2){
mat_V[choiceC[1], choiceC[2]] <- NaN
## print(mat_V)
}
choiceC <- which(mat_V == max(mat_V, na.rm = TRUE), arr.ind = TRUE)
## print(choiceC)
## If there are several maximum values
if(nrow(choiceC) > 1){
choiceC <- choiceC[sample(1:nrow(choiceC), 1), ]
}
if(choiceC[1]==1 & choiceC[2]==1){
newC <- matrix(c(x = c1 - 1, y = c2 + 1), ncol = 2)
} else if(choiceC[1]==1 & choiceC[2]==2){
newC <- matrix(c(x = c1, y = c2 + 1), ncol = 2)
} else if(choiceC[1]==1 & choiceC[2]==3){
newC <- matrix(c(x = c1 + 1, y = c2 + 1), ncol = 2)
} else if(choiceC[1]==2 & choiceC[2]==1){
newC <- matrix(c(x = c1 - 1, y = c2), ncol = 2)
} else if(choiceC[1]==2 & choiceC[2]==3){
newC <- matrix(c(x = c1 + 1, y = c2), ncol = 2)
} else if(choiceC[1]==3 & choiceC[2]==1){
newC <- matrix(c(x = c1 - 1, y = c2 - 1), ncol = 2)
} else if(choiceC[1]==3 & choiceC[2]==2){
newC <- matrix(c(x = c1, y = c2 - 1), ncol = 2)
} else if(choiceC[1]==3 & choiceC[2]==3){
newC <- matrix(c(x = c1 + 1, y = c2 - 1), ncol = 2)
}
newc1 <- as.vector(newC[,1])
newc2 <- as.vector(newC[,2])
}
return(newC)
}
The code works for small datasets but when the data frame contains more than 1 million rows, it is very slow. I think that there are many lines of code repeated in the function (e.g., condition if else) which decrease the speed. Are there ways to do all calculations in the function at once? I would really appreciate for any advice.
r
add a comment |
I wrote a code to apply a function to a data frame input:
set.seed(1234)
n = 5000000
input <- as.matrix(data.frame(c1 = sample(1:10, n, replace = T), c2 = sample(1:10, n, replace = T), c3 = sample(1:10, n, replace = T), c4 = sample(1:10, n, replace = T)))
system.time(
test <- input %>%
split(1:nrow(input)) %>%
map(~ func1(.x, 2, 2, "test_1")) %>%
do.call("rbind", .))
## Here is the function used:
func1 <- function(dataC, PR, DB, MT){
c1 <- as.vector(dataC[1])
c2 <- as.vector(dataC[2])
c3 <- as.vector(dataC[3])
c4 <- as.vector(dataC[4])
newc1 <- -999
newc2 <- -999
if(MT=="test_1"){
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR)))
V1 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 + 1) : (c2 + PR)))
V2 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 + 1) : (c2 + PR)))
V3 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - 1) : (c2 + 1)))
V4 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
V5 <- 0
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - 1) : (c2 + 1)))
V6 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - PR) : (c2 - 1)))
V7 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 - PR) : (c2 - 1)))
V8 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - PR) : (c2 - 1)))
V9 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
} else if(MT=="test_2"){
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR)))
V1 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 + 1) : (c2 + PR)))
V2 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 + 1) : (c2 + PR)))
V3 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - 1) : (c2 + 1)))
V4 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
V5 <- 0
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - 1) : (c2 + 1)))
V6 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - PR) : (c2 - 1)))
V7 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 - PR) : (c2 - 1)))
V8 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - PR) : (c2 - 1)))
V9 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
}
tot <- sum(c(1/V1, 1/V2, 1/V3, 1/V4, 1/V6, 1/V7, 1/V8, 1/V9), na.rm = TRUE)
mat_V <- matrix(data = c((1/V1)/tot, (1/V2)/tot, (1/V3)/tot, (1/V4)/tot, V5,
(1/V6)/tot, (1/V7)/tot, (1/V8)/tot, (1/V9)/tot), nrow = 3, ncol = 3, byrow = TRUE)
while((newc1 == -999 && newc2 == -999) || (c3 == newc1 && c4 == newc2)){
if(c3 == newc1 && c4 == newc2){
mat_V[choiceC[1], choiceC[2]] <- NaN
## print(mat_V)
}
choiceC <- which(mat_V == max(mat_V, na.rm = TRUE), arr.ind = TRUE)
## print(choiceC)
## If there are several maximum values
if(nrow(choiceC) > 1){
choiceC <- choiceC[sample(1:nrow(choiceC), 1), ]
}
if(choiceC[1]==1 & choiceC[2]==1){
newC <- matrix(c(x = c1 - 1, y = c2 + 1), ncol = 2)
} else if(choiceC[1]==1 & choiceC[2]==2){
newC <- matrix(c(x = c1, y = c2 + 1), ncol = 2)
} else if(choiceC[1]==1 & choiceC[2]==3){
newC <- matrix(c(x = c1 + 1, y = c2 + 1), ncol = 2)
} else if(choiceC[1]==2 & choiceC[2]==1){
newC <- matrix(c(x = c1 - 1, y = c2), ncol = 2)
} else if(choiceC[1]==2 & choiceC[2]==3){
newC <- matrix(c(x = c1 + 1, y = c2), ncol = 2)
} else if(choiceC[1]==3 & choiceC[2]==1){
newC <- matrix(c(x = c1 - 1, y = c2 - 1), ncol = 2)
} else if(choiceC[1]==3 & choiceC[2]==2){
newC <- matrix(c(x = c1, y = c2 - 1), ncol = 2)
} else if(choiceC[1]==3 & choiceC[2]==3){
newC <- matrix(c(x = c1 + 1, y = c2 - 1), ncol = 2)
}
newc1 <- as.vector(newC[,1])
newc2 <- as.vector(newC[,2])
}
return(newC)
}
The code works for small datasets but when the data frame contains more than 1 million rows, it is very slow. I think that there are many lines of code repeated in the function (e.g., condition if else) which decrease the speed. Are there ways to do all calculations in the function at once? I would really appreciate for any advice.
r
5
I guess you would get more (and faster) answers if you provide a minimal reproducible example as code instead of asking to refactor your "production" code. Just my opinion...
– R Yoda
Nov 9 '18 at 17:16
Please add: Which packages are you using? Is it OK to usedata.tableinstead ofdata.frame? First impression for optimization: Looping over each row is very inefficient, vectorization would be faster (e. g.ifelseinstead ofif)
– R Yoda
Nov 9 '18 at 17:25
2
While one could inspect your code line by line and try to improve it, it would be much better if you clearly described (in your question) what your function does in words; perhaps then it will become clear that it's better to completely rewrite it rather than to debate on individual things likeifelsevsif.
– Julius Vainora
Nov 9 '18 at 19:31
add a comment |
I wrote a code to apply a function to a data frame input:
set.seed(1234)
n = 5000000
input <- as.matrix(data.frame(c1 = sample(1:10, n, replace = T), c2 = sample(1:10, n, replace = T), c3 = sample(1:10, n, replace = T), c4 = sample(1:10, n, replace = T)))
system.time(
test <- input %>%
split(1:nrow(input)) %>%
map(~ func1(.x, 2, 2, "test_1")) %>%
do.call("rbind", .))
## Here is the function used:
func1 <- function(dataC, PR, DB, MT){
c1 <- as.vector(dataC[1])
c2 <- as.vector(dataC[2])
c3 <- as.vector(dataC[3])
c4 <- as.vector(dataC[4])
newc1 <- -999
newc2 <- -999
if(MT=="test_1"){
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR)))
V1 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 + 1) : (c2 + PR)))
V2 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 + 1) : (c2 + PR)))
V3 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - 1) : (c2 + 1)))
V4 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
V5 <- 0
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - 1) : (c2 + 1)))
V6 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - PR) : (c2 - 1)))
V7 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 - PR) : (c2 - 1)))
V8 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - PR) : (c2 - 1)))
V9 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
} else if(MT=="test_2"){
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR)))
V1 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 + 1) : (c2 + PR)))
V2 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 + 1) : (c2 + PR)))
V3 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - 1) : (c2 + 1)))
V4 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
V5 <- 0
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - 1) : (c2 + 1)))
V6 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - PR) : (c2 - 1)))
V7 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 - PR) : (c2 - 1)))
V8 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - PR) : (c2 - 1)))
V9 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
}
tot <- sum(c(1/V1, 1/V2, 1/V3, 1/V4, 1/V6, 1/V7, 1/V8, 1/V9), na.rm = TRUE)
mat_V <- matrix(data = c((1/V1)/tot, (1/V2)/tot, (1/V3)/tot, (1/V4)/tot, V5,
(1/V6)/tot, (1/V7)/tot, (1/V8)/tot, (1/V9)/tot), nrow = 3, ncol = 3, byrow = TRUE)
while((newc1 == -999 && newc2 == -999) || (c3 == newc1 && c4 == newc2)){
if(c3 == newc1 && c4 == newc2){
mat_V[choiceC[1], choiceC[2]] <- NaN
## print(mat_V)
}
choiceC <- which(mat_V == max(mat_V, na.rm = TRUE), arr.ind = TRUE)
## print(choiceC)
## If there are several maximum values
if(nrow(choiceC) > 1){
choiceC <- choiceC[sample(1:nrow(choiceC), 1), ]
}
if(choiceC[1]==1 & choiceC[2]==1){
newC <- matrix(c(x = c1 - 1, y = c2 + 1), ncol = 2)
} else if(choiceC[1]==1 & choiceC[2]==2){
newC <- matrix(c(x = c1, y = c2 + 1), ncol = 2)
} else if(choiceC[1]==1 & choiceC[2]==3){
newC <- matrix(c(x = c1 + 1, y = c2 + 1), ncol = 2)
} else if(choiceC[1]==2 & choiceC[2]==1){
newC <- matrix(c(x = c1 - 1, y = c2), ncol = 2)
} else if(choiceC[1]==2 & choiceC[2]==3){
newC <- matrix(c(x = c1 + 1, y = c2), ncol = 2)
} else if(choiceC[1]==3 & choiceC[2]==1){
newC <- matrix(c(x = c1 - 1, y = c2 - 1), ncol = 2)
} else if(choiceC[1]==3 & choiceC[2]==2){
newC <- matrix(c(x = c1, y = c2 - 1), ncol = 2)
} else if(choiceC[1]==3 & choiceC[2]==3){
newC <- matrix(c(x = c1 + 1, y = c2 - 1), ncol = 2)
}
newc1 <- as.vector(newC[,1])
newc2 <- as.vector(newC[,2])
}
return(newC)
}
The code works for small datasets but when the data frame contains more than 1 million rows, it is very slow. I think that there are many lines of code repeated in the function (e.g., condition if else) which decrease the speed. Are there ways to do all calculations in the function at once? I would really appreciate for any advice.
r
I wrote a code to apply a function to a data frame input:
set.seed(1234)
n = 5000000
input <- as.matrix(data.frame(c1 = sample(1:10, n, replace = T), c2 = sample(1:10, n, replace = T), c3 = sample(1:10, n, replace = T), c4 = sample(1:10, n, replace = T)))
system.time(
test <- input %>%
split(1:nrow(input)) %>%
map(~ func1(.x, 2, 2, "test_1")) %>%
do.call("rbind", .))
## Here is the function used:
func1 <- function(dataC, PR, DB, MT){
c1 <- as.vector(dataC[1])
c2 <- as.vector(dataC[2])
c3 <- as.vector(dataC[3])
c4 <- as.vector(dataC[4])
newc1 <- -999
newc2 <- -999
if(MT=="test_1"){
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR)))
V1 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 + 1) : (c2 + PR)))
V2 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 + 1) : (c2 + PR)))
V3 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - 1) : (c2 + 1)))
V4 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
V5 <- 0
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - 1) : (c2 + 1)))
V6 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - PR) : (c2 - 1)))
V7 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 - PR) : (c2 - 1)))
V8 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - PR) : (c2 - 1)))
V9 <- mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
} else if(MT=="test_2"){
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR)))
V1 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 + 1) : (c2 + PR)))
V2 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 + 1) : (c2 + PR)))
V3 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - 1) : (c2 + 1)))
V4 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
V5 <- 0
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - 1) : (c2 + 1)))
V6 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - PR) : (c2 - 1)))
V7 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
listC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 - PR) : (c2 - 1)))
V8 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * DB
listC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - PR) : (c2 - 1)))
V9 <- harmonic.mean(sample(1:10, size = dim(listC)[1], replace = TRUE)) * sqrt(2) * DB
}
tot <- sum(c(1/V1, 1/V2, 1/V3, 1/V4, 1/V6, 1/V7, 1/V8, 1/V9), na.rm = TRUE)
mat_V <- matrix(data = c((1/V1)/tot, (1/V2)/tot, (1/V3)/tot, (1/V4)/tot, V5,
(1/V6)/tot, (1/V7)/tot, (1/V8)/tot, (1/V9)/tot), nrow = 3, ncol = 3, byrow = TRUE)
while((newc1 == -999 && newc2 == -999) || (c3 == newc1 && c4 == newc2)){
if(c3 == newc1 && c4 == newc2){
mat_V[choiceC[1], choiceC[2]] <- NaN
## print(mat_V)
}
choiceC <- which(mat_V == max(mat_V, na.rm = TRUE), arr.ind = TRUE)
## print(choiceC)
## If there are several maximum values
if(nrow(choiceC) > 1){
choiceC <- choiceC[sample(1:nrow(choiceC), 1), ]
}
if(choiceC[1]==1 & choiceC[2]==1){
newC <- matrix(c(x = c1 - 1, y = c2 + 1), ncol = 2)
} else if(choiceC[1]==1 & choiceC[2]==2){
newC <- matrix(c(x = c1, y = c2 + 1), ncol = 2)
} else if(choiceC[1]==1 & choiceC[2]==3){
newC <- matrix(c(x = c1 + 1, y = c2 + 1), ncol = 2)
} else if(choiceC[1]==2 & choiceC[2]==1){
newC <- matrix(c(x = c1 - 1, y = c2), ncol = 2)
} else if(choiceC[1]==2 & choiceC[2]==3){
newC <- matrix(c(x = c1 + 1, y = c2), ncol = 2)
} else if(choiceC[1]==3 & choiceC[2]==1){
newC <- matrix(c(x = c1 - 1, y = c2 - 1), ncol = 2)
} else if(choiceC[1]==3 & choiceC[2]==2){
newC <- matrix(c(x = c1, y = c2 - 1), ncol = 2)
} else if(choiceC[1]==3 & choiceC[2]==3){
newC <- matrix(c(x = c1 + 1, y = c2 - 1), ncol = 2)
}
newc1 <- as.vector(newC[,1])
newc2 <- as.vector(newC[,2])
}
return(newC)
}
The code works for small datasets but when the data frame contains more than 1 million rows, it is very slow. I think that there are many lines of code repeated in the function (e.g., condition if else) which decrease the speed. Are there ways to do all calculations in the function at once? I would really appreciate for any advice.
r
r
asked Nov 7 '18 at 15:33
PierrePierre
33111
33111
5
I guess you would get more (and faster) answers if you provide a minimal reproducible example as code instead of asking to refactor your "production" code. Just my opinion...
– R Yoda
Nov 9 '18 at 17:16
Please add: Which packages are you using? Is it OK to usedata.tableinstead ofdata.frame? First impression for optimization: Looping over each row is very inefficient, vectorization would be faster (e. g.ifelseinstead ofif)
– R Yoda
Nov 9 '18 at 17:25
2
While one could inspect your code line by line and try to improve it, it would be much better if you clearly described (in your question) what your function does in words; perhaps then it will become clear that it's better to completely rewrite it rather than to debate on individual things likeifelsevsif.
– Julius Vainora
Nov 9 '18 at 19:31
add a comment |
5
I guess you would get more (and faster) answers if you provide a minimal reproducible example as code instead of asking to refactor your "production" code. Just my opinion...
– R Yoda
Nov 9 '18 at 17:16
Please add: Which packages are you using? Is it OK to usedata.tableinstead ofdata.frame? First impression for optimization: Looping over each row is very inefficient, vectorization would be faster (e. g.ifelseinstead ofif)
– R Yoda
Nov 9 '18 at 17:25
2
While one could inspect your code line by line and try to improve it, it would be much better if you clearly described (in your question) what your function does in words; perhaps then it will become clear that it's better to completely rewrite it rather than to debate on individual things likeifelsevsif.
– Julius Vainora
Nov 9 '18 at 19:31
5
5
I guess you would get more (and faster) answers if you provide a minimal reproducible example as code instead of asking to refactor your "production" code. Just my opinion...
– R Yoda
Nov 9 '18 at 17:16
I guess you would get more (and faster) answers if you provide a minimal reproducible example as code instead of asking to refactor your "production" code. Just my opinion...
– R Yoda
Nov 9 '18 at 17:16
Please add: Which packages are you using? Is it OK to use
data.table instead of data.frame? First impression for optimization: Looping over each row is very inefficient, vectorization would be faster (e. g. ifelse instead of if)– R Yoda
Nov 9 '18 at 17:25
Please add: Which packages are you using? Is it OK to use
data.table instead of data.frame? First impression for optimization: Looping over each row is very inefficient, vectorization would be faster (e. g. ifelse instead of if)– R Yoda
Nov 9 '18 at 17:25
2
2
While one could inspect your code line by line and try to improve it, it would be much better if you clearly described (in your question) what your function does in words; perhaps then it will become clear that it's better to completely rewrite it rather than to debate on individual things like
ifelse vs if.– Julius Vainora
Nov 9 '18 at 19:31
While one could inspect your code line by line and try to improve it, it would be much better if you clearly described (in your question) what your function does in words; perhaps then it will become clear that it's better to completely rewrite it rather than to debate on individual things like
ifelse vs if.– Julius Vainora
Nov 9 '18 at 19:31
add a comment |
1 Answer
1
active
oldest
votes
First a bit of tough love but I strongly encourage you to cover your bases, your code is a concentrate of bad practices and you'll get a huge ROI by spending a bit of time studying vectorisation etc... Consider also posting this on https://codereview.stackexchange.com/questions/tagged/r next time as it is a more appropriate question for there.
Your bottleneck is not the nested ifs but the inadequate use of expand.grid.
You create in your codes data frames through expand.grid, that you improperly call listC (they're not lists). Then this costly data.frame is only used for its number of rows, which you get with dim(listC)[1] which would be more idiomatic typed nrow(listC).
This value (dim(listC)[1]) can only be PR^2 or 3*PR in practice, so you could compute those first and just reuse them.
The nested ifs can be replaced with nested switch statements, more readable, and
by testing the first choice only once we're also more efficient.
It allows us to see that you forgot one condition in your code. See your improved code below.
When looking at it once it's more tidy, we see that we could actually replace it by simply newC <- c(c1 - 2 + choice[2], c2 + 2 - choice[1]).
Additional observations
- comment your code, not for us, for you (and then for us when you decide to post a question)
c2 <- as.vector(dataC[2])can be replaced byc2 <- dataC[[2]]
- A matrix of 2 columns and one row can be built by
t(c(1,2))instead ofmatrix(c(x = 1, y = 2), ncol = 2), but if you're going to useas.vectoron it in the end, doc(1,2)in the first place - the code could probably be optimized much further
modified code
func1 <- function(dataC, PR, DB, MT){
c1 <- dataC[[1]]
c2 <- dataC[[2]]
c3 <- dataC[[3]]
c4 <- dataC[[4]]
fun <- if(MT=="test_1") mean else if(MT=="test_2") harmonic.mean
fun2 <- function(size,mult)
fun(sample(1:10, size = size, replace = TRUE)) * mult
pr_sq <- PR^2
pr_3 <- 3*PR
sqrt_2_DB <- sqrt(2) * DB
V1 <- fun2(pr_sq, sqrt_2_DB)
V2 <- fun2(pr_3, DB)
V3 <- fun2(pr_sq, sqrt_2_DB)
V4 <- fun2(pr_3, DB)
V5 <- 0
V6 <- fun2(pr_3, DB)
V7 <- fun2(pr_sq, sqrt_2_DB)
V8 <- fun2(pr_3, DB)
V9 <- fun2(pr_sq, sqrt_2_DB)
inv <- 1/c(V1, V2, V3, V4, V6, V7, V8, V9)
tot <- sum(inv, na.rm = TRUE)
mat_V <- matrix(data = c(inv[1:4], V5, inv[5:8]) / tot,
nrow = 3, ncol = 3, byrow = TRUE)
newC <- NULL
while(is.null(newC) || identical(c(c3,c4), newC)){
if(identical(c(c3,c4), newC)){
mat_V[choiceC[1], choiceC[2]] <- NaN
## print(mat_V)
}
choiceC <- which(mat_V == max(mat_V, na.rm = TRUE), arr.ind = TRUE)
## print(choiceC)
## If there are several maximum values
if(nrow(choiceC) > 1){
choiceC <- choiceC[sample(1:nrow(choiceC), 1), ]
}
newC <- c(c1 - 2 + choiceC[2], c2 + 2 - choiceC[1])
# using switch it would have been
# newC <- switch(choiceC[1],
# `1` = switch(choiceC[2],
# `1` = c(x = c1 - 1, y = c2 + 1),
# `2` = c(x = c1, y = c2 + 1),
# `3` = c(x = c1 + 1, y = c2 + 1)),
# `2` = switch(choiceC[2],
# `1` = c(x = c1 - 1, y = c2),
# `2` = c(x = c1, y = c2), # you were missing this one
# `3` = c(x = c1 + 1, y = c2)),
# `3` = switch(choiceC[2],
# `1` = c(x = c1 - 1, y = c2 - 1),
# `2` = c(x = c1, y = c2 - 1),
# `3` = c(x = c1 + 1, y = c2 - 1)))
}
t(newC)
}
1
Note I corrected an error, I had left an extrasqrt(2)
– Moody_Mudskipper
Nov 12 '18 at 9:03
Thank you very much for your answer. In the case where the data frameexpand.gridis used for its content (for example,listC[,1]andlistC[,2]), is there a way to replace the data frameexpand.grid?
– Pierre
Nov 20 '18 at 1:24
Well it depends what you want to do,expand.gridexists for a reason so you might really need it, on the other hand iflistC == expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR)))and you needlistC[,1], then you needc((c2 + 1) : (c2 + PR))[1], which isc2+1
– Moody_Mudskipper
Nov 20 '18 at 8:28
Thank you very much for your answer.if listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR)))andlistC[,1](not onlylistC[1,1]) is used to calculate the mean instead ofsample(1:10, size = dim(listC)[1], replace = TRUE), expand.grid is required. Right?
– Pierre
Nov 20 '18 at 15:31
I've tested the function with modifications but it's still very long.
– Pierre
Nov 22 '18 at 14:58
|
show 1 more comment
Your Answer
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");
StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "1"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);
StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});
function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});
}
});
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53192662%2frunning-multiple-conditions-at-once-in-r%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
First a bit of tough love but I strongly encourage you to cover your bases, your code is a concentrate of bad practices and you'll get a huge ROI by spending a bit of time studying vectorisation etc... Consider also posting this on https://codereview.stackexchange.com/questions/tagged/r next time as it is a more appropriate question for there.
Your bottleneck is not the nested ifs but the inadequate use of expand.grid.
You create in your codes data frames through expand.grid, that you improperly call listC (they're not lists). Then this costly data.frame is only used for its number of rows, which you get with dim(listC)[1] which would be more idiomatic typed nrow(listC).
This value (dim(listC)[1]) can only be PR^2 or 3*PR in practice, so you could compute those first and just reuse them.
The nested ifs can be replaced with nested switch statements, more readable, and
by testing the first choice only once we're also more efficient.
It allows us to see that you forgot one condition in your code. See your improved code below.
When looking at it once it's more tidy, we see that we could actually replace it by simply newC <- c(c1 - 2 + choice[2], c2 + 2 - choice[1]).
Additional observations
- comment your code, not for us, for you (and then for us when you decide to post a question)
c2 <- as.vector(dataC[2])can be replaced byc2 <- dataC[[2]]
- A matrix of 2 columns and one row can be built by
t(c(1,2))instead ofmatrix(c(x = 1, y = 2), ncol = 2), but if you're going to useas.vectoron it in the end, doc(1,2)in the first place - the code could probably be optimized much further
modified code
func1 <- function(dataC, PR, DB, MT){
c1 <- dataC[[1]]
c2 <- dataC[[2]]
c3 <- dataC[[3]]
c4 <- dataC[[4]]
fun <- if(MT=="test_1") mean else if(MT=="test_2") harmonic.mean
fun2 <- function(size,mult)
fun(sample(1:10, size = size, replace = TRUE)) * mult
pr_sq <- PR^2
pr_3 <- 3*PR
sqrt_2_DB <- sqrt(2) * DB
V1 <- fun2(pr_sq, sqrt_2_DB)
V2 <- fun2(pr_3, DB)
V3 <- fun2(pr_sq, sqrt_2_DB)
V4 <- fun2(pr_3, DB)
V5 <- 0
V6 <- fun2(pr_3, DB)
V7 <- fun2(pr_sq, sqrt_2_DB)
V8 <- fun2(pr_3, DB)
V9 <- fun2(pr_sq, sqrt_2_DB)
inv <- 1/c(V1, V2, V3, V4, V6, V7, V8, V9)
tot <- sum(inv, na.rm = TRUE)
mat_V <- matrix(data = c(inv[1:4], V5, inv[5:8]) / tot,
nrow = 3, ncol = 3, byrow = TRUE)
newC <- NULL
while(is.null(newC) || identical(c(c3,c4), newC)){
if(identical(c(c3,c4), newC)){
mat_V[choiceC[1], choiceC[2]] <- NaN
## print(mat_V)
}
choiceC <- which(mat_V == max(mat_V, na.rm = TRUE), arr.ind = TRUE)
## print(choiceC)
## If there are several maximum values
if(nrow(choiceC) > 1){
choiceC <- choiceC[sample(1:nrow(choiceC), 1), ]
}
newC <- c(c1 - 2 + choiceC[2], c2 + 2 - choiceC[1])
# using switch it would have been
# newC <- switch(choiceC[1],
# `1` = switch(choiceC[2],
# `1` = c(x = c1 - 1, y = c2 + 1),
# `2` = c(x = c1, y = c2 + 1),
# `3` = c(x = c1 + 1, y = c2 + 1)),
# `2` = switch(choiceC[2],
# `1` = c(x = c1 - 1, y = c2),
# `2` = c(x = c1, y = c2), # you were missing this one
# `3` = c(x = c1 + 1, y = c2)),
# `3` = switch(choiceC[2],
# `1` = c(x = c1 - 1, y = c2 - 1),
# `2` = c(x = c1, y = c2 - 1),
# `3` = c(x = c1 + 1, y = c2 - 1)))
}
t(newC)
}
1
Note I corrected an error, I had left an extrasqrt(2)
– Moody_Mudskipper
Nov 12 '18 at 9:03
Thank you very much for your answer. In the case where the data frameexpand.gridis used for its content (for example,listC[,1]andlistC[,2]), is there a way to replace the data frameexpand.grid?
– Pierre
Nov 20 '18 at 1:24
Well it depends what you want to do,expand.gridexists for a reason so you might really need it, on the other hand iflistC == expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR)))and you needlistC[,1], then you needc((c2 + 1) : (c2 + PR))[1], which isc2+1
– Moody_Mudskipper
Nov 20 '18 at 8:28
Thank you very much for your answer.if listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR)))andlistC[,1](not onlylistC[1,1]) is used to calculate the mean instead ofsample(1:10, size = dim(listC)[1], replace = TRUE), expand.grid is required. Right?
– Pierre
Nov 20 '18 at 15:31
I've tested the function with modifications but it's still very long.
– Pierre
Nov 22 '18 at 14:58
|
show 1 more comment
First a bit of tough love but I strongly encourage you to cover your bases, your code is a concentrate of bad practices and you'll get a huge ROI by spending a bit of time studying vectorisation etc... Consider also posting this on https://codereview.stackexchange.com/questions/tagged/r next time as it is a more appropriate question for there.
Your bottleneck is not the nested ifs but the inadequate use of expand.grid.
You create in your codes data frames through expand.grid, that you improperly call listC (they're not lists). Then this costly data.frame is only used for its number of rows, which you get with dim(listC)[1] which would be more idiomatic typed nrow(listC).
This value (dim(listC)[1]) can only be PR^2 or 3*PR in practice, so you could compute those first and just reuse them.
The nested ifs can be replaced with nested switch statements, more readable, and
by testing the first choice only once we're also more efficient.
It allows us to see that you forgot one condition in your code. See your improved code below.
When looking at it once it's more tidy, we see that we could actually replace it by simply newC <- c(c1 - 2 + choice[2], c2 + 2 - choice[1]).
Additional observations
- comment your code, not for us, for you (and then for us when you decide to post a question)
c2 <- as.vector(dataC[2])can be replaced byc2 <- dataC[[2]]
- A matrix of 2 columns and one row can be built by
t(c(1,2))instead ofmatrix(c(x = 1, y = 2), ncol = 2), but if you're going to useas.vectoron it in the end, doc(1,2)in the first place - the code could probably be optimized much further
modified code
func1 <- function(dataC, PR, DB, MT){
c1 <- dataC[[1]]
c2 <- dataC[[2]]
c3 <- dataC[[3]]
c4 <- dataC[[4]]
fun <- if(MT=="test_1") mean else if(MT=="test_2") harmonic.mean
fun2 <- function(size,mult)
fun(sample(1:10, size = size, replace = TRUE)) * mult
pr_sq <- PR^2
pr_3 <- 3*PR
sqrt_2_DB <- sqrt(2) * DB
V1 <- fun2(pr_sq, sqrt_2_DB)
V2 <- fun2(pr_3, DB)
V3 <- fun2(pr_sq, sqrt_2_DB)
V4 <- fun2(pr_3, DB)
V5 <- 0
V6 <- fun2(pr_3, DB)
V7 <- fun2(pr_sq, sqrt_2_DB)
V8 <- fun2(pr_3, DB)
V9 <- fun2(pr_sq, sqrt_2_DB)
inv <- 1/c(V1, V2, V3, V4, V6, V7, V8, V9)
tot <- sum(inv, na.rm = TRUE)
mat_V <- matrix(data = c(inv[1:4], V5, inv[5:8]) / tot,
nrow = 3, ncol = 3, byrow = TRUE)
newC <- NULL
while(is.null(newC) || identical(c(c3,c4), newC)){
if(identical(c(c3,c4), newC)){
mat_V[choiceC[1], choiceC[2]] <- NaN
## print(mat_V)
}
choiceC <- which(mat_V == max(mat_V, na.rm = TRUE), arr.ind = TRUE)
## print(choiceC)
## If there are several maximum values
if(nrow(choiceC) > 1){
choiceC <- choiceC[sample(1:nrow(choiceC), 1), ]
}
newC <- c(c1 - 2 + choiceC[2], c2 + 2 - choiceC[1])
# using switch it would have been
# newC <- switch(choiceC[1],
# `1` = switch(choiceC[2],
# `1` = c(x = c1 - 1, y = c2 + 1),
# `2` = c(x = c1, y = c2 + 1),
# `3` = c(x = c1 + 1, y = c2 + 1)),
# `2` = switch(choiceC[2],
# `1` = c(x = c1 - 1, y = c2),
# `2` = c(x = c1, y = c2), # you were missing this one
# `3` = c(x = c1 + 1, y = c2)),
# `3` = switch(choiceC[2],
# `1` = c(x = c1 - 1, y = c2 - 1),
# `2` = c(x = c1, y = c2 - 1),
# `3` = c(x = c1 + 1, y = c2 - 1)))
}
t(newC)
}
1
Note I corrected an error, I had left an extrasqrt(2)
– Moody_Mudskipper
Nov 12 '18 at 9:03
Thank you very much for your answer. In the case where the data frameexpand.gridis used for its content (for example,listC[,1]andlistC[,2]), is there a way to replace the data frameexpand.grid?
– Pierre
Nov 20 '18 at 1:24
Well it depends what you want to do,expand.gridexists for a reason so you might really need it, on the other hand iflistC == expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR)))and you needlistC[,1], then you needc((c2 + 1) : (c2 + PR))[1], which isc2+1
– Moody_Mudskipper
Nov 20 '18 at 8:28
Thank you very much for your answer.if listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR)))andlistC[,1](not onlylistC[1,1]) is used to calculate the mean instead ofsample(1:10, size = dim(listC)[1], replace = TRUE), expand.grid is required. Right?
– Pierre
Nov 20 '18 at 15:31
I've tested the function with modifications but it's still very long.
– Pierre
Nov 22 '18 at 14:58
|
show 1 more comment
First a bit of tough love but I strongly encourage you to cover your bases, your code is a concentrate of bad practices and you'll get a huge ROI by spending a bit of time studying vectorisation etc... Consider also posting this on https://codereview.stackexchange.com/questions/tagged/r next time as it is a more appropriate question for there.
Your bottleneck is not the nested ifs but the inadequate use of expand.grid.
You create in your codes data frames through expand.grid, that you improperly call listC (they're not lists). Then this costly data.frame is only used for its number of rows, which you get with dim(listC)[1] which would be more idiomatic typed nrow(listC).
This value (dim(listC)[1]) can only be PR^2 or 3*PR in practice, so you could compute those first and just reuse them.
The nested ifs can be replaced with nested switch statements, more readable, and
by testing the first choice only once we're also more efficient.
It allows us to see that you forgot one condition in your code. See your improved code below.
When looking at it once it's more tidy, we see that we could actually replace it by simply newC <- c(c1 - 2 + choice[2], c2 + 2 - choice[1]).
Additional observations
- comment your code, not for us, for you (and then for us when you decide to post a question)
c2 <- as.vector(dataC[2])can be replaced byc2 <- dataC[[2]]
- A matrix of 2 columns and one row can be built by
t(c(1,2))instead ofmatrix(c(x = 1, y = 2), ncol = 2), but if you're going to useas.vectoron it in the end, doc(1,2)in the first place - the code could probably be optimized much further
modified code
func1 <- function(dataC, PR, DB, MT){
c1 <- dataC[[1]]
c2 <- dataC[[2]]
c3 <- dataC[[3]]
c4 <- dataC[[4]]
fun <- if(MT=="test_1") mean else if(MT=="test_2") harmonic.mean
fun2 <- function(size,mult)
fun(sample(1:10, size = size, replace = TRUE)) * mult
pr_sq <- PR^2
pr_3 <- 3*PR
sqrt_2_DB <- sqrt(2) * DB
V1 <- fun2(pr_sq, sqrt_2_DB)
V2 <- fun2(pr_3, DB)
V3 <- fun2(pr_sq, sqrt_2_DB)
V4 <- fun2(pr_3, DB)
V5 <- 0
V6 <- fun2(pr_3, DB)
V7 <- fun2(pr_sq, sqrt_2_DB)
V8 <- fun2(pr_3, DB)
V9 <- fun2(pr_sq, sqrt_2_DB)
inv <- 1/c(V1, V2, V3, V4, V6, V7, V8, V9)
tot <- sum(inv, na.rm = TRUE)
mat_V <- matrix(data = c(inv[1:4], V5, inv[5:8]) / tot,
nrow = 3, ncol = 3, byrow = TRUE)
newC <- NULL
while(is.null(newC) || identical(c(c3,c4), newC)){
if(identical(c(c3,c4), newC)){
mat_V[choiceC[1], choiceC[2]] <- NaN
## print(mat_V)
}
choiceC <- which(mat_V == max(mat_V, na.rm = TRUE), arr.ind = TRUE)
## print(choiceC)
## If there are several maximum values
if(nrow(choiceC) > 1){
choiceC <- choiceC[sample(1:nrow(choiceC), 1), ]
}
newC <- c(c1 - 2 + choiceC[2], c2 + 2 - choiceC[1])
# using switch it would have been
# newC <- switch(choiceC[1],
# `1` = switch(choiceC[2],
# `1` = c(x = c1 - 1, y = c2 + 1),
# `2` = c(x = c1, y = c2 + 1),
# `3` = c(x = c1 + 1, y = c2 + 1)),
# `2` = switch(choiceC[2],
# `1` = c(x = c1 - 1, y = c2),
# `2` = c(x = c1, y = c2), # you were missing this one
# `3` = c(x = c1 + 1, y = c2)),
# `3` = switch(choiceC[2],
# `1` = c(x = c1 - 1, y = c2 - 1),
# `2` = c(x = c1, y = c2 - 1),
# `3` = c(x = c1 + 1, y = c2 - 1)))
}
t(newC)
}
First a bit of tough love but I strongly encourage you to cover your bases, your code is a concentrate of bad practices and you'll get a huge ROI by spending a bit of time studying vectorisation etc... Consider also posting this on https://codereview.stackexchange.com/questions/tagged/r next time as it is a more appropriate question for there.
Your bottleneck is not the nested ifs but the inadequate use of expand.grid.
You create in your codes data frames through expand.grid, that you improperly call listC (they're not lists). Then this costly data.frame is only used for its number of rows, which you get with dim(listC)[1] which would be more idiomatic typed nrow(listC).
This value (dim(listC)[1]) can only be PR^2 or 3*PR in practice, so you could compute those first and just reuse them.
The nested ifs can be replaced with nested switch statements, more readable, and
by testing the first choice only once we're also more efficient.
It allows us to see that you forgot one condition in your code. See your improved code below.
When looking at it once it's more tidy, we see that we could actually replace it by simply newC <- c(c1 - 2 + choice[2], c2 + 2 - choice[1]).
Additional observations
- comment your code, not for us, for you (and then for us when you decide to post a question)
c2 <- as.vector(dataC[2])can be replaced byc2 <- dataC[[2]]
- A matrix of 2 columns and one row can be built by
t(c(1,2))instead ofmatrix(c(x = 1, y = 2), ncol = 2), but if you're going to useas.vectoron it in the end, doc(1,2)in the first place - the code could probably be optimized much further
modified code
func1 <- function(dataC, PR, DB, MT){
c1 <- dataC[[1]]
c2 <- dataC[[2]]
c3 <- dataC[[3]]
c4 <- dataC[[4]]
fun <- if(MT=="test_1") mean else if(MT=="test_2") harmonic.mean
fun2 <- function(size,mult)
fun(sample(1:10, size = size, replace = TRUE)) * mult
pr_sq <- PR^2
pr_3 <- 3*PR
sqrt_2_DB <- sqrt(2) * DB
V1 <- fun2(pr_sq, sqrt_2_DB)
V2 <- fun2(pr_3, DB)
V3 <- fun2(pr_sq, sqrt_2_DB)
V4 <- fun2(pr_3, DB)
V5 <- 0
V6 <- fun2(pr_3, DB)
V7 <- fun2(pr_sq, sqrt_2_DB)
V8 <- fun2(pr_3, DB)
V9 <- fun2(pr_sq, sqrt_2_DB)
inv <- 1/c(V1, V2, V3, V4, V6, V7, V8, V9)
tot <- sum(inv, na.rm = TRUE)
mat_V <- matrix(data = c(inv[1:4], V5, inv[5:8]) / tot,
nrow = 3, ncol = 3, byrow = TRUE)
newC <- NULL
while(is.null(newC) || identical(c(c3,c4), newC)){
if(identical(c(c3,c4), newC)){
mat_V[choiceC[1], choiceC[2]] <- NaN
## print(mat_V)
}
choiceC <- which(mat_V == max(mat_V, na.rm = TRUE), arr.ind = TRUE)
## print(choiceC)
## If there are several maximum values
if(nrow(choiceC) > 1){
choiceC <- choiceC[sample(1:nrow(choiceC), 1), ]
}
newC <- c(c1 - 2 + choiceC[2], c2 + 2 - choiceC[1])
# using switch it would have been
# newC <- switch(choiceC[1],
# `1` = switch(choiceC[2],
# `1` = c(x = c1 - 1, y = c2 + 1),
# `2` = c(x = c1, y = c2 + 1),
# `3` = c(x = c1 + 1, y = c2 + 1)),
# `2` = switch(choiceC[2],
# `1` = c(x = c1 - 1, y = c2),
# `2` = c(x = c1, y = c2), # you were missing this one
# `3` = c(x = c1 + 1, y = c2)),
# `3` = switch(choiceC[2],
# `1` = c(x = c1 - 1, y = c2 - 1),
# `2` = c(x = c1, y = c2 - 1),
# `3` = c(x = c1 + 1, y = c2 - 1)))
}
t(newC)
}
edited Nov 14 '18 at 0:08
answered Nov 10 '18 at 11:56
Moody_MudskipperMoody_Mudskipper
22.3k32864
22.3k32864
1
Note I corrected an error, I had left an extrasqrt(2)
– Moody_Mudskipper
Nov 12 '18 at 9:03
Thank you very much for your answer. In the case where the data frameexpand.gridis used for its content (for example,listC[,1]andlistC[,2]), is there a way to replace the data frameexpand.grid?
– Pierre
Nov 20 '18 at 1:24
Well it depends what you want to do,expand.gridexists for a reason so you might really need it, on the other hand iflistC == expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR)))and you needlistC[,1], then you needc((c2 + 1) : (c2 + PR))[1], which isc2+1
– Moody_Mudskipper
Nov 20 '18 at 8:28
Thank you very much for your answer.if listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR)))andlistC[,1](not onlylistC[1,1]) is used to calculate the mean instead ofsample(1:10, size = dim(listC)[1], replace = TRUE), expand.grid is required. Right?
– Pierre
Nov 20 '18 at 15:31
I've tested the function with modifications but it's still very long.
– Pierre
Nov 22 '18 at 14:58
|
show 1 more comment
1
Note I corrected an error, I had left an extrasqrt(2)
– Moody_Mudskipper
Nov 12 '18 at 9:03
Thank you very much for your answer. In the case where the data frameexpand.gridis used for its content (for example,listC[,1]andlistC[,2]), is there a way to replace the data frameexpand.grid?
– Pierre
Nov 20 '18 at 1:24
Well it depends what you want to do,expand.gridexists for a reason so you might really need it, on the other hand iflistC == expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR)))and you needlistC[,1], then you needc((c2 + 1) : (c2 + PR))[1], which isc2+1
– Moody_Mudskipper
Nov 20 '18 at 8:28
Thank you very much for your answer.if listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR)))andlistC[,1](not onlylistC[1,1]) is used to calculate the mean instead ofsample(1:10, size = dim(listC)[1], replace = TRUE), expand.grid is required. Right?
– Pierre
Nov 20 '18 at 15:31
I've tested the function with modifications but it's still very long.
– Pierre
Nov 22 '18 at 14:58
1
1
Note I corrected an error, I had left an extra
sqrt(2)– Moody_Mudskipper
Nov 12 '18 at 9:03
Note I corrected an error, I had left an extra
sqrt(2)– Moody_Mudskipper
Nov 12 '18 at 9:03
Thank you very much for your answer. In the case where the data frame
expand.grid is used for its content (for example, listC[,1] and listC[,2]), is there a way to replace the data frame expand.grid ?– Pierre
Nov 20 '18 at 1:24
Thank you very much for your answer. In the case where the data frame
expand.grid is used for its content (for example, listC[,1] and listC[,2]), is there a way to replace the data frame expand.grid ?– Pierre
Nov 20 '18 at 1:24
Well it depends what you want to do,
expand.grid exists for a reason so you might really need it, on the other hand if listC == expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR))) and you need listC[,1], then you need c((c2 + 1) : (c2 + PR))[1], which is c2+1– Moody_Mudskipper
Nov 20 '18 at 8:28
Well it depends what you want to do,
expand.grid exists for a reason so you might really need it, on the other hand if listC == expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR))) and you need listC[,1], then you need c((c2 + 1) : (c2 + PR))[1], which is c2+1– Moody_Mudskipper
Nov 20 '18 at 8:28
Thank you very much for your answer.
if listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR))) and listC[,1] (not only listC[1,1]) is used to calculate the mean instead of sample(1:10, size = dim(listC)[1], replace = TRUE), expand.grid is required. Right?– Pierre
Nov 20 '18 at 15:31
Thank you very much for your answer.
if listC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR))) and listC[,1] (not only listC[1,1]) is used to calculate the mean instead of sample(1:10, size = dim(listC)[1], replace = TRUE), expand.grid is required. Right?– Pierre
Nov 20 '18 at 15:31
I've tested the function with modifications but it's still very long.
– Pierre
Nov 22 '18 at 14:58
I've tested the function with modifications but it's still very long.
– Pierre
Nov 22 '18 at 14:58
|
show 1 more comment
Thanks for contributing an answer to Stack Overflow!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53192662%2frunning-multiple-conditions-at-once-in-r%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
5
I guess you would get more (and faster) answers if you provide a minimal reproducible example as code instead of asking to refactor your "production" code. Just my opinion...
– R Yoda
Nov 9 '18 at 17:16
Please add: Which packages are you using? Is it OK to use
data.tableinstead ofdata.frame? First impression for optimization: Looping over each row is very inefficient, vectorization would be faster (e. g.ifelseinstead ofif)– R Yoda
Nov 9 '18 at 17:25
2
While one could inspect your code line by line and try to improve it, it would be much better if you clearly described (in your question) what your function does in words; perhaps then it will become clear that it's better to completely rewrite it rather than to debate on individual things like
ifelsevsif.– Julius Vainora
Nov 9 '18 at 19:31