Runtime optimization of Agent-based model written with R
up vote
0
down vote
favorite
In order to simulate the admixture of indigenous Papuan people with migrants from Asia during the settlement of Oceania, I've written an agent-based model with R (see code below). The model has 6 input parameters. The "true" input parameters should be estimated by comparing the model output for given parameter sets with observed admixture rates across the Pacific islands.
Unfortunately, (as an autodidact) I've never learned to write time-efficient code with R. Thus, I have no idea how I can reduce the time needed for one full run (1 parameter set, 4500 simulated years). Therefore, I hope that some of you could help me with this task (I would even pay for it). The final goal is to optimize the code such that one full run takes less than 2 minutes (then I can parallelize the first loop).
Here is the code ...
#increase memory limits
memory.limit(17592186044415)
memory.size(max=T)
#set working directory
setwd("working.directory")
#loading required packages
library(rlist)
#loading data sets
migration <- list.load("migration.rdata")
admixture <- read.csv("admixture estimates.csv", header=T)
history <- read.csv("history.csv", header=T)
#initial individual number
ind.number <- 21000
#birth & death rates
BD <- data.frame(age=0:55,
death_rate=c(0.02, 0.035, 0.0476, 0.07, 0.09, 0.1, 0.1032, 0.1032, 0.093, 0.072, 0.05, 0.047, 0.046, 0.0443, 0.048, 0.055, 0.062, 0.064, 0.0648, 0.073, 0.08, 0.09, 0.093, 0.098, 0.123, 0.145, 0.167, 0.18, 0.1965, 0.198, 0.1995, 0.201, 0.202, 0.2036, 0.23, 0.25, 0.268, 0.27, 0.2757, 0.278, 0.28, 0.283, 0.285, 0.286, 0.29, 0.31, 0.37, 0.42, 0.5, 0.62, 0.73, 0.84, 0.89, 0.94, 0.97, 1),
birth.rate=c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.001, 0.003, 0.01, 0.04, 0.09, 0.13, 0.17, 0.24, 0.27, 0.29, 0.31, 0.31, 0.295, 0.28, 0.265, 0.245, 0.225, 0.205, 0.19, 0.17, 0.15, 0.13, 0.11, 0.09, 0.075, 0.06, 0.045, 0.03, 0.01, 0.007, 0.0055, 0.0035, 0.0025, 0.0024, 0.0022, 0.0015, 0.001, 0.0005, 0.0001, 0, 0, 0, 0, 0)
)
BD$survival <- with(BD, { s <- cumsum(death_rate); s <- max(s)-s; s <- s/sum(s); c(s[1:55], NA)})
BD$death.rate <- c(0, -diff(BD$survival/BD$survival[1]))
BD$death.rate[nrow(BD)] <- 1
#Creating matrix for parameter combinations & simulation results
PopulParam <- list(
asian.fecundities = c(3, 4, 5, 6, 7),
papuan.fecundities = c(3, 4, 5, 6, 7),
asian.migrations = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8),
papuan.migrations = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8),
marriage.weights = c(0.00, 0.05, 0.10, 0.15, 0.20, 0.25),
migration.distances = seq(0.1,1,by=0.1)#c(0.2, 0.4, 0.6, 0.8, 1.0)
)
runsummary <- expand.grid(PopulParam)
runsummary <- runsummary[rep(1:nrow(runsummary), 100),]
runsummary <- data.frame(runID=1:nrow(runsummary), runsummary, Aauto=NA, Amt=NA, AY=NA)
#simulations
for (i in 1:nrow(runsummary)) {
#initialization
runlist <- data.frame(indID=1:26000000,
sex=NA,
birth.year=NA,
birth.place=NA,
death.year=NA,
death.place=NA,
residence=NA,
year.of.migration=NA,
partner=NA,
parent1=NA,
parent2=NA,
Aauto=NA,
Amt=NA,
AY=NA,
Aoverall=NA,
max.offspring=NA,
offspring.number=0,
time.of.migration=NA
)
runlist$sex[1:ind.number] <- sample(c("m", "f"), ind.number, replace=T)
runlist$birth.year[1:ind.number] <- (-1) * sample(0:54, ind.number, prob=BD$survival[1:55], replace=T)
runlist$birth.place[1:ind.number] <- rep(which(history$initial.settlement==0), each=120)
runlist$residence <- runlist$birth.place
group <- history$initial.group[runlist$birth.place][1:ind.number]
runlist$Aauto[1:ind.number] <- ifelse(group == "Asian", 1, 0)
runlist$Amt[1:ind.number] <- ifelse(group == "Asian", 1, 0)
runlist$AY[1:ind.number] <- ifelse(group == "Asian", 1, 0)
runlist$Aoverall[1:ind.number] <- ifelse(group == "Asian", 1, 0)
females <- which(runlist$sex == "f")
asians <- which(runlist$Aoverall > 0.5)
papuans <- which(runlist$Aoverall < 0.5)
runlist$max.offspring[females[females %in% asians]] <- rpois(sum(females %in% asians), runsummary$asian.fecundities[i])
runlist$max.offspring[females[females %in% papuans]] <- rpois(sum(females %in% papuans), runsummary$papuan.fecundities[i])
autosomes <- matrix(nrow=ind.number, ncol=26)
colnames(autosomes) <- c("indID", "auto1", "auto2", "auto3", "auto4", "auto5", "auto6", "auto7", "auto8", "auto9", "auto10", "auto11", "auto12", "auto13", "auto14", "auto15", "auto16", "auto17", "auto18", "auto19", "auto20", "auto21", "auto22", "auto23", "auto24", "auto25")
autosomes[,1] <- 1:ind.number
group <- ifelse(group=="Asian", 1, 0)
autosomes[,2:26] <- rep(group, 25)
#run simulations (actions for one year)
for (t in 0:4500) {
carrying.capacity <- 120 * 1.0003^t
#deaths
alive <- which(is.na(runlist$death.year) & !is.na(runlist$birth.year)) #living individuals
for (j in 2:56) {
candidates <- alive[t-runlist$birth.year[alive] == j-1] #individuals with age = j-1
deaths <- candidates[which(sample(c(0, 1), length(candidates), prob=c(1 - BD$death.rate[j], BD$death.rate[j]), replace = T) == 1)] #choose candidates with BD$death.rate probability
runlist$death.year[deaths] <- t #set death.year
runlist$death.place[deaths] <- runlist$residence[deaths] #set death.place
partner <- runlist$partner[deaths] #identify partners
if (length(which(!is.na(partner))) > 0) {
runlist$partner[partner[which(!is.na(partner))]] <- NA #divorce partners
}
autosomes <- autosomes[-deaths,] #delete autosomes of deaths to save memory
}
#migration
migratables <- which(t - runlist$birth.year > 11) #individuals with age > 11
migratables <- migratables[which(is.na(runlist$death.year[migratables]))] #alive individuals
migratables <- migratables[which(is.na(runlist$partner[migratables]))] #uncoupled
migratables <- migratables[which(runlist$residence[migratables] == runlist$birth.place[migratables])] #not yet migrated
migratables <- sample(migratables, length(migratables)) #randomize order
for (mig in migratables) {
if (runif(1) < ifelse(runlist$Aoverall[mig] > 0.5, runsummary$asian.migrations[i], runsummary$papuan.migrations[i])) { #will individual j migrate?
deme.candidates <- migration[[runlist$residence[mig]]][,1]
possible <- NULL
for (k in 1:length(deme.candidates)) {
if (!is.na(history$initial.settlement[deme.candidates[k]]) || t >= history$Austronesian.settlement[deme.candidates[k]]) { #deme can be settled at time t
if (length(which(runlist$residence==deme.candidates[k] & !is.na(runlist$birth.year) & is.na(runlist$death.year))) < round(carrying.capacity)) { #deme 'deme.candidates[k]' below carrying capacity
if (runlist$Aoverall[mig] < 0.5) { #test if deme k is within reachable distance for Papuan individuals
if (migration[[runlist$residence[mig]]][k,4] < runsummary[i,7]) {
possible <- c(possible, deme.candidates[k])
}
}
else {
possible <- c(possible, deme.candidates[k])
}
}
}
}
#send individuals with probability 'prob' to the 'possible' deme candidates
dists <- migration[[runlist$residence[mig]]][which(migration[[runlist$residence[mig]]][,1] %in% possible),4]
prob <- dists/sum(dists)
if (length(possible) > 0) {
if (length(prob) == 1) {
new.deme <- possible
}
if (length(prob) > 1) {
new.deme <- sample(possible, 1, prob=prob)
}
runlist$residence[mig] <- new.deme
runlist$time.of.migration[mig] <- t
}
}
}
#coupling & offspring
fertiles <- which(runlist$sex == "f") #select females
fertiles <- fertiles[which(t - runlist$birth.year[fertiles] > 11)] #individuals with age > 11
fertiles <- fertiles[which(is.na(runlist$death.year[fertiles]))] #alive individuals
fertiles <- sample(fertiles, length(fertiles)) #randomize order
for (fer in fertiles) {
if (is.na(runlist$partner[fer])) {
#coupling
partner.candidates <- which(runlist$residence == runlist$residence[fer]) #individuals in same deme
partner.candidates <- partner.candidates[which(runlist$sex[partner.candidates] == "m")] #males in same deme
partner.candidates <- partner.candidates[which(t - runlist$birth.year[partner.candidates] > 11 & is.na(runlist$death.year[partner.candidates]))] #alive males and age > 11 years
partner.candidates <- partner.candidates[which(abs(runlist$birth.year[fer] - runlist$birth.year[partner.candidates]) < 7)] #age difference < 7 years
partner.ancestry <- ifelse(runlist$Aoverall[partner.candidates] > 0.5, 1, 0) #ancestry of candidates
#females marry more often (0.5 + runsummary$marriage.weights[i]) males from other ancestry
if (runlist$Aoverall[fer] > 0.5) { #for Asian women
if (sum(partner.ancestry) == 0 || sum(partner.ancestry) == length(partner.ancestry)) {
new.partner <- sample(partner.candidates, 1)
}
else {
which.partner <- sample(c(0,1), 1, prob=c(0.5 + runsummary$marriage.weights[i], 0.5 - runsummary$marriage.weights[i]))
if (sum(partner.ancestry == which.partner) == 1) {
new.partner <- partner.candidates[which(partner.ancestry == which.partner)]
}
else {
new.partner <- sample(partner.candidates[which(partner.ancestry == which.partner)], 1)
}
}
}
else { #for Papuan women
if (sum(partner.ancestry) == 0 || sum(partner.ancestry) == length(partner.ancestry)) {
new.partner <- sample(partner.candidates, 1)
}
else {
which.partner <- sample(c(0,1), 1, prob=c(0.5 - runsummary$marriage.weights[i], 0.5 + runsummary$marriage.weights[i]))
if (sum(partner.ancestry == which.partner) == 1) {
new.partner <- partner.candidates[which(partner.ancestry == which.partner)]
}
else {
new.partner <- sample(partner.candidates[which(partner.ancestry == which.partner)], 1)
}
}
}
runlist$partner[fer] <- new.partner
runlist$partner[new.partner] <- fer
}
else {
#generate offspring
if (runlist$offspring.number[fer] < runlist$max.offspring[fer]) { #number of children < max. offspring number for female 'fer'
if (length(which(runlist$residence == runlist$residence[fer])) < round(carrying.capacity)) { #deme size < carrying capacity
if (runif(1) < BD$birth.rate[(t - runlist$birth.year[fer]) + 1]) { #test for pregnancy (using birth rate given age of mother)
runlist$offspring.number[fer] <- runlist$offspring.number[fer] + 1 #add children to offspring.number of mother
runlist$offspring.number[runlist$partner[fer]] <- runlist$offspring.number[runlist$partner[fer]] + 1 #add children to offspring.number of father
#initialization of child
childID <- min(which(is.na(runlist$sex)))
runlist$sex[childID] <- ifelse(runif(1) < 0.5, "f", "m")
runlist$birth.year[childID] <- t
runlist$birth.place[childID] <- runlist$residence[fer]
runlist$residence[childID] <- runlist$residence[fer]
runlist$parent1[childID] <- fer
runlist$parent2[childID] <- runlist$partner[fer]
#set mtDNA ancestry from mother
runlist$Amt[childID] <- runlist$Amt[fer]
#set Y-DNA ancestry from father (if child is male)
if (runlist$sex[childID] == "m") {
runlist$AY[childID] <- runlist$AY[runlist$partner[fer]]
}
#random combination of autosomal markers from mother & father
autosomes.row <- nrow(autosomes) + 1 #identify the row number of children in autosomes
autosomes <- rbind(autosomes, NA) #expand autosomes by 1 row
autosomes[autosomes.row, 1] <- childID #set individual ID
child.autosome <- autosomes[which(autosomes[,1] == fer), 2:26] #set mother's autosomes for child
fathers.autosomes <- sort(sample(1:25, sample(12:13, 1))) #select 12 or 13 genes from father's autosomes
child.autosome[fathers.autosomes] <- autosomes[which(autosomes[,1] == runlist$partner[fer]), fathers.autosomes + 1] #replace mother's autosomal markers in childs autosomes
autosomes[autosomes.row, 2:26] <- child.autosome #add autosome of child to 'autosomes'
#calculate Aauto
runlist$Aauto[childID] <- mean(autosomes[autosomes.row, 2:26])
#calculate Aoverall
runlist$Aoverall[childID] <- mean(runlist[childID, 12:14])
}
}
}
}
print(which(fertiles==fer))
}
}
#calculate average distance between simulated and observed ancestries
auto <- NULL
mt <- NULL
Y <- NULL
for (j in 1:length(migration)) {
alive.in.deme.at.t.4500 <- which(runlist$birth.year <= 4500 & is.na(runlist$death.year) & runlist$residence == j)
auto[j] <- mean(runlist$Aauto[alive.in.deme.at.t.4500])
mt[j] <- mean(runlist$Amt[alive.in.deme.at.t.4500])
Y[j] <- mean(runlist$AY[alive.in.deme.at.t.4500])
}
runsummary$Aauto[i] <- mean(abs(auto - admixture$autosomal), na.rm = T)
runsummary$Amt[i] <- mean(abs(mt - admixture$mt.SNPs), na.rm = T)
runsummary$AY[i] <- mean(abs(Y - admixture$Y.SNPs), na.rm = T)
#save files
write.csv(runlist, paste("runlist", i, ".csv", sep=""), row.names=F)
write.csv(runsummary, "runsummary.csv", row.names=F)
#return number of parameter set simulated
print(i)
}
... and here are the required files.
r optimization runtime agent-based-modeling
add a comment |
up vote
0
down vote
favorite
In order to simulate the admixture of indigenous Papuan people with migrants from Asia during the settlement of Oceania, I've written an agent-based model with R (see code below). The model has 6 input parameters. The "true" input parameters should be estimated by comparing the model output for given parameter sets with observed admixture rates across the Pacific islands.
Unfortunately, (as an autodidact) I've never learned to write time-efficient code with R. Thus, I have no idea how I can reduce the time needed for one full run (1 parameter set, 4500 simulated years). Therefore, I hope that some of you could help me with this task (I would even pay for it). The final goal is to optimize the code such that one full run takes less than 2 minutes (then I can parallelize the first loop).
Here is the code ...
#increase memory limits
memory.limit(17592186044415)
memory.size(max=T)
#set working directory
setwd("working.directory")
#loading required packages
library(rlist)
#loading data sets
migration <- list.load("migration.rdata")
admixture <- read.csv("admixture estimates.csv", header=T)
history <- read.csv("history.csv", header=T)
#initial individual number
ind.number <- 21000
#birth & death rates
BD <- data.frame(age=0:55,
death_rate=c(0.02, 0.035, 0.0476, 0.07, 0.09, 0.1, 0.1032, 0.1032, 0.093, 0.072, 0.05, 0.047, 0.046, 0.0443, 0.048, 0.055, 0.062, 0.064, 0.0648, 0.073, 0.08, 0.09, 0.093, 0.098, 0.123, 0.145, 0.167, 0.18, 0.1965, 0.198, 0.1995, 0.201, 0.202, 0.2036, 0.23, 0.25, 0.268, 0.27, 0.2757, 0.278, 0.28, 0.283, 0.285, 0.286, 0.29, 0.31, 0.37, 0.42, 0.5, 0.62, 0.73, 0.84, 0.89, 0.94, 0.97, 1),
birth.rate=c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.001, 0.003, 0.01, 0.04, 0.09, 0.13, 0.17, 0.24, 0.27, 0.29, 0.31, 0.31, 0.295, 0.28, 0.265, 0.245, 0.225, 0.205, 0.19, 0.17, 0.15, 0.13, 0.11, 0.09, 0.075, 0.06, 0.045, 0.03, 0.01, 0.007, 0.0055, 0.0035, 0.0025, 0.0024, 0.0022, 0.0015, 0.001, 0.0005, 0.0001, 0, 0, 0, 0, 0)
)
BD$survival <- with(BD, { s <- cumsum(death_rate); s <- max(s)-s; s <- s/sum(s); c(s[1:55], NA)})
BD$death.rate <- c(0, -diff(BD$survival/BD$survival[1]))
BD$death.rate[nrow(BD)] <- 1
#Creating matrix for parameter combinations & simulation results
PopulParam <- list(
asian.fecundities = c(3, 4, 5, 6, 7),
papuan.fecundities = c(3, 4, 5, 6, 7),
asian.migrations = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8),
papuan.migrations = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8),
marriage.weights = c(0.00, 0.05, 0.10, 0.15, 0.20, 0.25),
migration.distances = seq(0.1,1,by=0.1)#c(0.2, 0.4, 0.6, 0.8, 1.0)
)
runsummary <- expand.grid(PopulParam)
runsummary <- runsummary[rep(1:nrow(runsummary), 100),]
runsummary <- data.frame(runID=1:nrow(runsummary), runsummary, Aauto=NA, Amt=NA, AY=NA)
#simulations
for (i in 1:nrow(runsummary)) {
#initialization
runlist <- data.frame(indID=1:26000000,
sex=NA,
birth.year=NA,
birth.place=NA,
death.year=NA,
death.place=NA,
residence=NA,
year.of.migration=NA,
partner=NA,
parent1=NA,
parent2=NA,
Aauto=NA,
Amt=NA,
AY=NA,
Aoverall=NA,
max.offspring=NA,
offspring.number=0,
time.of.migration=NA
)
runlist$sex[1:ind.number] <- sample(c("m", "f"), ind.number, replace=T)
runlist$birth.year[1:ind.number] <- (-1) * sample(0:54, ind.number, prob=BD$survival[1:55], replace=T)
runlist$birth.place[1:ind.number] <- rep(which(history$initial.settlement==0), each=120)
runlist$residence <- runlist$birth.place
group <- history$initial.group[runlist$birth.place][1:ind.number]
runlist$Aauto[1:ind.number] <- ifelse(group == "Asian", 1, 0)
runlist$Amt[1:ind.number] <- ifelse(group == "Asian", 1, 0)
runlist$AY[1:ind.number] <- ifelse(group == "Asian", 1, 0)
runlist$Aoverall[1:ind.number] <- ifelse(group == "Asian", 1, 0)
females <- which(runlist$sex == "f")
asians <- which(runlist$Aoverall > 0.5)
papuans <- which(runlist$Aoverall < 0.5)
runlist$max.offspring[females[females %in% asians]] <- rpois(sum(females %in% asians), runsummary$asian.fecundities[i])
runlist$max.offspring[females[females %in% papuans]] <- rpois(sum(females %in% papuans), runsummary$papuan.fecundities[i])
autosomes <- matrix(nrow=ind.number, ncol=26)
colnames(autosomes) <- c("indID", "auto1", "auto2", "auto3", "auto4", "auto5", "auto6", "auto7", "auto8", "auto9", "auto10", "auto11", "auto12", "auto13", "auto14", "auto15", "auto16", "auto17", "auto18", "auto19", "auto20", "auto21", "auto22", "auto23", "auto24", "auto25")
autosomes[,1] <- 1:ind.number
group <- ifelse(group=="Asian", 1, 0)
autosomes[,2:26] <- rep(group, 25)
#run simulations (actions for one year)
for (t in 0:4500) {
carrying.capacity <- 120 * 1.0003^t
#deaths
alive <- which(is.na(runlist$death.year) & !is.na(runlist$birth.year)) #living individuals
for (j in 2:56) {
candidates <- alive[t-runlist$birth.year[alive] == j-1] #individuals with age = j-1
deaths <- candidates[which(sample(c(0, 1), length(candidates), prob=c(1 - BD$death.rate[j], BD$death.rate[j]), replace = T) == 1)] #choose candidates with BD$death.rate probability
runlist$death.year[deaths] <- t #set death.year
runlist$death.place[deaths] <- runlist$residence[deaths] #set death.place
partner <- runlist$partner[deaths] #identify partners
if (length(which(!is.na(partner))) > 0) {
runlist$partner[partner[which(!is.na(partner))]] <- NA #divorce partners
}
autosomes <- autosomes[-deaths,] #delete autosomes of deaths to save memory
}
#migration
migratables <- which(t - runlist$birth.year > 11) #individuals with age > 11
migratables <- migratables[which(is.na(runlist$death.year[migratables]))] #alive individuals
migratables <- migratables[which(is.na(runlist$partner[migratables]))] #uncoupled
migratables <- migratables[which(runlist$residence[migratables] == runlist$birth.place[migratables])] #not yet migrated
migratables <- sample(migratables, length(migratables)) #randomize order
for (mig in migratables) {
if (runif(1) < ifelse(runlist$Aoverall[mig] > 0.5, runsummary$asian.migrations[i], runsummary$papuan.migrations[i])) { #will individual j migrate?
deme.candidates <- migration[[runlist$residence[mig]]][,1]
possible <- NULL
for (k in 1:length(deme.candidates)) {
if (!is.na(history$initial.settlement[deme.candidates[k]]) || t >= history$Austronesian.settlement[deme.candidates[k]]) { #deme can be settled at time t
if (length(which(runlist$residence==deme.candidates[k] & !is.na(runlist$birth.year) & is.na(runlist$death.year))) < round(carrying.capacity)) { #deme 'deme.candidates[k]' below carrying capacity
if (runlist$Aoverall[mig] < 0.5) { #test if deme k is within reachable distance for Papuan individuals
if (migration[[runlist$residence[mig]]][k,4] < runsummary[i,7]) {
possible <- c(possible, deme.candidates[k])
}
}
else {
possible <- c(possible, deme.candidates[k])
}
}
}
}
#send individuals with probability 'prob' to the 'possible' deme candidates
dists <- migration[[runlist$residence[mig]]][which(migration[[runlist$residence[mig]]][,1] %in% possible),4]
prob <- dists/sum(dists)
if (length(possible) > 0) {
if (length(prob) == 1) {
new.deme <- possible
}
if (length(prob) > 1) {
new.deme <- sample(possible, 1, prob=prob)
}
runlist$residence[mig] <- new.deme
runlist$time.of.migration[mig] <- t
}
}
}
#coupling & offspring
fertiles <- which(runlist$sex == "f") #select females
fertiles <- fertiles[which(t - runlist$birth.year[fertiles] > 11)] #individuals with age > 11
fertiles <- fertiles[which(is.na(runlist$death.year[fertiles]))] #alive individuals
fertiles <- sample(fertiles, length(fertiles)) #randomize order
for (fer in fertiles) {
if (is.na(runlist$partner[fer])) {
#coupling
partner.candidates <- which(runlist$residence == runlist$residence[fer]) #individuals in same deme
partner.candidates <- partner.candidates[which(runlist$sex[partner.candidates] == "m")] #males in same deme
partner.candidates <- partner.candidates[which(t - runlist$birth.year[partner.candidates] > 11 & is.na(runlist$death.year[partner.candidates]))] #alive males and age > 11 years
partner.candidates <- partner.candidates[which(abs(runlist$birth.year[fer] - runlist$birth.year[partner.candidates]) < 7)] #age difference < 7 years
partner.ancestry <- ifelse(runlist$Aoverall[partner.candidates] > 0.5, 1, 0) #ancestry of candidates
#females marry more often (0.5 + runsummary$marriage.weights[i]) males from other ancestry
if (runlist$Aoverall[fer] > 0.5) { #for Asian women
if (sum(partner.ancestry) == 0 || sum(partner.ancestry) == length(partner.ancestry)) {
new.partner <- sample(partner.candidates, 1)
}
else {
which.partner <- sample(c(0,1), 1, prob=c(0.5 + runsummary$marriage.weights[i], 0.5 - runsummary$marriage.weights[i]))
if (sum(partner.ancestry == which.partner) == 1) {
new.partner <- partner.candidates[which(partner.ancestry == which.partner)]
}
else {
new.partner <- sample(partner.candidates[which(partner.ancestry == which.partner)], 1)
}
}
}
else { #for Papuan women
if (sum(partner.ancestry) == 0 || sum(partner.ancestry) == length(partner.ancestry)) {
new.partner <- sample(partner.candidates, 1)
}
else {
which.partner <- sample(c(0,1), 1, prob=c(0.5 - runsummary$marriage.weights[i], 0.5 + runsummary$marriage.weights[i]))
if (sum(partner.ancestry == which.partner) == 1) {
new.partner <- partner.candidates[which(partner.ancestry == which.partner)]
}
else {
new.partner <- sample(partner.candidates[which(partner.ancestry == which.partner)], 1)
}
}
}
runlist$partner[fer] <- new.partner
runlist$partner[new.partner] <- fer
}
else {
#generate offspring
if (runlist$offspring.number[fer] < runlist$max.offspring[fer]) { #number of children < max. offspring number for female 'fer'
if (length(which(runlist$residence == runlist$residence[fer])) < round(carrying.capacity)) { #deme size < carrying capacity
if (runif(1) < BD$birth.rate[(t - runlist$birth.year[fer]) + 1]) { #test for pregnancy (using birth rate given age of mother)
runlist$offspring.number[fer] <- runlist$offspring.number[fer] + 1 #add children to offspring.number of mother
runlist$offspring.number[runlist$partner[fer]] <- runlist$offspring.number[runlist$partner[fer]] + 1 #add children to offspring.number of father
#initialization of child
childID <- min(which(is.na(runlist$sex)))
runlist$sex[childID] <- ifelse(runif(1) < 0.5, "f", "m")
runlist$birth.year[childID] <- t
runlist$birth.place[childID] <- runlist$residence[fer]
runlist$residence[childID] <- runlist$residence[fer]
runlist$parent1[childID] <- fer
runlist$parent2[childID] <- runlist$partner[fer]
#set mtDNA ancestry from mother
runlist$Amt[childID] <- runlist$Amt[fer]
#set Y-DNA ancestry from father (if child is male)
if (runlist$sex[childID] == "m") {
runlist$AY[childID] <- runlist$AY[runlist$partner[fer]]
}
#random combination of autosomal markers from mother & father
autosomes.row <- nrow(autosomes) + 1 #identify the row number of children in autosomes
autosomes <- rbind(autosomes, NA) #expand autosomes by 1 row
autosomes[autosomes.row, 1] <- childID #set individual ID
child.autosome <- autosomes[which(autosomes[,1] == fer), 2:26] #set mother's autosomes for child
fathers.autosomes <- sort(sample(1:25, sample(12:13, 1))) #select 12 or 13 genes from father's autosomes
child.autosome[fathers.autosomes] <- autosomes[which(autosomes[,1] == runlist$partner[fer]), fathers.autosomes + 1] #replace mother's autosomal markers in childs autosomes
autosomes[autosomes.row, 2:26] <- child.autosome #add autosome of child to 'autosomes'
#calculate Aauto
runlist$Aauto[childID] <- mean(autosomes[autosomes.row, 2:26])
#calculate Aoverall
runlist$Aoverall[childID] <- mean(runlist[childID, 12:14])
}
}
}
}
print(which(fertiles==fer))
}
}
#calculate average distance between simulated and observed ancestries
auto <- NULL
mt <- NULL
Y <- NULL
for (j in 1:length(migration)) {
alive.in.deme.at.t.4500 <- which(runlist$birth.year <= 4500 & is.na(runlist$death.year) & runlist$residence == j)
auto[j] <- mean(runlist$Aauto[alive.in.deme.at.t.4500])
mt[j] <- mean(runlist$Amt[alive.in.deme.at.t.4500])
Y[j] <- mean(runlist$AY[alive.in.deme.at.t.4500])
}
runsummary$Aauto[i] <- mean(abs(auto - admixture$autosomal), na.rm = T)
runsummary$Amt[i] <- mean(abs(mt - admixture$mt.SNPs), na.rm = T)
runsummary$AY[i] <- mean(abs(Y - admixture$Y.SNPs), na.rm = T)
#save files
write.csv(runlist, paste("runlist", i, ".csv", sep=""), row.names=F)
write.csv(runsummary, "runsummary.csv", row.names=F)
#return number of parameter set simulated
print(i)
}
... and here are the required files.
r optimization runtime agent-based-modeling
add a comment |
up vote
0
down vote
favorite
up vote
0
down vote
favorite
In order to simulate the admixture of indigenous Papuan people with migrants from Asia during the settlement of Oceania, I've written an agent-based model with R (see code below). The model has 6 input parameters. The "true" input parameters should be estimated by comparing the model output for given parameter sets with observed admixture rates across the Pacific islands.
Unfortunately, (as an autodidact) I've never learned to write time-efficient code with R. Thus, I have no idea how I can reduce the time needed for one full run (1 parameter set, 4500 simulated years). Therefore, I hope that some of you could help me with this task (I would even pay for it). The final goal is to optimize the code such that one full run takes less than 2 minutes (then I can parallelize the first loop).
Here is the code ...
#increase memory limits
memory.limit(17592186044415)
memory.size(max=T)
#set working directory
setwd("working.directory")
#loading required packages
library(rlist)
#loading data sets
migration <- list.load("migration.rdata")
admixture <- read.csv("admixture estimates.csv", header=T)
history <- read.csv("history.csv", header=T)
#initial individual number
ind.number <- 21000
#birth & death rates
BD <- data.frame(age=0:55,
death_rate=c(0.02, 0.035, 0.0476, 0.07, 0.09, 0.1, 0.1032, 0.1032, 0.093, 0.072, 0.05, 0.047, 0.046, 0.0443, 0.048, 0.055, 0.062, 0.064, 0.0648, 0.073, 0.08, 0.09, 0.093, 0.098, 0.123, 0.145, 0.167, 0.18, 0.1965, 0.198, 0.1995, 0.201, 0.202, 0.2036, 0.23, 0.25, 0.268, 0.27, 0.2757, 0.278, 0.28, 0.283, 0.285, 0.286, 0.29, 0.31, 0.37, 0.42, 0.5, 0.62, 0.73, 0.84, 0.89, 0.94, 0.97, 1),
birth.rate=c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.001, 0.003, 0.01, 0.04, 0.09, 0.13, 0.17, 0.24, 0.27, 0.29, 0.31, 0.31, 0.295, 0.28, 0.265, 0.245, 0.225, 0.205, 0.19, 0.17, 0.15, 0.13, 0.11, 0.09, 0.075, 0.06, 0.045, 0.03, 0.01, 0.007, 0.0055, 0.0035, 0.0025, 0.0024, 0.0022, 0.0015, 0.001, 0.0005, 0.0001, 0, 0, 0, 0, 0)
)
BD$survival <- with(BD, { s <- cumsum(death_rate); s <- max(s)-s; s <- s/sum(s); c(s[1:55], NA)})
BD$death.rate <- c(0, -diff(BD$survival/BD$survival[1]))
BD$death.rate[nrow(BD)] <- 1
#Creating matrix for parameter combinations & simulation results
PopulParam <- list(
asian.fecundities = c(3, 4, 5, 6, 7),
papuan.fecundities = c(3, 4, 5, 6, 7),
asian.migrations = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8),
papuan.migrations = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8),
marriage.weights = c(0.00, 0.05, 0.10, 0.15, 0.20, 0.25),
migration.distances = seq(0.1,1,by=0.1)#c(0.2, 0.4, 0.6, 0.8, 1.0)
)
runsummary <- expand.grid(PopulParam)
runsummary <- runsummary[rep(1:nrow(runsummary), 100),]
runsummary <- data.frame(runID=1:nrow(runsummary), runsummary, Aauto=NA, Amt=NA, AY=NA)
#simulations
for (i in 1:nrow(runsummary)) {
#initialization
runlist <- data.frame(indID=1:26000000,
sex=NA,
birth.year=NA,
birth.place=NA,
death.year=NA,
death.place=NA,
residence=NA,
year.of.migration=NA,
partner=NA,
parent1=NA,
parent2=NA,
Aauto=NA,
Amt=NA,
AY=NA,
Aoverall=NA,
max.offspring=NA,
offspring.number=0,
time.of.migration=NA
)
runlist$sex[1:ind.number] <- sample(c("m", "f"), ind.number, replace=T)
runlist$birth.year[1:ind.number] <- (-1) * sample(0:54, ind.number, prob=BD$survival[1:55], replace=T)
runlist$birth.place[1:ind.number] <- rep(which(history$initial.settlement==0), each=120)
runlist$residence <- runlist$birth.place
group <- history$initial.group[runlist$birth.place][1:ind.number]
runlist$Aauto[1:ind.number] <- ifelse(group == "Asian", 1, 0)
runlist$Amt[1:ind.number] <- ifelse(group == "Asian", 1, 0)
runlist$AY[1:ind.number] <- ifelse(group == "Asian", 1, 0)
runlist$Aoverall[1:ind.number] <- ifelse(group == "Asian", 1, 0)
females <- which(runlist$sex == "f")
asians <- which(runlist$Aoverall > 0.5)
papuans <- which(runlist$Aoverall < 0.5)
runlist$max.offspring[females[females %in% asians]] <- rpois(sum(females %in% asians), runsummary$asian.fecundities[i])
runlist$max.offspring[females[females %in% papuans]] <- rpois(sum(females %in% papuans), runsummary$papuan.fecundities[i])
autosomes <- matrix(nrow=ind.number, ncol=26)
colnames(autosomes) <- c("indID", "auto1", "auto2", "auto3", "auto4", "auto5", "auto6", "auto7", "auto8", "auto9", "auto10", "auto11", "auto12", "auto13", "auto14", "auto15", "auto16", "auto17", "auto18", "auto19", "auto20", "auto21", "auto22", "auto23", "auto24", "auto25")
autosomes[,1] <- 1:ind.number
group <- ifelse(group=="Asian", 1, 0)
autosomes[,2:26] <- rep(group, 25)
#run simulations (actions for one year)
for (t in 0:4500) {
carrying.capacity <- 120 * 1.0003^t
#deaths
alive <- which(is.na(runlist$death.year) & !is.na(runlist$birth.year)) #living individuals
for (j in 2:56) {
candidates <- alive[t-runlist$birth.year[alive] == j-1] #individuals with age = j-1
deaths <- candidates[which(sample(c(0, 1), length(candidates), prob=c(1 - BD$death.rate[j], BD$death.rate[j]), replace = T) == 1)] #choose candidates with BD$death.rate probability
runlist$death.year[deaths] <- t #set death.year
runlist$death.place[deaths] <- runlist$residence[deaths] #set death.place
partner <- runlist$partner[deaths] #identify partners
if (length(which(!is.na(partner))) > 0) {
runlist$partner[partner[which(!is.na(partner))]] <- NA #divorce partners
}
autosomes <- autosomes[-deaths,] #delete autosomes of deaths to save memory
}
#migration
migratables <- which(t - runlist$birth.year > 11) #individuals with age > 11
migratables <- migratables[which(is.na(runlist$death.year[migratables]))] #alive individuals
migratables <- migratables[which(is.na(runlist$partner[migratables]))] #uncoupled
migratables <- migratables[which(runlist$residence[migratables] == runlist$birth.place[migratables])] #not yet migrated
migratables <- sample(migratables, length(migratables)) #randomize order
for (mig in migratables) {
if (runif(1) < ifelse(runlist$Aoverall[mig] > 0.5, runsummary$asian.migrations[i], runsummary$papuan.migrations[i])) { #will individual j migrate?
deme.candidates <- migration[[runlist$residence[mig]]][,1]
possible <- NULL
for (k in 1:length(deme.candidates)) {
if (!is.na(history$initial.settlement[deme.candidates[k]]) || t >= history$Austronesian.settlement[deme.candidates[k]]) { #deme can be settled at time t
if (length(which(runlist$residence==deme.candidates[k] & !is.na(runlist$birth.year) & is.na(runlist$death.year))) < round(carrying.capacity)) { #deme 'deme.candidates[k]' below carrying capacity
if (runlist$Aoverall[mig] < 0.5) { #test if deme k is within reachable distance for Papuan individuals
if (migration[[runlist$residence[mig]]][k,4] < runsummary[i,7]) {
possible <- c(possible, deme.candidates[k])
}
}
else {
possible <- c(possible, deme.candidates[k])
}
}
}
}
#send individuals with probability 'prob' to the 'possible' deme candidates
dists <- migration[[runlist$residence[mig]]][which(migration[[runlist$residence[mig]]][,1] %in% possible),4]
prob <- dists/sum(dists)
if (length(possible) > 0) {
if (length(prob) == 1) {
new.deme <- possible
}
if (length(prob) > 1) {
new.deme <- sample(possible, 1, prob=prob)
}
runlist$residence[mig] <- new.deme
runlist$time.of.migration[mig] <- t
}
}
}
#coupling & offspring
fertiles <- which(runlist$sex == "f") #select females
fertiles <- fertiles[which(t - runlist$birth.year[fertiles] > 11)] #individuals with age > 11
fertiles <- fertiles[which(is.na(runlist$death.year[fertiles]))] #alive individuals
fertiles <- sample(fertiles, length(fertiles)) #randomize order
for (fer in fertiles) {
if (is.na(runlist$partner[fer])) {
#coupling
partner.candidates <- which(runlist$residence == runlist$residence[fer]) #individuals in same deme
partner.candidates <- partner.candidates[which(runlist$sex[partner.candidates] == "m")] #males in same deme
partner.candidates <- partner.candidates[which(t - runlist$birth.year[partner.candidates] > 11 & is.na(runlist$death.year[partner.candidates]))] #alive males and age > 11 years
partner.candidates <- partner.candidates[which(abs(runlist$birth.year[fer] - runlist$birth.year[partner.candidates]) < 7)] #age difference < 7 years
partner.ancestry <- ifelse(runlist$Aoverall[partner.candidates] > 0.5, 1, 0) #ancestry of candidates
#females marry more often (0.5 + runsummary$marriage.weights[i]) males from other ancestry
if (runlist$Aoverall[fer] > 0.5) { #for Asian women
if (sum(partner.ancestry) == 0 || sum(partner.ancestry) == length(partner.ancestry)) {
new.partner <- sample(partner.candidates, 1)
}
else {
which.partner <- sample(c(0,1), 1, prob=c(0.5 + runsummary$marriage.weights[i], 0.5 - runsummary$marriage.weights[i]))
if (sum(partner.ancestry == which.partner) == 1) {
new.partner <- partner.candidates[which(partner.ancestry == which.partner)]
}
else {
new.partner <- sample(partner.candidates[which(partner.ancestry == which.partner)], 1)
}
}
}
else { #for Papuan women
if (sum(partner.ancestry) == 0 || sum(partner.ancestry) == length(partner.ancestry)) {
new.partner <- sample(partner.candidates, 1)
}
else {
which.partner <- sample(c(0,1), 1, prob=c(0.5 - runsummary$marriage.weights[i], 0.5 + runsummary$marriage.weights[i]))
if (sum(partner.ancestry == which.partner) == 1) {
new.partner <- partner.candidates[which(partner.ancestry == which.partner)]
}
else {
new.partner <- sample(partner.candidates[which(partner.ancestry == which.partner)], 1)
}
}
}
runlist$partner[fer] <- new.partner
runlist$partner[new.partner] <- fer
}
else {
#generate offspring
if (runlist$offspring.number[fer] < runlist$max.offspring[fer]) { #number of children < max. offspring number for female 'fer'
if (length(which(runlist$residence == runlist$residence[fer])) < round(carrying.capacity)) { #deme size < carrying capacity
if (runif(1) < BD$birth.rate[(t - runlist$birth.year[fer]) + 1]) { #test for pregnancy (using birth rate given age of mother)
runlist$offspring.number[fer] <- runlist$offspring.number[fer] + 1 #add children to offspring.number of mother
runlist$offspring.number[runlist$partner[fer]] <- runlist$offspring.number[runlist$partner[fer]] + 1 #add children to offspring.number of father
#initialization of child
childID <- min(which(is.na(runlist$sex)))
runlist$sex[childID] <- ifelse(runif(1) < 0.5, "f", "m")
runlist$birth.year[childID] <- t
runlist$birth.place[childID] <- runlist$residence[fer]
runlist$residence[childID] <- runlist$residence[fer]
runlist$parent1[childID] <- fer
runlist$parent2[childID] <- runlist$partner[fer]
#set mtDNA ancestry from mother
runlist$Amt[childID] <- runlist$Amt[fer]
#set Y-DNA ancestry from father (if child is male)
if (runlist$sex[childID] == "m") {
runlist$AY[childID] <- runlist$AY[runlist$partner[fer]]
}
#random combination of autosomal markers from mother & father
autosomes.row <- nrow(autosomes) + 1 #identify the row number of children in autosomes
autosomes <- rbind(autosomes, NA) #expand autosomes by 1 row
autosomes[autosomes.row, 1] <- childID #set individual ID
child.autosome <- autosomes[which(autosomes[,1] == fer), 2:26] #set mother's autosomes for child
fathers.autosomes <- sort(sample(1:25, sample(12:13, 1))) #select 12 or 13 genes from father's autosomes
child.autosome[fathers.autosomes] <- autosomes[which(autosomes[,1] == runlist$partner[fer]), fathers.autosomes + 1] #replace mother's autosomal markers in childs autosomes
autosomes[autosomes.row, 2:26] <- child.autosome #add autosome of child to 'autosomes'
#calculate Aauto
runlist$Aauto[childID] <- mean(autosomes[autosomes.row, 2:26])
#calculate Aoverall
runlist$Aoverall[childID] <- mean(runlist[childID, 12:14])
}
}
}
}
print(which(fertiles==fer))
}
}
#calculate average distance between simulated and observed ancestries
auto <- NULL
mt <- NULL
Y <- NULL
for (j in 1:length(migration)) {
alive.in.deme.at.t.4500 <- which(runlist$birth.year <= 4500 & is.na(runlist$death.year) & runlist$residence == j)
auto[j] <- mean(runlist$Aauto[alive.in.deme.at.t.4500])
mt[j] <- mean(runlist$Amt[alive.in.deme.at.t.4500])
Y[j] <- mean(runlist$AY[alive.in.deme.at.t.4500])
}
runsummary$Aauto[i] <- mean(abs(auto - admixture$autosomal), na.rm = T)
runsummary$Amt[i] <- mean(abs(mt - admixture$mt.SNPs), na.rm = T)
runsummary$AY[i] <- mean(abs(Y - admixture$Y.SNPs), na.rm = T)
#save files
write.csv(runlist, paste("runlist", i, ".csv", sep=""), row.names=F)
write.csv(runsummary, "runsummary.csv", row.names=F)
#return number of parameter set simulated
print(i)
}
... and here are the required files.
r optimization runtime agent-based-modeling
In order to simulate the admixture of indigenous Papuan people with migrants from Asia during the settlement of Oceania, I've written an agent-based model with R (see code below). The model has 6 input parameters. The "true" input parameters should be estimated by comparing the model output for given parameter sets with observed admixture rates across the Pacific islands.
Unfortunately, (as an autodidact) I've never learned to write time-efficient code with R. Thus, I have no idea how I can reduce the time needed for one full run (1 parameter set, 4500 simulated years). Therefore, I hope that some of you could help me with this task (I would even pay for it). The final goal is to optimize the code such that one full run takes less than 2 minutes (then I can parallelize the first loop).
Here is the code ...
#increase memory limits
memory.limit(17592186044415)
memory.size(max=T)
#set working directory
setwd("working.directory")
#loading required packages
library(rlist)
#loading data sets
migration <- list.load("migration.rdata")
admixture <- read.csv("admixture estimates.csv", header=T)
history <- read.csv("history.csv", header=T)
#initial individual number
ind.number <- 21000
#birth & death rates
BD <- data.frame(age=0:55,
death_rate=c(0.02, 0.035, 0.0476, 0.07, 0.09, 0.1, 0.1032, 0.1032, 0.093, 0.072, 0.05, 0.047, 0.046, 0.0443, 0.048, 0.055, 0.062, 0.064, 0.0648, 0.073, 0.08, 0.09, 0.093, 0.098, 0.123, 0.145, 0.167, 0.18, 0.1965, 0.198, 0.1995, 0.201, 0.202, 0.2036, 0.23, 0.25, 0.268, 0.27, 0.2757, 0.278, 0.28, 0.283, 0.285, 0.286, 0.29, 0.31, 0.37, 0.42, 0.5, 0.62, 0.73, 0.84, 0.89, 0.94, 0.97, 1),
birth.rate=c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.001, 0.003, 0.01, 0.04, 0.09, 0.13, 0.17, 0.24, 0.27, 0.29, 0.31, 0.31, 0.295, 0.28, 0.265, 0.245, 0.225, 0.205, 0.19, 0.17, 0.15, 0.13, 0.11, 0.09, 0.075, 0.06, 0.045, 0.03, 0.01, 0.007, 0.0055, 0.0035, 0.0025, 0.0024, 0.0022, 0.0015, 0.001, 0.0005, 0.0001, 0, 0, 0, 0, 0)
)
BD$survival <- with(BD, { s <- cumsum(death_rate); s <- max(s)-s; s <- s/sum(s); c(s[1:55], NA)})
BD$death.rate <- c(0, -diff(BD$survival/BD$survival[1]))
BD$death.rate[nrow(BD)] <- 1
#Creating matrix for parameter combinations & simulation results
PopulParam <- list(
asian.fecundities = c(3, 4, 5, 6, 7),
papuan.fecundities = c(3, 4, 5, 6, 7),
asian.migrations = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8),
papuan.migrations = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8),
marriage.weights = c(0.00, 0.05, 0.10, 0.15, 0.20, 0.25),
migration.distances = seq(0.1,1,by=0.1)#c(0.2, 0.4, 0.6, 0.8, 1.0)
)
runsummary <- expand.grid(PopulParam)
runsummary <- runsummary[rep(1:nrow(runsummary), 100),]
runsummary <- data.frame(runID=1:nrow(runsummary), runsummary, Aauto=NA, Amt=NA, AY=NA)
#simulations
for (i in 1:nrow(runsummary)) {
#initialization
runlist <- data.frame(indID=1:26000000,
sex=NA,
birth.year=NA,
birth.place=NA,
death.year=NA,
death.place=NA,
residence=NA,
year.of.migration=NA,
partner=NA,
parent1=NA,
parent2=NA,
Aauto=NA,
Amt=NA,
AY=NA,
Aoverall=NA,
max.offspring=NA,
offspring.number=0,
time.of.migration=NA
)
runlist$sex[1:ind.number] <- sample(c("m", "f"), ind.number, replace=T)
runlist$birth.year[1:ind.number] <- (-1) * sample(0:54, ind.number, prob=BD$survival[1:55], replace=T)
runlist$birth.place[1:ind.number] <- rep(which(history$initial.settlement==0), each=120)
runlist$residence <- runlist$birth.place
group <- history$initial.group[runlist$birth.place][1:ind.number]
runlist$Aauto[1:ind.number] <- ifelse(group == "Asian", 1, 0)
runlist$Amt[1:ind.number] <- ifelse(group == "Asian", 1, 0)
runlist$AY[1:ind.number] <- ifelse(group == "Asian", 1, 0)
runlist$Aoverall[1:ind.number] <- ifelse(group == "Asian", 1, 0)
females <- which(runlist$sex == "f")
asians <- which(runlist$Aoverall > 0.5)
papuans <- which(runlist$Aoverall < 0.5)
runlist$max.offspring[females[females %in% asians]] <- rpois(sum(females %in% asians), runsummary$asian.fecundities[i])
runlist$max.offspring[females[females %in% papuans]] <- rpois(sum(females %in% papuans), runsummary$papuan.fecundities[i])
autosomes <- matrix(nrow=ind.number, ncol=26)
colnames(autosomes) <- c("indID", "auto1", "auto2", "auto3", "auto4", "auto5", "auto6", "auto7", "auto8", "auto9", "auto10", "auto11", "auto12", "auto13", "auto14", "auto15", "auto16", "auto17", "auto18", "auto19", "auto20", "auto21", "auto22", "auto23", "auto24", "auto25")
autosomes[,1] <- 1:ind.number
group <- ifelse(group=="Asian", 1, 0)
autosomes[,2:26] <- rep(group, 25)
#run simulations (actions for one year)
for (t in 0:4500) {
carrying.capacity <- 120 * 1.0003^t
#deaths
alive <- which(is.na(runlist$death.year) & !is.na(runlist$birth.year)) #living individuals
for (j in 2:56) {
candidates <- alive[t-runlist$birth.year[alive] == j-1] #individuals with age = j-1
deaths <- candidates[which(sample(c(0, 1), length(candidates), prob=c(1 - BD$death.rate[j], BD$death.rate[j]), replace = T) == 1)] #choose candidates with BD$death.rate probability
runlist$death.year[deaths] <- t #set death.year
runlist$death.place[deaths] <- runlist$residence[deaths] #set death.place
partner <- runlist$partner[deaths] #identify partners
if (length(which(!is.na(partner))) > 0) {
runlist$partner[partner[which(!is.na(partner))]] <- NA #divorce partners
}
autosomes <- autosomes[-deaths,] #delete autosomes of deaths to save memory
}
#migration
migratables <- which(t - runlist$birth.year > 11) #individuals with age > 11
migratables <- migratables[which(is.na(runlist$death.year[migratables]))] #alive individuals
migratables <- migratables[which(is.na(runlist$partner[migratables]))] #uncoupled
migratables <- migratables[which(runlist$residence[migratables] == runlist$birth.place[migratables])] #not yet migrated
migratables <- sample(migratables, length(migratables)) #randomize order
for (mig in migratables) {
if (runif(1) < ifelse(runlist$Aoverall[mig] > 0.5, runsummary$asian.migrations[i], runsummary$papuan.migrations[i])) { #will individual j migrate?
deme.candidates <- migration[[runlist$residence[mig]]][,1]
possible <- NULL
for (k in 1:length(deme.candidates)) {
if (!is.na(history$initial.settlement[deme.candidates[k]]) || t >= history$Austronesian.settlement[deme.candidates[k]]) { #deme can be settled at time t
if (length(which(runlist$residence==deme.candidates[k] & !is.na(runlist$birth.year) & is.na(runlist$death.year))) < round(carrying.capacity)) { #deme 'deme.candidates[k]' below carrying capacity
if (runlist$Aoverall[mig] < 0.5) { #test if deme k is within reachable distance for Papuan individuals
if (migration[[runlist$residence[mig]]][k,4] < runsummary[i,7]) {
possible <- c(possible, deme.candidates[k])
}
}
else {
possible <- c(possible, deme.candidates[k])
}
}
}
}
#send individuals with probability 'prob' to the 'possible' deme candidates
dists <- migration[[runlist$residence[mig]]][which(migration[[runlist$residence[mig]]][,1] %in% possible),4]
prob <- dists/sum(dists)
if (length(possible) > 0) {
if (length(prob) == 1) {
new.deme <- possible
}
if (length(prob) > 1) {
new.deme <- sample(possible, 1, prob=prob)
}
runlist$residence[mig] <- new.deme
runlist$time.of.migration[mig] <- t
}
}
}
#coupling & offspring
fertiles <- which(runlist$sex == "f") #select females
fertiles <- fertiles[which(t - runlist$birth.year[fertiles] > 11)] #individuals with age > 11
fertiles <- fertiles[which(is.na(runlist$death.year[fertiles]))] #alive individuals
fertiles <- sample(fertiles, length(fertiles)) #randomize order
for (fer in fertiles) {
if (is.na(runlist$partner[fer])) {
#coupling
partner.candidates <- which(runlist$residence == runlist$residence[fer]) #individuals in same deme
partner.candidates <- partner.candidates[which(runlist$sex[partner.candidates] == "m")] #males in same deme
partner.candidates <- partner.candidates[which(t - runlist$birth.year[partner.candidates] > 11 & is.na(runlist$death.year[partner.candidates]))] #alive males and age > 11 years
partner.candidates <- partner.candidates[which(abs(runlist$birth.year[fer] - runlist$birth.year[partner.candidates]) < 7)] #age difference < 7 years
partner.ancestry <- ifelse(runlist$Aoverall[partner.candidates] > 0.5, 1, 0) #ancestry of candidates
#females marry more often (0.5 + runsummary$marriage.weights[i]) males from other ancestry
if (runlist$Aoverall[fer] > 0.5) { #for Asian women
if (sum(partner.ancestry) == 0 || sum(partner.ancestry) == length(partner.ancestry)) {
new.partner <- sample(partner.candidates, 1)
}
else {
which.partner <- sample(c(0,1), 1, prob=c(0.5 + runsummary$marriage.weights[i], 0.5 - runsummary$marriage.weights[i]))
if (sum(partner.ancestry == which.partner) == 1) {
new.partner <- partner.candidates[which(partner.ancestry == which.partner)]
}
else {
new.partner <- sample(partner.candidates[which(partner.ancestry == which.partner)], 1)
}
}
}
else { #for Papuan women
if (sum(partner.ancestry) == 0 || sum(partner.ancestry) == length(partner.ancestry)) {
new.partner <- sample(partner.candidates, 1)
}
else {
which.partner <- sample(c(0,1), 1, prob=c(0.5 - runsummary$marriage.weights[i], 0.5 + runsummary$marriage.weights[i]))
if (sum(partner.ancestry == which.partner) == 1) {
new.partner <- partner.candidates[which(partner.ancestry == which.partner)]
}
else {
new.partner <- sample(partner.candidates[which(partner.ancestry == which.partner)], 1)
}
}
}
runlist$partner[fer] <- new.partner
runlist$partner[new.partner] <- fer
}
else {
#generate offspring
if (runlist$offspring.number[fer] < runlist$max.offspring[fer]) { #number of children < max. offspring number for female 'fer'
if (length(which(runlist$residence == runlist$residence[fer])) < round(carrying.capacity)) { #deme size < carrying capacity
if (runif(1) < BD$birth.rate[(t - runlist$birth.year[fer]) + 1]) { #test for pregnancy (using birth rate given age of mother)
runlist$offspring.number[fer] <- runlist$offspring.number[fer] + 1 #add children to offspring.number of mother
runlist$offspring.number[runlist$partner[fer]] <- runlist$offspring.number[runlist$partner[fer]] + 1 #add children to offspring.number of father
#initialization of child
childID <- min(which(is.na(runlist$sex)))
runlist$sex[childID] <- ifelse(runif(1) < 0.5, "f", "m")
runlist$birth.year[childID] <- t
runlist$birth.place[childID] <- runlist$residence[fer]
runlist$residence[childID] <- runlist$residence[fer]
runlist$parent1[childID] <- fer
runlist$parent2[childID] <- runlist$partner[fer]
#set mtDNA ancestry from mother
runlist$Amt[childID] <- runlist$Amt[fer]
#set Y-DNA ancestry from father (if child is male)
if (runlist$sex[childID] == "m") {
runlist$AY[childID] <- runlist$AY[runlist$partner[fer]]
}
#random combination of autosomal markers from mother & father
autosomes.row <- nrow(autosomes) + 1 #identify the row number of children in autosomes
autosomes <- rbind(autosomes, NA) #expand autosomes by 1 row
autosomes[autosomes.row, 1] <- childID #set individual ID
child.autosome <- autosomes[which(autosomes[,1] == fer), 2:26] #set mother's autosomes for child
fathers.autosomes <- sort(sample(1:25, sample(12:13, 1))) #select 12 or 13 genes from father's autosomes
child.autosome[fathers.autosomes] <- autosomes[which(autosomes[,1] == runlist$partner[fer]), fathers.autosomes + 1] #replace mother's autosomal markers in childs autosomes
autosomes[autosomes.row, 2:26] <- child.autosome #add autosome of child to 'autosomes'
#calculate Aauto
runlist$Aauto[childID] <- mean(autosomes[autosomes.row, 2:26])
#calculate Aoverall
runlist$Aoverall[childID] <- mean(runlist[childID, 12:14])
}
}
}
}
print(which(fertiles==fer))
}
}
#calculate average distance between simulated and observed ancestries
auto <- NULL
mt <- NULL
Y <- NULL
for (j in 1:length(migration)) {
alive.in.deme.at.t.4500 <- which(runlist$birth.year <= 4500 & is.na(runlist$death.year) & runlist$residence == j)
auto[j] <- mean(runlist$Aauto[alive.in.deme.at.t.4500])
mt[j] <- mean(runlist$Amt[alive.in.deme.at.t.4500])
Y[j] <- mean(runlist$AY[alive.in.deme.at.t.4500])
}
runsummary$Aauto[i] <- mean(abs(auto - admixture$autosomal), na.rm = T)
runsummary$Amt[i] <- mean(abs(mt - admixture$mt.SNPs), na.rm = T)
runsummary$AY[i] <- mean(abs(Y - admixture$Y.SNPs), na.rm = T)
#save files
write.csv(runlist, paste("runlist", i, ".csv", sep=""), row.names=F)
write.csv(runsummary, "runsummary.csv", row.names=F)
#return number of parameter set simulated
print(i)
}
... and here are the required files.
r optimization runtime agent-based-modeling
r optimization runtime agent-based-modeling
asked Nov 10 at 17:47
Anti
285
285
add a comment |
add a comment |
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53241763%2fruntime-optimization-of-agent-based-model-written-with-r%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown