library(ggplot2)
library(plotly)
library(ggpubr)
library(Rcpp)
library(vegan)
library(data.table)
devtools::install_github("teunbrand/ggh4x")
library(ggh4x)
devtools::install_github("joey711/phyloseq")
library(phyloseq)
devtools::install_github("schuyler-smith/schuylR")
library(schuylR)
devtools::install_github("schuyler-smith/ssBLAST")
library(ssBLAST)
devtools::install_github("schuyler-smith/phylosmith")
library(phylosmith)
rarefy <- function(data, n){
col_names <- colnames(data)
rarefied_table <- data.table(categories=data[[1]])
for(sample in seq(ncol(data)-1)+1){
rarvec <- vector()
for(i in seq(length(data[[sample]]))){
rarvec <- c(rarvec, rep(i, data[[sample]][i]))
}
subsample <- sample(rarvec, n, replace=FALSE)
subsample <- table(subsample)
subsample <- merge(data.frame(index = seq(length(data[[sample]]))),
data.table(subsample), by = 1, all = TRUE)[,2]
subsample[is.na(subsample)] <- 0
rarefied_table <- cbind(rarefied_table, subsample)
}
colnames(rarefied_table) <- c(col_names)
return(rarefied_table)
}
rarefaction <- function(data, max_n=NA, intervals=10){
if(is.na(max_n)){max_n <- min(colSums(data[,-1]))}
n_categories <- vector()
n <- vector()
for(i in seq(intervals, max_n, intervals)){
n <- c(n, i)
n_categories <- c(n_categories, sum(rowSums(rarefy(data, i)[,-1]) > 0))
}
return(data.frame(n=n, counts=n_categories))
}
rarefaction_curve <- function(data, max_n=NA, intervals=10, iter = 100){
rarefied_counts <- data.frame()
for(i in seq(iter)){
rarefied_counts <- rbind(rarefied_counts,
rarefaction(data, max_n, intervals))}
g <- ggplot(rarefied_counts, aes_string(x="n", y="counts")) +
geom_smooth(formula=y ~ x, method="loess", se=TRUE, fullrange=TRUE, level=0.95, color='black') +
theme_bw()
return(g)
}
Schuyler Smith
Ph.D. Student - Bioinformatics and Computational Biology
Iowa State University. Ames, IA.