## Following DADA2 tutorial v1.8 ##
## Install DADA2 and applicable packages ##
# source("https://bioconductor.org/biocLite.R")
# biocLite("dada2")

## Load DADA2 package ##
library(dada2); packageVersion("dada2")

##################################################
##################### recA #######################
##################################################
{
  ## Define path for unzipped fastq read files ##
  path <- '/Users/sara91/Documents/PhD/Materials_and_methods/QQAD/methods_paper/dada2/synth_mix/recA/'
  list.files(path)

  ## String manipulation to get matched lists of the forward and reverse fastq files ##
  fnFs <- sort(list.files(path, pattern="L001_R1_001.fastq", full.names = TRUE))
  fnRs <- sort(list.files(path, pattern="L001_R2_001.fastq", full.names = TRUE))
  sample.names <- sapply(strsplit(basename(fnFs), "_"), `[`, 1)
  forward_plot <- plotQualityProfile(fnFs[1:4])
  reverse_plot <- plotQualityProfile(fnRs[1:4])

  ## Place filtered files in filtered/ subdirectory ##
  filtFs <- file.path(path, "filtered", paste0(sample.names, "_F_filt.fastq.gz"))
  filtRs <- file.path(path, "filtered", paste0(sample.names, "_R_filt.fastq.gz"))
  out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, truncLen=c(250,250), matchIDs=TRUE,
                       maxN=0, maxEE=c(2,2), truncQ=2, rm.phix=TRUE,
                       compress=TRUE, multithread=TRUE)
  head(out)

  ## Learn error rates ##
  errF <- learnErrors(filtFs, multithread=TRUE)
  errR <- learnErrors(filtRs, multithread=TRUE)

  ## Plot error rates to check ##
  errorplot_f <- plotErrors(errF, nominalQ=TRUE)
  errorplot_r <- plotErrors(errR, nominalQ=TRUE)

  ## Dereplication ##
  derepFs <- derepFastq(filtFs, verbose=TRUE)
  derepRs <- derepFastq(filtRs, verbose=TRUE)
  # Name the derep-class objects by the sample names
  names(derepFs) <- sample.names
  names(derepRs) <- sample.names

  ## Sample inference ##
  dadaFs <- dada(derepFs, err=errF, multithread=TRUE)
  dadaRs <- dada(derepRs, err=errR, multithread=TRUE)
  dadaFs[[1]]

  ## Merge paired reads ##
  mergers <- mergePairs(dadaFs, derepFs, dadaRs, derepRs, verbose=TRUE)
  head(mergers[[1]])

  ## Construct an amplicon sequence variant (ASV) table ##
  seqtab <- makeSequenceTable(mergers)
  dim(seqtab)
  # Inspect distribution of sequence lengths
  table(nchar(getSequences(seqtab)))

  ## Remove chimeras ##
  seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus", multithread=TRUE, verbose=TRUE)
  dim(seqtab.nochim)
  write.csv(seqtab.nochim, '/Users/sara91/Documents/PhD/Materials_and_methods/QQAD/methods_paper/dada2/synth_mix/asv_table_recA.csv')
  # Fraction of reads retained after chimera removal
  sum(seqtab.nochim)/sum(seqtab)
}

##################################################
##################### rpoB #######################
##################################################
{
  ## Define path for unzipped fastq read files ##
  path <- '/Users/sara91/Documents/PhD/Materials_and_methods/QQAD/methods_paper/dada2/synth_mix/rpoB/'
  list.files(path)

  ## String manipulation to get matched lists of the forward and reverse fastq files ##
  fnFs <- sort(list.files(path, pattern="L001_R1_001.fastq", full.names = TRUE))
  fnRs <- sort(list.files(path, pattern="L001_R2_001.fastq", full.names = TRUE))
  sample.names <- sapply(strsplit(basename(fnFs), "_"), `[`, 1)
  forward_plot <- plotQualityProfile(fnFs[1:4])
  reverse_plot <- plotQualityProfile(fnRs[1:4])

  ## Place filtered files in filtered/ subdirectory ##
  filtFs <- file.path(path, "filtered", paste0(sample.names, "_F_filt.fastq.gz"))
  filtRs <- file.path(path, "filtered", paste0(sample.names, "_R_filt.fastq.gz"))
  out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, truncLen=c(250,250), matchIDs=TRUE,
                       maxN=0, maxEE=c(2,2), truncQ=2,
                       rm.phix=TRUE, compress=TRUE, multithread=TRUE)
  head(out)

  ## Learn error rates ##
  errF <- learnErrors(filtFs, multithread=TRUE)
  errR <- learnErrors(filtRs, multithread=TRUE)

  ## Plot error rates to check ##
  errorplot_f <- plotErrors(errF, nominalQ=TRUE)
  errorplot_r <- plotErrors(errR, nominalQ=TRUE)

  ## Dereplication ##
  derepFs <- derepFastq(filtFs, verbose=TRUE)
  derepRs <- derepFastq(filtRs, verbose=TRUE)
  # Name the derep-class objects by the sample names
  names(derepFs) <- sample.names
  names(derepRs) <- sample.names

  ## Sample inference ##
  dadaFs <- dada(derepFs, err=errF, multithread=TRUE)
  dadaRs <- dada(derepRs, err=errR, multithread=TRUE)
  dadaFs[[1]]

  ## Merge paired reads ##
  mergers <- mergePairs(dadaFs, derepFs, dadaRs, derepRs, verbose=TRUE)
  head(mergers[[1]])

  ## Construct an amplicon sequence variant (ASV) table ##
  seqtab <- makeSequenceTable(mergers)
  dim(seqtab)
  # Inspect distribution of sequence lengths
  table(nchar(getSequences(seqtab)))

  ## Remove chimeras ##
  seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus", multithread=TRUE, verbose=TRUE)
  dim(seqtab.nochim)
  write.csv(seqtab.nochim, '/Users/sara91/Documents/PhD/Materials_and_methods/QQAD/methods_paper/dada2/synth_mix/asv_table_rpoB.csv')
  # Fraction of reads retained after chimera removal
  sum(seqtab.nochim)/sum(seqtab)
}

##################################################
##################### nodA #######################
##################################################
{
  ## Define path for unzipped fastq read files ##
  path <- '/Users/sara91/Documents/PhD/Materials_and_methods/QQAD/methods_paper/dada2/synth_mix/nodA/'
  list.files(path)

  ## String manipulation to get matched lists of the forward and reverse fastq files ##
  fnFs <- sort(list.files(path, pattern="L001_R1_001.fastq", full.names = TRUE))
  fnRs <- sort(list.files(path, pattern="L001_R2_001.fastq", full.names = TRUE))
  sample.names <- sapply(strsplit(basename(fnFs), "_"), `[`, 1)
  forward_plot <- plotQualityProfile(fnFs[1:4])
  reverse_plot <- plotQualityProfile(fnRs[1:4])

  ## Place filtered files in filtered/ subdirectory ##
  filtFs <- file.path(path, "filtered", paste0(sample.names, "_F_filt.fastq.gz"))
  filtRs <- file.path(path, "filtered", paste0(sample.names, "_R_filt.fastq.gz"))
  out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, truncLen=c(250,250), matchIDs=TRUE,
                       maxN=0, maxEE=c(2,2), truncQ=2, rm.phix=TRUE,
                       compress=TRUE, multithread=TRUE)
  head(out)

  ## Learn error rates ##
  errF <- learnErrors(filtFs, multithread=TRUE)
  errR <- learnErrors(filtRs, multithread=TRUE)

  ## Plot error rates to check ##
  errorplot_f <- plotErrors(errF, nominalQ=TRUE)
  errorplot_r <- plotErrors(errR, nominalQ=TRUE)

  ## Dereplication ##
  derepFs <- derepFastq(filtFs, verbose=TRUE)
  derepRs <- derepFastq(filtRs, verbose=TRUE)
  # Name the derep-class objects by the sample names
  names(derepFs) <- sample.names
  names(derepRs) <- sample.names

  ## Sample inference ##
  dadaFs <- dada(derepFs, err=errF, multithread=TRUE)
  dadaRs <- dada(derepRs, err=errR, multithread=TRUE)
  dadaFs[[1]]

  ## Merge paired reads ##
  mergers <- mergePairs(dadaFs, derepFs, dadaRs, derepRs, verbose=TRUE)
  head(mergers[[1]])

  ## Construct an amplicon sequence variant (ASV) table ##
  seqtab <- makeSequenceTable(mergers)
  dim(seqtab)
  # Inspect distribution of sequence lengths
  table(nchar(getSequences(seqtab)))

  ## Remove chimeras ##
  seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus", multithread=TRUE, verbose=TRUE)
  dim(seqtab.nochim)
  write.csv(seqtab.nochim, '/Users/sara91/Documents/PhD/Materials_and_methods/QQAD/methods_paper/dada2/synth_mix/asv_table_nodA.csv')
  # Fraction of reads retained after chimera removal
  sum(seqtab.nochim)/sum(seqtab)
}
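
## Optional sketch (not in the original script): track reads through the pipeline,      ##
## following the read-tracking step in the DADA2 tutorial. It assumes the objects from  ##
## the most recently run locus block (out, dadaFs, dadaRs, mergers, seqtab.nochim) are  ##
## still in memory, so run it at the end of a block before they are overwritten.        ##
getN <- function(x) sum(getUniques(x))
track <- cbind(out,
               sapply(dadaFs, getN),    # reads per sample after forward denoising
               sapply(dadaRs, getN),    # reads per sample after reverse denoising
               sapply(mergers, getN),   # reads per sample after merging pairs
               rowSums(seqtab.nochim))  # reads per sample after chimera removal
colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim")
rownames(track) <- sample.names
head(track)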
##################################################
##################### nodD #######################
##################################################
{
  ## Define path for unzipped fastq read files ##
  path <- '/Users/sara91/Documents/PhD/Materials_and_methods/QQAD/methods_paper/dada2/synth_mix/nodD/'
  list.files(path)

  ## String manipulation to get matched lists of the forward and reverse fastq files ##
  fnFs <- sort(list.files(path, pattern="L001_R1_001.fastq", full.names = TRUE))
  fnRs <- sort(list.files(path, pattern="L001_R2_001.fastq", full.names = TRUE))
  sample.names <- sapply(strsplit(basename(fnFs), "_"), `[`, 1)
  forward_plot <- plotQualityProfile(fnFs[1:4])
  reverse_plot <- plotQualityProfile(fnRs[1:4])

  ## Place filtered files in filtered/ subdirectory ##
  filtFs <- file.path(path, "filtered", paste0(sample.names, "_F_filt.fastq.gz"))
  filtRs <- file.path(path, "filtered", paste0(sample.names, "_R_filt.fastq.gz"))
  out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, truncLen=c(250,250), matchIDs=TRUE,
                       maxN=0, maxEE=c(2,2), truncQ=2, rm.phix=TRUE,
                       compress=TRUE, multithread=TRUE)
  head(out)

  ## Learn error rates ##
  errF <- learnErrors(filtFs, multithread=TRUE)
  errR <- learnErrors(filtRs, multithread=TRUE)

  ## Plot error rates to check ##
  errorplot_f <- plotErrors(errF, nominalQ=TRUE)
  errorplot_r <- plotErrors(errR, nominalQ=TRUE)

  ## Dereplication ##
  derepFs <- derepFastq(filtFs, verbose=TRUE)
  derepRs <- derepFastq(filtRs, verbose=TRUE)
  # Name the derep-class objects by the sample names
  names(derepFs) <- sample.names
  names(derepRs) <- sample.names

  ## Sample inference ##
  dadaFs <- dada(derepFs, err=errF, multithread=TRUE)
  dadaRs <- dada(derepRs, err=errR, multithread=TRUE)
  dadaFs[[1]]

  ## Merge paired reads ##
  mergers <- mergePairs(dadaFs, derepFs, dadaRs, derepRs, verbose=TRUE)
  head(mergers[[1]])

  ## Construct an amplicon sequence variant (ASV) table ##
  seqtab <- makeSequenceTable(mergers)
  dim(seqtab)
  # Inspect distribution of sequence lengths
  table(nchar(getSequences(seqtab)))

  ## Remove chimeras ##
  seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus", multithread=TRUE, verbose=TRUE)
  dim(seqtab.nochim)
  write.csv(seqtab.nochim, '/Users/sara91/Documents/PhD/Materials_and_methods/QQAD/methods_paper/dada2/synth_mix/asv_table_nodD.csv')
  # Fraction of reads retained after chimera removal
  sum(seqtab.nochim)/sum(seqtab)
}
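
## Optional refactor sketch (not part of the original workflow): the four locus blocks ##
## above run an identical pipeline, so the shared steps could be wrapped in a helper.  ##
## The function name run_dada2_locus and its arguments are illustrative assumptions;   ##
## parameters mirror the calls used above.                                             ##
run_dada2_locus <- function(path, asv_csv, truncLen = c(250, 250)) {
  # Matched forward/reverse read files and sample names
  fnFs <- sort(list.files(path, pattern="L001_R1_001.fastq", full.names = TRUE))
  fnRs <- sort(list.files(path, pattern="L001_R2_001.fastq", full.names = TRUE))
  sample.names <- sapply(strsplit(basename(fnFs), "_"), `[`, 1)

  # Filter and trim into a filtered/ subdirectory
  filtFs <- file.path(path, "filtered", paste0(sample.names, "_F_filt.fastq.gz"))
  filtRs <- file.path(path, "filtered", paste0(sample.names, "_R_filt.fastq.gz"))
  out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, truncLen=truncLen, matchIDs=TRUE,
                       maxN=0, maxEE=c(2,2), truncQ=2, rm.phix=TRUE,
                       compress=TRUE, multithread=TRUE)

  # Learn error rates, dereplicate, and infer sample composition
  errF <- learnErrors(filtFs, multithread=TRUE)
  errR <- learnErrors(filtRs, multithread=TRUE)
  derepFs <- derepFastq(filtFs, verbose=TRUE); names(derepFs) <- sample.names
  derepRs <- derepFastq(filtRs, verbose=TRUE); names(derepRs) <- sample.names
  dadaFs <- dada(derepFs, err=errF, multithread=TRUE)
  dadaRs <- dada(derepRs, err=errR, multithread=TRUE)

  # Merge pairs, build the ASV table, remove chimeras, and write the CSV
  mergers <- mergePairs(dadaFs, derepFs, dadaRs, derepRs, verbose=TRUE)
  seqtab <- makeSequenceTable(mergers)
  seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus",
                                      multithread=TRUE, verbose=TRUE)
  write.csv(seqtab.nochim, asv_csv)
  invisible(seqtab.nochim)
}

## Example (hypothetical call mirroring the recA block above):
# run_dada2_locus('/Users/sara91/Documents/PhD/Materials_and_methods/QQAD/methods_paper/dada2/synth_mix/recA/',
#                 '/Users/sara91/Documents/PhD/Materials_and_methods/QQAD/methods_paper/dada2/synth_mix/asv_table_recA.csv')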