# Protocol for UNOISE
#
# Below protocol based off the following links:
#   https://www.drive5.com/usearch/manual/pipe_readprep.html
#   https://www.drive5.com/usearch/manual/ex_min2.html
#
# NOTE: some steps below require files to be moved by hand between commands,
# so run the sections one at a time rather than executing the file end to end.
# The ${var%suffix} expansions and [[ ... ]] tests require bash.

# First define sample names

# Merge (assemble) paired reads
# https://www.drive5.com/usearch/manual/cmd_fastq_mergepairs.html
for f in 1_raw_reads/*R1_001.fastq; do
  # Skip the literal pattern when the glob matches no files.
  [[ -e "$f" ]] || continue
  usearch -fastq_mergepairs "$f" \
    -fastqout "${f%R1_001.fastq}merged.fq" \
    -relabel @
done

# Split merged reads into each gene using MAUI-seq gene separating script.
# Requires a list.txt file listing all the merged read files to split.
python3 sort_qqad_genes_1b.py

# Move the genes into their own separate files. Then continue with the
# following commands for each gene within each gene file.

# Remove primers from the reads using the remove_primers.py script.
# Moves primerless reads into a directory called 'forward'.
for f in *.fastq; do
  [[ -e "$f" ]] || continue
  python3 remove_primers.py "$f"
done

# Keep only reads whose length equals the expected amplicon size.
# A few sequences after primer trimming are smaller than the expected amplicon
# size, therefore it is best to remove them. (The exact-length test below also
# drops any read that is longer than expected.)
# The cut off length should be different for each gene,
# e.g.: rpoB = 253, recA = 251, nodD = 253, nodA = 259.
# From this point onwards nodD is used as an example.
amplicon_len=253
for f in *_nodd.fastq; do
  [[ -e "$f" ]] || continue
  # paste joins each 4-line FASTQ record into one tab-separated line; awk keeps
  # records whose sequence (tab field 2) has the expected length; tr restores
  # the 4-line layout. Splitting on tabs only keeps headers that contain
  # spaces from shifting the sequence out of field 2.
  paste - - - - < "$f" \
    | awk -F '\t' -v len="$amplicon_len" 'length($2) == len' \
    | tr '\t' '\n' \
    > "${f%.fastq}_trim.fastq"
done

# Quality filter reads
# https://www.drive5.com/usearch/manual/pipe_readprep_filter.html
# The glob matches the *_nodd_trim.fastq files written by the length filter
# above (the same files that are pooled for the ZOTU table further down).
for f in *_nodd_trim.fastq; do
  [[ -e "$f" ]] || continue
  usearch -fastq_filter "$f" \
    -fastq_maxee 1.0 \
    -fastaout "${f%_trim.fastq}_filtered.fa"
done

# Pooling samples
# Combine all reads for all samples, for generating OTUs and making an OTU
# table. Pool samples, i.e. concatenate reads for all samples that were
# sequenced in the same run.
# https://www.drive5.com/usearch/manual/pool_samples.html
cat *filtered.fa > filtered_concat_allsamples.fa

# Dereplication
# The input sequences to cluster_otus or unoise3 must be a set of unique
# sequences sorted in order of decreasing abundance with size annotations in
# the labels.
# I suggest you use -relabel Uniq so that the unique sequences are labeled
# Uniq1, Uniq2 and so on. The input to fastx_uniques should be the reads after
# any quality filtering or length trimming.
# The -fastaout option specifies a FASTA output file for the unique sequences.
# Sequences are sorted by decreasing abundance.
# The -minuniquesize option sets a minimum abundance. Unique sequences with a
# lower abundance are discarded. Default is 1, which means that all unique
# sequences are output.
# Reverse-complemented matching for nucleotide sequences is supported by
# specifying -strand both.

# Find unique read sequences and abundances
# https://www.drive5.com/usearch/manual/upp_derep.html
# https://www.drive5.com/usearch/manual/cmd_fastx_uniques.html
usearch -fastx_uniques filtered_concat_allsamples.fa \
  -sizeout \
  -relabel Uniq \
  -fastaout nodD_zotus_uniques.fa \
  -tabbedout nodD_zotus_uniqueclustering_summary.txt

# Denoise by unoise3 to produce (ZOTUs)
# The -tabbedout option specifies a tabbed text filename which reports the
# processing done for each sequence, e.g. if it is classified as noisy or
# chimeric.
# https://www.drive5.com/usearch/manual/pipe_otus.html
# https://www.drive5.com/usearch/manual/cmd_unoise3.html
usearch -unoise3 nodD_zotus_uniques.fa \
  -zotus nodD_zotus.fa \
  -tabbedout nodD_unoise3.txt

# Concatenate all read files that have had primers removed (but before any
# quality filtering etc.). This will be used as input for making the ZOTU
# table.
# NOTE(review): the pooled file holds FASTQ records even though it is named
# .fa; the name is kept so existing downstream steps keep working - confirm
# that usearch accepts it before renaming.
cat *_nodd_trim.fastq > noprimers_concat_allsamples.fa

# ZOTU output table
# otutab command: Input should be reads before quality filtering and before
# discarding low-abundance unique sequences, e.g. singletons
# https://www.drive5.com/usearch/manual/pipe_otutab.html
usearch -otutab noprimers_concat_allsamples.fa \
  -zotus nodD_zotus.fa \
  -otutabout nodD_zotutable_raw.txt \
  -mapout nodD_zotus_zmap.txt