#===================================================================================# # # # PCA # # # #===================================================================================# #==========================================# # # # DADA2 # # # #==========================================# { setwd('/Users/sara91/Documents/PhD/Materials_and_methods/QQAD/methods_paper/dada2/') library(ggplot2) library(reshape2) library(plyr) library(data.table) library(dplyr) library(viridis) library(BBmisc) require(caret) library(ggfortify) library(ggpubr) #data = read.csv('nodA_seqid_pcamatrix.csv', sep = ',') #head(data) data = read.csv('allgenes_dada2.csv', sep = ';') head(data) names(data)[names(data)=="X..."] <- "sampleID" head(data) # Normalize data by row and sample data_norm <- data[,-1] noda <- data_norm[,1:18] nodd <- data_norm[,19:40] reca <- data_norm[,41:53] rpob <- data_norm[,54:68] for (i in 1:nrow(data_norm)) { normalized1 = (noda[i,]/(sum(noda[i,]))) noda[i,] <- normalized1 normalized2 = (nodd[i,]/(sum(nodd[i,]))) nodd[i,] <- normalized2 normalized3 = (reca[i,]/(sum(reca[i,]))) reca[i,] <- normalized3 normalized4 = (rpob[i,]/(sum(rpob[i,]))) rpob[i,] <- normalized4 } data_norm <- cbind(rpob, reca, noda, nodd) ###### REMOVE ROWS # Remove Aarhus samples #data <- data[-c(25:32),] # Create plot column for colouring p2.data <- data.frame(do.call(rbind, strsplit(as.character(data$sampleID), ""))) p3.data <- paste(p2.data$X1) plot.names <- p3.data data.pca <- data_norm # Replace 0s with 0.00001 data.pca[data.pca == 0] <- 0.0001 # Log transform data for pca # Change end row value to be the ncolumns x = ncol(data.pca) data.pca[ ,1:x] <- log10(data.pca[ ,1:x]) # Remove 0 variance using caret for when Aarhus plots are removed previously NZV <- nearZeroVar(data.pca, saveMetrics = TRUE) nzv.col <- nearZeroVar(data.pca) if(length(nzv.col) > 0) data.pca <- data.pca[, -nzv.col] d.pca <- prcomp(data.pca, center = TRUE, scale. = TRUE) # Create percent variance label for axis percentVar <- d.pca$sdev^2 / sum( d.pca$sdev^2 ) percentVar <- round(100 * percentVar) plotcol <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7") (pcaplot <- ggplot(d.pca, aes(PC1, PC2, color = plot.names , fill = plot.names)) + coord_fixed() + geom_point(size=3) + #must manually apply the percentage variance in this case xlab(paste0("PC1: ",percentVar[1],"% variance")) + ylab(paste0("PC2: ",percentVar[2],"% variance")) + #geom_polygon(data = hulls, alpha = 0.5) + stat_chull(geom = "polygon", alpha = 0.2) + #stat_ellipse(geom = "polygon", alpha = 0.2, type = "t", level = 0.95) + #if you want to look at ellipses of t-distibuted data 95%. #stat_conf_ellipse(geom = "polygon", alpha = 0.2, level = 0.95, bary = TRUE) + #if you want to look at ellipses of mean 95%. bary = false is the same as stat_ellipse(type = "euclid", level - 0.95). stat_mean(size = 5) + scale_color_manual(values = plotcol) + scale_fill_manual(values = plotcol) + theme_bw() + theme(aspect.ratio=1, legend.title = element_blank(), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20), axis.title=element_text(size = 22), legend.text = element_text(size = 20))) ggsave('norm_dada2_all_PC12.pdf', plot = pcaplot, width = 25, height = 20, unit = 'cm') } #==========================================# # # # Individual genes # # # #==========================================# { setwd('/Users/sara91/Documents/PhD/Materials_and_methods/QQAD/methods_paper/dada2/') library(ggplot2) library(reshape2) library(plyr) library(data.table) library(dplyr) library(viridis) library(BBmisc) require(caret) #data = read.csv('nodA_seqid_pcamatrix.csv', sep = ',') #head(data) data = read.csv('allgenes_dada2.csv', sep = ';') head(data) names(data)[names(data)=="X..."] <- "sampleID" head(data) # Create plot column for colouring p2.data <- data.frame(do.call(rbind, strsplit(as.character(data$sampleID), ""))) p3.data <- paste(p2.data$X1) plot.names <- p3.data # Remove name column data.pca <- data[,-1] # Subset genes nodA <- data.pca[,1:18] nodD <- data.pca[,19:40] recA <- data.pca[,41:53] rpoB <- data.pca[,54:68] for (i in 1:nrow(data.pca)) { normalized1 = (nodA[i,]/(sum(nodA[i,]))) nodA[i,] <- normalized1 normalized2 = (nodD[i,]/(sum(nodD[i,]))) nodD[i,] <- normalized2 normalized3 = (recA[i,]/(sum(recA[i,]))) recA[i,] <- normalized3 normalized4 = (rpoB[i,]/(sum(rpoB[i,]))) rpoB[i,] <- normalized4 } # Replace 0s with 0.00001 nodA[nodA == 0] <- 0.0001 nodD[nodD == 0] <- 0.0001 recA[recA == 0] <- 0.0001 rpoB[rpoB == 0] <- 0.0001 # Log transform data for pca # Change end row value to be the ncolumns x = ncol(nodA) nodA[ ,1:x] <- log10(nodA[ ,1:x]) y = ncol(nodD) nodD[ ,1:y] <- log10(nodD[ ,1:y]) z = ncol(recA) recA[ ,1:z] <- log10(recA[ ,1:z]) q = ncol(rpoB) rpoB[ ,1:q] <- log10(rpoB[ ,1:q]) # Remove 0 variance using caret for when Aarhus plots are removed previously nzv.col <- nearZeroVar(nodA) if(length(nzv.col) > 0) nodA <- nodA[, -nzv.col] nzv.col <- nearZeroVar(nodD) if(length(nzv.col) > 0) nodD <- nodD[, -nzv.col] nzv.col <- nearZeroVar(recA) if(length(nzv.col) > 0) recA <- recA[, -nzv.col] nzv.col <- nearZeroVar(rpoB) if(length(nzv.col) > 0) rpoB <- rpoB[, -nzv.col] # PCA analysis nodA.pca <- prcomp(nodA, center = TRUE, scale. = TRUE) nodD.pca <- prcomp(nodD, center = TRUE, scale. = TRUE) recA.pca <- prcomp(recA, center = TRUE, scale. = TRUE) rpoB.pca <- prcomp(rpoB, center = TRUE, scale. = TRUE) # Create percent variance label for axis nodApercentVar <- nodA.pca$sdev^2 / sum( nodA.pca$sdev^2 ) nodApercentVar <- round(100 * nodApercentVar) nodDpercentVar <- nodD.pca$sdev^2 / sum( nodD.pca$sdev^2 ) nodDpercentVar <- round(100 * nodDpercentVar) recApercentVar <- recA.pca$sdev^2 / sum( recA.pca$sdev^2 ) recApercentVar <- round(100 * recApercentVar) rpoBpercentVar <- rpoB.pca$sdev^2 / sum( rpoB.pca$sdev^2 ) rpoBpercentVar <- round(100 * rpoBpercentVar) plotcol <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7") (nodApcaplot <- ggplot(nodA.pca, aes(PC1, PC2, color = plot.names , fill = plot.names)) + coord_fixed() + geom_point(size=3) + #must manually apply the percentage variance in this case xlab(paste0("PC1: ",nodApercentVar[1],"% variance")) + ylab(paste0("PC2: ",nodApercentVar[2],"% variance")) + #geom_polygon(data = hulls, alpha = 0.5) + stat_chull(geom = "polygon", alpha = 0.2) + #stat_ellipse(geom = "polygon", alpha = 0.2, type = "t", level = 0.95) + #if you want to look at ellipses of t-distibuted data 95%. #stat_conf_ellipse(geom = "polygon", alpha = 0.2, level = 0.95, bary = TRUE) + #if you want to look at ellipses of mean 95%. bary = false is the same as stat_ellipse(type = "euclid", level - 0.95). stat_mean(size = 5) + scale_color_manual(values = plotcol) + scale_fill_manual(values = plotcol) + theme_bw() + theme(aspect.ratio=1, legend.title = element_blank(), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20), axis.title=element_text(size = 22), legend.text = element_text(size = 20))) (nodDpcaplot <- ggplot(nodD.pca, aes(PC1, PC2, color = plot.names , fill = plot.names)) + coord_fixed() + geom_point(size=3) + #must manually apply the percentage variance in this case xlab(paste0("PC1: ",nodDpercentVar[1],"% variance")) + ylab(paste0("PC2: ",nodDpercentVar[2],"% variance")) + #geom_polygon(data = hulls, alpha = 0.5) + stat_chull(geom = "polygon", alpha = 0.2) + #stat_ellipse(geom = "polygon", alpha = 0.2, type = "t", level = 0.95) + #if you want to look at ellipses of t-distibuted data 95%. #stat_conf_ellipse(geom = "polygon", alpha = 0.2, level = 0.95, bary = TRUE) + #if you want to look at ellipses of mean 95%. bary = false is the same as stat_ellipse(type = "euclid", level - 0.95). stat_mean(size = 5) + scale_color_manual(values = plotcol) + scale_fill_manual(values = plotcol) + theme_bw() + theme(aspect.ratio=1, legend.title = element_blank(), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20), axis.title=element_text(size = 22), legend.text = element_text(size = 20))) (recApcaplot <- ggplot(recA.pca, aes(PC1, PC2, color = plot.names , fill = plot.names)) + coord_fixed() + geom_point(size=3) + #must manually apply the percentage variance in this case xlab(paste0("PC1: ",recApercentVar[1],"% variance")) + ylab(paste0("PC2: ",recApercentVar[2],"% variance")) + #geom_polygon(data = hulls, alpha = 0.5) + stat_chull(geom = "polygon", alpha = 0.2) + #stat_ellipse(geom = "polygon", alpha = 0.2, type = "t", level = 0.95) + #if you want to look at ellipses of t-distibuted data 95%. #stat_conf_ellipse(geom = "polygon", alpha = 0.2, level = 0.95, bary = TRUE) + #if you want to look at ellipses of mean 95%. bary = false is the same as stat_ellipse(type = "euclid", level - 0.95). stat_mean(size = 5) + scale_color_manual(values = plotcol) + scale_fill_manual(values = plotcol) + theme_bw() + theme(aspect.ratio=1, legend.title = element_blank(), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20), axis.title=element_text(size = 22), legend.text = element_text(size = 20))) (rpoBpcaplot <- ggplot(rpoB.pca, aes(PC1, PC2, color = plot.names , fill = plot.names)) + coord_fixed() + geom_point(size=3) + #must manually apply the percentage variance in this case xlab(paste0("PC1: ",rpoBpercentVar[1],"% variance")) + ylab(paste0("PC2: ",rpoBpercentVar[2],"% variance")) + #geom_polygon(data = hulls, alpha = 0.5) + stat_chull(geom = "polygon", alpha = 0.2) + #stat_ellipse(geom = "polygon", alpha = 0.2, type = "t", level = 0.95) + #if you want to look at ellipses of t-distibuted data 95%. #stat_conf_ellipse(geom = "polygon", alpha = 0.2, level = 0.95, bary = TRUE) + #if you want to look at ellipses of mean 95%. bary = false is the same as stat_ellipse(type = "euclid", level - 0.95). stat_mean(size = 5) + scale_color_manual(values = plotcol) + scale_fill_manual(values = plotcol) + theme_bw() + theme(aspect.ratio=1, legend.title = element_blank(), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20), axis.title=element_text(size = 22), legend.text = element_text(size = 20))) ggsave('dada2_nodA_PC12.pdf', plot = nodApcaplot, width = 25, height = 20, unit = 'cm') ggsave('dada2_nodD_PC12.pdf', plot = nodDpcaplot, width = 25, height = 20, unit = 'cm') ggsave('dada2_recA_PC12.pdf', plot = recApcaplot, width = 25, height = 20, unit = 'cm') ggsave('dada2_rpoB_PC12.pdf', plot = rpoBpcaplot, width = 25, height = 20, unit = 'cm') } #===================================================================================# # # # RATIOS # # # #===================================================================================# library(ggplot2) library("lattice") library(dplyr) library(reshape2) #==========================================# # # # rpoB # # # #==========================================# setwd("/Users/sara91/Documents/PhD/Materials_and_methods/QQAD/methods_paper/dada2/synth_mix/") d <- read.csv("rpoB.csv", sep = ';', dec = ",") names(d)[names(d)=="X..."] <- "Sequence" head(d) ### EXPECTED V OBSERVED RATIO ### {# Correlation platinum rpoB = 0.9870113 cor(d$expected[1:23], d$ratio[1:23]) # Correlation phusion rpoB = 0.9988713 cor(d$expected[24:37], d$ratio[24:37]) # Plot plot <- ggplot(d, aes(x = expected, y = ratio, fill = factor(polymerase))) + coord_fixed() + theme_bw() + geom_point(size = 4, aes(color = factor(polymerase))) + scale_colour_brewer(palette="Set1") + labs(x = "Expected proportion", y = "Observed proportion") + theme(aspect.ratio=1, legend.title = element_blank(), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20), axis.title=element_text(size = 22), legend.text = element_text(size = 20)) #function to save specific ggplot graph. additional parameters(width=,height=,units='cm') ggsave('rpoB_ratio_exp_v_obs.pdf', plot = plot, width = 25, height = 20, unit = 'cm') } ### CHIMERA FREQUENCY ### { plot <- ggplot(d, aes(x = expected, y = chisprop, fill = factor(polymerase))) + coord_fixed() + theme_bw() + geom_point(size = 4, aes(color = factor(polymerase))) + scale_colour_brewer(palette="Set1") + labs(x = "Expected proportion", y = "Chimeras/total counts") + ylim(0, 0.25) + theme(aspect.ratio=1, legend.title = element_blank(), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20), axis.title=element_text(size = 22), legend.text = element_text(size = 20)) #function to save specific ggplot graph. additional parameters(width=,height=,units='cm') ggsave('rpoB_ratio_chimeras.pdf', plot = plot, width = 25, height = 20, unit = 'cm') } #==========================================# # # # recA # # # #==========================================# setwd("/Users/sara91/Documents/PhD/Materials_and_methods/QQAD/methods_paper/dada2/synth_mix/") d <- read.csv("recA.csv", sep = ';', dec = ",") names(d)[names(d)=="X..."] <- "Sequence" head(d) ### EXPECTED V OBSERVED RATIO ### {# Correlation platinum recA = 0.9907104 cor(d$expected[1:23], d$ratio[1:23]) # Correlation phusion recA = 0.9522759 cor(d$expected[24:37], d$ratio[24:37]) # Plot plot <- ggplot(d, aes(x = expected, y = ratio, fill = factor(polymerase))) + coord_fixed() + theme_bw() + geom_point(size = 4, aes(color = factor(polymerase))) + scale_colour_brewer(palette="Set1") + labs(x = "Expected proportion", y = "Observed proportion") + theme(aspect.ratio=1, legend.title = element_blank(), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20), axis.title=element_text(size = 22), legend.text = element_text(size = 20)) #function to save specific ggplot graph. additional parameters(width=,height=,units='cm') ggsave('recA_ratio_exp_v_obs.pdf', plot = plot, width = 25, height = 20, unit = 'cm') } ### CHIMERA FREQUENCY ### { plot <- ggplot(d, aes(x = expected, y = chisprop, fill = factor(polymerase))) + coord_fixed() + theme_bw() + geom_point(size = 4, aes(color = factor(polymerase))) + scale_colour_brewer(palette="Set1") + labs(x = "Expected proportion", y = "Chimeras/total counts") + ylim(0, 0.25) + theme(aspect.ratio=1, legend.title = element_blank(), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20), axis.title=element_text(size = 22), legend.text = element_text(size = 20)) #function to save specific ggplot graph. additional parameters(width=,height=,units='cm') ggsave('recA_ratio_chimeras.pdf', plot = plot, width = 25, height = 20, unit = 'cm') } #==========================================# # # # nodA # # # #==========================================# setwd("/Users/sara91/Documents/PhD/Materials_and_methods/QQAD/methods_paper/dada2/synth_mix/") d <- read.csv("nodA.csv", sep = ';', dec = ",") names(d)[names(d)=="X..."] <- "Sequence" head(d) ### EXPECTED V OBSERVED RATIO ### {# Correlation platinum nodA = 0.9970802 cor(d$expected[1:23], d$ratio[1:23]) # Correlation phusion nodA = 0.9952959 cor(d$expected[24:37], d$ratio[24:37]) # Plot plot <- ggplot(d, aes(x = expected, y = ratio, fill = factor(polymerase))) + coord_fixed() + theme_bw() + geom_point(size = 4, aes(color = factor(polymerase))) + scale_colour_brewer(palette="Set1") + labs(x = "Expected proportion", y = "Observed proportion") + theme(aspect.ratio=1, legend.title = element_blank(), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20), axis.title=element_text(size = 22), legend.text = element_text(size = 20)) #function to save specific ggplot graph. additional parameters(width=,height=,units='cm') ggsave('nodA_ratio_exp_v_obs.pdf', plot = plot, width = 25, height = 20, unit = 'cm') } ### CHIMERA FREQUENCY ### { plot <- ggplot(d, aes(x = expected, y = chisprop, fill = factor(polymerase))) + coord_fixed() + theme_bw() + geom_point(size = 4, aes(color = factor(polymerase))) + scale_colour_brewer(palette="Set1") + labs(x = "Expected proportion", y = "Chimeras/total counts") + ylim(0, 0.25) + theme(aspect.ratio=1, legend.title = element_blank(), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20), axis.title=element_text(size = 22), legend.text = element_text(size = 20)) #function to save specific ggplot graph. additional parameters(width=,height=,units='cm') ggsave('nodA_ratio_chimeras.pdf', plot = plot, width = 25, height = 20, unit = 'cm') } #==========================================# # # # nodD # # # #==========================================# setwd("/Users/sara91/Documents/PhD/Materials_and_methods/QQAD/methods_paper/dada2/synth_mix/") d <- read.csv("nodD.csv", sep = ';', dec = ",") names(d)[names(d)=="X..."] <- "Sequence" head(d) ### EXPECTED V OBSERVED RATIO ### {# Correlation platinum nodD = 0.998199 cor(d$expected[1:23], d$ratio[1:23]) # Correlation phusion noD = 0.9983725 cor(d$expected[24:37], d$ratio[24:37]) # Plot plot <- ggplot(d, aes(x = expected, y = ratio, fill = factor(polymerase))) + coord_fixed() + theme_bw() + geom_point(size = 4, aes(color = factor(polymerase))) + scale_colour_brewer(palette="Set1") + labs(x = "Expected proportion", y = "Observed proportion") + theme(aspect.ratio=1, legend.title = element_blank(), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20), axis.title=element_text(size = 22), legend.text = element_text(size = 20)) #function to save specific ggplot graph. additional parameters(width=,height=,units='cm') ggsave('nodD_ratio_exp_v_obs.pdf', plot = plot, width = 25, height = 20, unit = 'cm') } ### CHIMERA FREQUENCY ### { plot <- ggplot(d, aes(x = expected, y = chisprop, fill = factor(polymerase))) + coord_fixed() + theme_bw() + geom_point(size = 4, aes(color = factor(polymerase))) + scale_colour_brewer(palette="Set1") + labs(x = "Expected proportion", y = "Chimeras/total counts") + ylim(0, 0.25) + theme(aspect.ratio=1, legend.title = element_blank(), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20), axis.title=element_text(size = 22), legend.text = element_text(size = 20)) #function to save specific ggplot graph. additional parameters(width=,height=,units='cm') ggsave('nodD_ratio_chimeras.pdf', plot = plot, width = 25, height = 20, unit = 'cm') } #===================================================================================# # # # HEAT MAPS # # # #===================================================================================# # http://www.roymfrancis.com/a-guide-to-elegant-tiled-heatmaps-in-r/ #==========================================# # # # dada2 # # # #==========================================# { setwd('/Users/sara91/Documents/PhD/Materials_and_methods/QQAD/methods_paper/dada2/') library(ggplot2) library(reshape2) library(plyr) library(data.table) library(dplyr) library(viridis) #data = read.csv('nodA_seqid_pcamatrix.csv', sep = ',') #head(data) data = read.csv('allgenes_dada2_0.7thresh.csv', sep = ';') head(data) names(data)[names(data)=="X..."] <- "sampleID" head(data) # Transform sample ID to numeric type, and sort them numerically #data$sample <- as.numeric(data$sample) #data <- data[order(data$sample),] data_norm <- data[,-1] #normalized = (data_norm[1,]-min(data_norm[1,]))/(max(data_norm[1,])-min(data_norm[1,])) #normalized2 = (data_norm[1,]/(sum(data_norm[1,]))) # Normalize data by row and sample noda <- data_norm[,1:18] nodd <- data_norm[,19:40] reca <- data_norm[,41:53] rpob <- data_norm[,54:68] for (i in 1:nrow(data_norm)) { normalized1 = (noda[i,]/(sum(noda[i,]))) noda[i,] <- normalized1 normalized2 = (nodd[i,]/(sum(nodd[i,]))) nodd[i,] <- normalized2 normalized3 = (reca[i,]/(sum(reca[i,]))) reca[i,] <- normalized3 normalized4 = (rpob[i,]/(sum(rpob[i,]))) rpob[i,] <- normalized4 } #data_norm[,1:10] <- noda #data_norm[,11:28] <- nodd #data_norm[,29:37] <- reca #data_norm[,38:50] <- rpob data_norm <- cbind(rpob, reca, noda, nodd) # Normalize data by row #for (i in 1:nrow(data_norm)) { # normalized2 = (data_norm[i,]/(sum(data_norm[i,]))) # data_norm[i,] <- normalized2 #} # Add sampleID back in data frame data_norm$sampleID <- data$sampleID # Reshape from long to wide data_norm <- as.data.frame(data_norm) p1.data <- melt(data_norm, id='sampleID') # Create gene column for facetting p2.data <- data.frame(do.call(rbind, strsplit(as.character(p1.data$variable), ""))) p3.data <- paste(p2.data$X1, p2.data$X2, p2.data$X3, p2.data$X4, sep='') p1.data$gene <- p3.data # Transform data with log or squareroot p1.data$value <- log10(as.numeric(p1.data$value)) #p1.data$value <- sqrt(p1.data$value) # Replace NAs with 0 #p1.data[is.na(p1.data )] <- 0.0001 #p1.data$value[is.infinite(p1.data$value)] <- 0.0001 y = subset(p1.data, value > -100 ) x = min(y$value) #p1.data[is.na(p1.data )] <- x p1.data$value[is.infinite(p1.data$value)] <- x # Use pretty row names and discard first column row.names(p1.data) <- p1.data$SampleID p1.data <- p1.data[ , !(names(p1.data) %in% c('SampleID'))] # Force order of facets to match list # https://stackoverflow.com/questions/14262497/fixing-the-order-of-facets-in-ggplot p1.data$gene_f = factor(p1.data$gene, levels=c('rpoB','recA','nodA','nodD')) # Plot p = ggplot(p1.data, aes(x=variable, y=sampleID, fill=value)) + geom_tile() + #geom_tile(colour="white",size=0.05) + scale_y_discrete(expand=c(0,0)) + scale_x_discrete(expand=c(0,0)) + theme_bw() + facet_grid(~gene_f, switch = "x", scales = "free_x", space = "free_x") + theme(axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.title=element_text(size = 22), axis.text.y = element_text(size = 20), legend.text = element_text(size = 20), legend.title = element_blank(), strip.text.x = element_text(size = 20), strip.text = element_text(face = "italic")) + labs(x="Amplicon", y = "Sample ID") + scale_fill_viridis() p # reverse direction of colour palette: scale_fill_distiller(palette="BrBG", trans = "reverse") #function to save specific ggplot graph. additional parameters(width=,height=,units='cm') ggsave('dada2_heat_log.pdf', plot = p, width = 50, height = 20, unit = 'cm') }