--- title: "Visualizing Phenotype–Genotype Discrepancies in AMR" output: html_document --- ```{r setup, include=FALSE} # Load all necessary libraries at once library(tidyverse) # Data manipulation and tidying library(janitor) # Clean column names library(pheatmap) # Heatmap plotting (simpler) library(ComplexHeatmap) # Advanced heatmaps library(circlize) # Color functions for ComplexHeatmap library(ggtree) # Phylogenetic tree visualization library(ape) # Tree manipulation library(ggnewscale) # Multiple fill/color scales in ggplot2 library(RColorBrewer) # Color palettes ``` 1. Load and Clean Data ```{r} # Load ResFinder genotypic AMR data, clean and tidy it resfinder_raw <- read_csv("fake_resfinder_results.csv", show_col_types = FALSE) %>% clean_names() %>% # Standardize column names (lowercase + underscores) select(isolate_id, resistance_gene, phenotype) %>% separate_rows(phenotype, sep = ",\\s*") %>% # Split multiple phenotypes into separate rows distinct(isolate_id, phenotype, resistance_gene) %>% mutate(genotypic_resistance = 1) # Indicate gene presence as 1 # Load phenotypic AMR data, tidy to long format, encode resistance as binary pheno_raw <- read_csv("fake_phenotypic_data.csv", show_col_types = FALSE) %>% clean_names() %>% pivot_longer(cols = -isolate_id, names_to = "phenotype", values_to = "phenotypic_resistance") %>% mutate(phenotypic_resistance = ifelse(phenotypic_resistance == "R", 1, 0)) # Combine genotypic and phenotypic data by isolate and phenotype amr_combined <- full_join(pheno_raw, resfinder_raw, by = c("isolate_id", "phenotype")) %>% replace_na(list(genotypic_resistance = 0, resistance_gene = "Phenotypic only")) ``` Mini Exercise 1 Print the first 10 rows of amr_combined. Can you identify rows where phenotype is resistant but no gene was found? ```{r} #Solution ``` 2. Compare Phenotypic and Genotypic Patterns ```{r} # Add a column categorizing combined resistance pattern comparison_summary <- amr_combined %>% mutate(resistance_pattern = case_when( phenotypic_resistance == 1 & genotypic_resistance == 1 ~ "Both Resistant", phenotypic_resistance == 1 & genotypic_resistance == 0 ~ "Phenotypic Only", phenotypic_resistance == 0 & genotypic_resistance == 1 ~ "Genotypic Only", TRUE ~ "Both Susceptible" )) # Filter to show only discrepancies discrepancies <- comparison_summary %>% filter(resistance_pattern != "Both Resistant" & resistance_pattern != "Both Susceptible") # View discrepancies discrepancies ``` Mini Exercise 2 How many isolates have only phenotypic resistance but no genotypic evidence? ```{r} ``` 3. Load Metadata for Isolates ```{r} metadata <- read_csv("updated_example_metadata_with_st.csv", show_col_types = FALSE) %>% clean_names() # Preview metadata head(metadata) ``` 4. Visualize Discrepancies: Simple Heatmap with pheatmap ```{r} # Prepare metadata for annotation (rows must be named by isolate_id) annotation_row <- metadata %>% filter(isolate_id %in% amr_combined$isolate_id) %>% mutate(across(everything(), ~replace_na(., "Unknown"))) %>% column_to_rownames("isolate_id") %>% select(country, source) # Define color palettes for annotation unique_countries <- unique(annotation_row$country) unique_sources <- unique(annotation_row$source) country_colors <- setNames(RColorBrewer::brewer.pal(max(3, length(unique_countries)), "Set3")[1:length(unique_countries)], unique_countries) source_colors <- setNames(RColorBrewer::brewer.pal(max(3, length(unique_sources)), "Set2")[1:length(unique_sources)], unique_sources) annotation_colors <- list(country = country_colors, source = source_colors) # Create combined resistance category matrix for heatmap heatmap_combined <- amr_combined %>% mutate(resistance_combined = case_when( phenotypic_resistance == 1 & genotypic_resistance == 1 ~ 3, phenotypic_resistance == 1 & genotypic_resistance == 0 ~ 1, phenotypic_resistance == 0 & genotypic_resistance == 1 ~ 2, TRUE ~ 0 )) %>% select(isolate_id, phenotype, resistance_combined) %>% pivot_wider(id_cols = isolate_id, names_from = phenotype, values_from = resistance_combined, values_fill = 0) %>% column_to_rownames("isolate_id") %>% as.matrix() # Define colors for resistance states resistance_colors <- c("green", "yellow", "orange", "red") # Plot heatmap pheatmap( heatmap_combined, color = resistance_colors, breaks = c(-0.1, 0.9, 1.9, 2.9, 3.9), legend_breaks = c(0, 1, 2, 3), legend_labels = c("None", "Phenotypic Only", "Genotypic Only", "Both"), cluster_rows = FALSE, cluster_cols = FALSE, annotation_row = annotation_row[rownames(heatmap_combined), ], annotation_colors = annotation_colors, main = "AMR Profile: Phenotypic and Genotypic Resistance" ) ``` Mini Exercise 3 Try changing the cluster_rows and cluster_cols arguments to TRUE. What happens? What do the clusters show? ```{r} #Changing cluster_rows and cluster_cols to TRUE groups similar isolates and phenotypes visually ``` 5. Visualize with ComplexHeatmap (more customizable) ```{r} ha_row <- rowAnnotation( df = annotation_row, col = list(country = country_colors, source = source_colors), show_annotation_name = TRUE ) resistance_colors_named <- c( "0" = "green", "1" = "#E69F00", "2" = "#56B4E9", "3" = "red" ) Heatmap( heatmap_combined, name = "Resistance", col = resistance_colors_named, row_names_side = "left", column_names_rot = 45, row_names_gp = gpar(fontsize = 10), column_names_gp = gpar(fontsize = 10), cluster_rows = FALSE, cluster_columns = FALSE, left_annotation = ha_row, heatmap_legend_param = list( title = "Resistance Pattern", at = c(0, 1, 2, 3), labels = c("None", "Phenotypic Only", "Genotypic Only", "Both") ) ) ``` 6. Phylogenetic Tree Visualization with ggtree ```{r} # Load the tree tree <- read.tree("updated_example_tree.nwk") # Load and clean metadata metadata <- read_csv("updated_example_metadata_with_st.csv", show_col_types = FALSE) %>% clean_names() # Check tip labels match (they do based on your message, so this is just precaution) metadata <- metadata %>% filter(isolate_id %in% tree$tip.label) # Plot tree with ST type p <- ggtree(tree) %<+% metadata + geom_tiplab(size = 3, offset = 0.2) + geom_tippoint(aes(color = st_type), size = 4, na.rm = TRUE) + scale_color_brewer(palette = "Set1", name = "ST Type") + theme_tree2() p ``` 7. Combined Tree and Heatmap with Separate Legends ```{r} # Load tree tree <- read.tree("updated_example_tree.nwk") # Load and clean metadata as data.frame with rownames metadata <- read_csv("updated_example_metadata_with_st.csv", show_col_types = FALSE) %>% clean_names() %>% mutate(across(everything(), ~replace_na(., "Unknown"))) %>% as.data.frame() rownames(metadata) <- metadata$isolate_id # Prepare annotation data (metadata columns only, base data.frame) annotation_data <- metadata[, c("country", "source", "st_type"), drop = FALSE] # Prepare resistance data matrix (replace amr_combined with your real data) heatmap_combined <- amr_combined %>% mutate(resistance_combined = case_when( phenotypic_resistance == 1 & genotypic_resistance == 1 ~ 3, phenotypic_resistance == 1 & genotypic_resistance == 0 ~ 1, phenotypic_resistance == 0 & genotypic_resistance == 1 ~ 2, TRUE ~ 0 )) %>% select(isolate_id, phenotype, resistance_combined) %>% pivot_wider(id_cols = isolate_id, #Converts data to wide format: isolates as rows, antibiotics as columns, Rows named by isolate IDs. names_from = phenotype, values_from = resistance_combined, values_fill = 0) %>% column_to_rownames("isolate_id") %>% as.data.frame() # Convert numeric to factor with explicit levels, Converts numeric resistance values to factors with explicit levels for consistent coloring. heatmap_factor <- heatmap_combined heatmap_factor[] <- lapply(heatmap_factor, function(x) factor(as.character(x), levels = c("0","1","2","3"))) # Reorder rows to match tree tip labels #Reorders both the resistance matrix and metadata so rows match exactly the order of the isolates in the tree. #Ensures annotations align correctly with tree tips. heatmap_factor <- heatmap_factor[tree$tip.label, , drop = FALSE] annotation_data <- annotation_data[tree$tip.label, , drop = FALSE] # Define colors "Defines named color mappings for: Resistance categories, Countries, Sample sources, Sequence Types (ST)." resistance_colors <- c("0"="green", "1"="yellow", "2"="orange", "3"="red") # Plot #Basic tree plot with tip labels. p <- ggtree(tree, branch.length = "none") + geom_tiplab(size=3) p <- gheatmap(p, annotation_data, #Adds a heatmap next to the tree showing metadata columns (country, source, st_type) as colored bars. offset=0.2, width=0.4, colnames=TRUE, color=NA) p <- p + new_scale_fill() #Allows adding another fill scale (important so each heatmap can have its own discrete color legend). p <- gheatmap(p, heatmap_factor, #Adds the resistance heatmap aligned with tree tips. #Columns are antibiotic names, rotated 45° for clarity. #Uses the specified resistance color palette. offset=0.7, width=0.6, colnames=TRUE, colnames_angle=45, colnames_offset_y=5, font.size=3) + scale_fill_manual(values = resistance_colors, name = "Resistance") print(p) ``` ```{r} # Assuming tree, metadata, heatmap_factor prepared # Define colors country_colors <- setNames(brewer.pal(length(unique(metadata$country)), "Set3"), unique(metadata$country)) source_colors <- setNames(brewer.pal(length(unique(metadata$source)), "Pastel2"), unique(metadata$source)) st_colors <- setNames(brewer.pal(length(unique(metadata$st_type)), "Set1"), unique(metadata$st_type)) resistance_colors <- c("0"="green", "1"="yellow", "2"="orange", "3"="red") # Base tree with ST tip points p <- ggtree(tree, branch.length = "none") %<+% metadata + geom_tiplab(size=3) + geom_tippoint(aes(color = st_type), size = 4) + scale_color_manual(name = "ST Type", values = st_colors) # Add country heatmap p <- gheatmap(p, metadata["country"], offset = 0.2, width = 0.15, colnames = FALSE, color = NA) + scale_fill_manual(name = "Country", values = country_colors) # Add new fill scale for next metadata heatmap p <- p + new_scale_fill() # Add source heatmap p <- gheatmap(p, metadata["source"], offset = 0.37, width = 0.15, colnames = FALSE, color = NA) + scale_fill_manual(name = "Source", values = source_colors) # Add new fill scale for resistance heatmap p <- p + new_scale_fill() # Add resistance heatmap p <- gheatmap(p, heatmap_factor, offset = 0.55, width = 0.6, colnames = TRUE, colnames_angle = 45, colnames_offset_y = 5, font.size = 3) + scale_fill_manual(name = "Resistance", values = resistance_colors, breaks = names(resistance_colors), labels = c("None", "Phenotypic Only", "Genotypic Only", "Both")) print(p) ``` Mini Exercise 5 Try changing the heatmap column order by reordering heatmap_factor columns. How does the plot change? ```{r} ```