### Lesson 3 code ###
#
# Walk-through of dplyr verbs, the pipe, joins, pivots and ggplot2, using
# "Mozambique.xlsx" and "data2.txt" from the working directory.
# Most calls are left unassigned on purpose so their result is printed.

# Chunk 1: opening and exploring the data ----

# Install tidyverse only when it is missing, so that re-running the script
# does not trigger a fresh download every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)
library(readxl)
# viridis provides the scale_*_viridis() colour scales used in chunk 5.
# It is not part of the tidyverse and has to be installed separately.
library(viridis)

# Cells containing the literal text "NA" are read as missing values.
Moz <- read_excel("Mozambique.xlsx", na = "NA")
str(Moz)

# Select columns
select(Moz, Lineage, Age, Province)

# Negative selection: all columns except SampleID
select(Moz, -SampleID)

# Filter based on a value in a column
filter(Moz, final_dataset == 1)

# Filter on multiple columns (conditions are combined with AND)
filter(Moz, final_dataset == 1, Res_Summary == "MDR")

# Create a new column with values based on another column
mutate(Moz, adult = if_else(Age >= 18, "Yes", "No"))

# Change the internal structure of the tibble (adds grouping, keeps the data)
group_by(Moz, Lineage)

# Chunk 2: using the %>% operator ----

Moz %>% # Use the Moz dataset
  group_by(Lineage) %>% # Make internal groups based on the Lineage column
  summarize(avg_age = mean(Age, na.rm = TRUE)) # Average age per lineage

# Count rows per value of the Lineage column
count(Moz, Lineage)

# Per lineage: the fraction of samples whose d12 value is not "ungrouped".
Moz %>%
  filter(final_dataset == 1) %>%
  mutate(transmission = if_else(d12 == "ungrouped", "No", "Yes")) %>%
  group_by(Lineage, transmission) %>%
  # Count based on two groupings, lineage and transmission
  summarise(lineage_count = n()) %>%
  ungroup() %>% # Remove the internal group structure
  group_by(Lineage) %>%
  # sum() runs within each lineage, so this is a within-lineage proportion
  mutate(transmission_index = lineage_count / sum(lineage_count)) %>%
  filter(transmission == "Yes") %>%
  ungroup() # Do not leave a grouped tibble behind

# Chunk 3: joining tibbles ----

# Creating a new table for illustration purposes
groups <- Moz %>%
  select(SampleID, d12)

# Removing the d12 column from the Mozambique dataset
Moz <- Moz %>%
  select(-d12)
str(Moz)

# Adding groups back to Moz. SampleID is the only column the two tables
# share at this point, so the join key is spelled out explicitly.
Moz <- left_join(Moz, groups, by = "SampleID")

# Chunk 4: transforming data using pivots ----

data2 <- read.csv("data2.txt", header = TRUE, sep = ";")

# Transform data2 so that height and weight measurements go in one column
data2 <- data2 %>%
  pivot_longer(
    cols = c(height, weight),
    names_to = "Measurement",
    values_to = "Value"
  )

# Transform data2 back towards its original form.
# NOTE(review): the file is re-read below and a unique `name` column is added
# before repeating the round trip, which suggests this first attempt is meant
# to show what happens without a row identifier - confirm against the lesson.
data2 <- data2 %>%
  pivot_wider(names_from = "Measurement", values_from = "Value")

data2 <- read.csv("data2.txt", header = TRUE, sep = ";")

# Add a column with unique identifiers.
# Assumes data2.txt has exactly three rows; add_column() errors otherwise.
data2 <- data2 %>%
  add_column(name = c("Viola", "Ivan", "Christian"))

data2 <- data2 %>%
  pivot_longer(
    cols = c(height, weight),
    names_to = "Measurement",
    values_to = "Value"
  )

data2 <- data2 %>%
  pivot_wider(names_from = "Measurement", values_from = "Value")

# Chunk 5: ggplot2 ----

# Basic scatter plot
ggplot(Moz, aes(x = Age, y = Weight)) +
  geom_point()

# Jitter the points to reduce overplotting
ggplot(Moz, aes(x = Age, y = Weight)) +
  geom_point(position = "jitter")

# Restrict both axes; observations outside the limits are dropped
ggplot(Moz, aes(x = Age, y = Weight)) +
  geom_point(position = "jitter") +
  scale_x_continuous(limits = c(10, 60)) +
  scale_y_continuous(limits = c(20, 80))

# Map Lineage to the point shape
ggplot(Moz, aes(x = Age, y = Weight, shape = Lineage)) +
  geom_point()

# Colour by Gender and add a title and axis labels.
# The labels follow the aesthetics: Age is on x, Weight is on y.
ggplot(Moz, aes(x = Age, y = Weight, color = Gender)) +
  geom_point(size = 3, shape = 19) +
  labs(
    title = "Scatter Plot: Age vs Weight",
    x = "Age",
    y = "Weight"
  )

# Additionally map the `quality` column to transparency
ggplot(Moz, aes(x = Age, y = Weight, color = Gender, alpha = quality)) +
  geom_point(size = 3, shape = 19)

# Add a smoothed trend line over all points
ggplot(Moz, aes(x = Age, y = Weight)) +
  geom_point() +
  geom_smooth()

# With color in the top-level aes(), one trend line is drawn per Gender
ggplot(Moz, aes(x = Age, y = Weight, color = Gender)) +
  geom_point() +
  geom_smooth()

# Bar chart of sample counts per year, stacked by Res_Summary
ggplot(Moz, aes(x = Year_of_sample_collection, fill = Res_Summary)) +
  geom_bar()

# Same chart with the discrete viridis fill palette
ggplot(Moz, aes(x = Year_of_sample_collection, fill = Res_Summary)) +
  geom_bar() +
  scale_fill_viridis(discrete = TRUE)

# position = "fill": every bar is scaled to height 1 (proportions)
ggplot(Moz, aes(x = Year_of_sample_collection, fill = Res_Summary)) +
  geom_bar(position = "fill") +
  scale_fill_viridis(discrete = TRUE)

# position = "dodge": bars side by side instead of stacked
ggplot(Moz, aes(x = Year_of_sample_collection, fill = Res_Summary)) +
  geom_bar(position = "dodge") +
  scale_fill_viridis(discrete = TRUE) +
  theme_classic()

# One panel per Province; points coloured by collection year on a
# continuous viridis scale (option "B")
ggplot(Moz, aes(x = Lineage, y = Res_Summary)) +
  geom_point(
    position = "jitter",
    aes(color = Year_of_sample_collection)
  ) +
  facet_wrap(~ Province) +
  scale_color_viridis(discrete = FALSE, option = "B")