# Sleep quality versus physical activity level: univariate summaries, a
# grouped numerical summary, a boxplot, and a permutation test comparing the
# "high" and "low" activity groups. Data come from Health_Sleep_Statistics.csv.

library(dplyr)
library(ggplot2)

df <- read.csv("Health_Sleep_Statistics.csv")

# Univariate analysis: Sleep.Quality ----
# Sleep.Quality is the y variable in the bivariate plot further down.
mean(df$Sleep.Quality, na.rm = TRUE)
median(df$Sleep.Quality, na.rm = TRUE)
sd(df$Sleep.Quality, na.rm = TRUE)

# Univariate analysis: Physical.Activity.Level ----
# Physical.Activity.Level is the x variable in the bivariate plot further down.
# Three colours are supplied; presumably the column has three levels
# (TODO confirm against the data).
barplot(
  table(df$Physical.Activity.Level),
  main = "Physical Activity Levels",
  xlab = "Activity Level",
  ylab = "Number of People",
  col = c("lightblue", "lightgreen", "salmon")
)

# Bivariate analysis: numerical summary ----
# Mean, median, standard deviation and group size of Sleep.Quality within each
# Physical.Activity.Level.
summary_stats <- df %>%
  group_by(Physical.Activity.Level) %>%
  summarise(
    Mean = mean(Sleep.Quality, na.rm = TRUE),
    Median = median(Sleep.Quality, na.rm = TRUE),
    SD = sd(Sleep.Quality, na.rm = TRUE),
    n = n()
  )
summary_stats

# Bivariate analysis: visualization ----
# Boxplot of Sleep.Quality for each Physical.Activity.Level. The y axis shows
# Sleep.Quality, so it is labelled "Sleep Quality" to match the title.
ggplot(
  df,
  aes(
    x = Physical.Activity.Level,
    y = Sleep.Quality,
    fill = Physical.Activity.Level
  )
) +
  geom_boxplot() +
  labs(
    title = "Sleep Quality vs Physical Activity Level",
    x = "Physical Activity Level",
    y = "Sleep Quality"
  ) +
  scale_fill_manual(values = c("lightblue", "lightgreen", "salmon"))

# Permutation test: mean Sleep.Quality, "high" vs "low" activity ----

# Difference in mean sleep quality between the rows labelled "high" and the
# rows labelled "low". Missing values are dropped from each mean.
mean_diff_high_low <- function(quality, activity) {
  mean(quality[activity == "high"], na.rm = TRUE) -
    mean(quality[activity == "low"], na.rm = TRUE)
}

set.seed(123)  # fixed seed so the permutation results are reproducible
n_perm <- 10000

# Each replicate shuffles the activity labels across all rows of df and
# recomputes the high-minus-low difference under that relabelling.
# NOTE(review): rows with labels other than "high"/"low" take part in the
# shuffle too; confirm this is the intended null distribution.
perm_diffs <- replicate(
  n_perm,
  mean_diff_high_low(df$Sleep.Quality, sample(df$Physical.Activity.Level))
)

# Difference observed with the real labels.
obs_diff <- mean_diff_high_low(df$Sleep.Quality, df$Physical.Activity.Level)

# Two-sided p-value: share of permuted differences at least as large in
# absolute value as the observed one.
# NOTE(review): this estimate can be exactly 0; a (count + 1) / (n_perm + 1)
# correction is a common alternative if that matters for reporting.
p_value <- mean(abs(perm_diffs) >= abs(obs_diff))
print(p_value)