library(dplyr)
library(ggplot2)
# Read the 2020 census-tract table and the modified atlas extract.
nyc_census_info <- read.csv(
  "./2020_Census_Tracts_-_Tabular.csv",
  header = TRUE
)
atlas2 <- read.csv("./atlas_modified.csv", header = TRUE)

# Keep state code 36 and the five county codes that are mapped to borough
# names just below.
nyc <- subset(atlas2, state == 36 & county %in% c(47, 61, 85, 81, 5))

# County code -> borough name via positional lookup; any other code gives NA.
nyc$BoroName <- c(
  "Brooklyn", "Manhattan", "Staten Island", "Queens", "Bronx"
)[match(nyc$county, c(47, 61, 85, 81, 5))]

# Borough / tract / neighborhood (NTA) lookup table, with CT2020 renamed to
# `tract` so it lines up with the atlas column of the same name.
nyc_neigborhood_info <- nyc_census_info %>%
  select(BoroName, tract = CT2020, NTAName)

# Left join: every atlas row is kept; rows without a match get NA for NTAName.
# NOTE(review): this assumes the atlas `tract` values and CT2020 use the same
# tract coding and vintage -- confirm against both input files.
nyc2 <- merge(
  nyc,
  nyc_neigborhood_info,
  by = c("BoroName", "tract"),
  all.x = TRUE
)
names(nyc2)[names(nyc2) == "NTAName"] <- "neighborhood"

# Tract subsets for the two Bronx areas being compared.
south_bronx_bronx <- nyc2[
  which(
    nyc2$BoroName == "Bronx" &
      nyc2$neighborhood %in% c(
        "Mott Haven-Port Morris",
        "Concourse-Concourse Village",
        "Melrose"
      )
  ),
  ,
  drop = FALSE
]
riverdale_bronx <- nyc2[
  which(
    nyc2$BoroName == "Bronx" &
      nyc2$neighborhood == "Riverdale-Spuyten Duyvil"
  ),
  ,
  drop = FALSE
]
# First Violin Plot
# Rebuild the two neighborhood subsets so this section can be run on its own.
south_bronx_bronx <- subset(
  nyc2,
  BoroName == "Bronx" &
    neighborhood %in% c(
      "Mott Haven-Port Morris",
      "Concourse-Concourse Village",
      "Melrose"
    )
)
riverdale_bronx <- subset(
  nyc2,
  BoroName == "Bronx" & neighborhood == "Riverdale-Spuyten Duyvil"
)

# Combine the two data sets into a single data frame:
# one row per tract, labelled with the area it belongs to.
# `[[` is used instead of `$` so the column name is matched exactly.
combined_data <- data.frame(
  value = c(
    south_bronx_bronx[["kfr_pooled_pooled_p25"]],
    riverdale_bronx[["kfr_pooled_pooled_p25"]]
  ),
  group = rep(
    c("South Bronx Bronx", "Riverdale Bronx"),
    times = c(nrow(south_bronx_bronx), nrow(riverdale_bronx))
  )
)

# Drop non-finite values here, once, instead of leaving it to ggplot2.
# Previously every layer removed the offending row by itself and the plot
# emitted three "Removed 1 rows containing ..." warnings (stat_ydensity,
# stat_boxplot, geom_point).
combined_data <- combined_data[is.finite(combined_data$value), , drop = FALSE]

# Generate the violin plot
ggplot(combined_data, aes(x = group, y = value)) +
  geom_violin() +
  geom_jitter(width = 0.2, size = 1, alpha = 0.5) + # Add raw data points
  geom_boxplot(width = 0.1, fill = "white") + # Add box-and-whisker plot
  labs(x = "Group", y = "Value") +
  theme_minimal()
#Second Violin Plot (better design)
# Load required library
library(ggplot2)
# Rebuild the two neighborhood subsets so this section can be run on its own.
south_bronx_bronx <- subset(
  nyc2,
  BoroName == "Bronx" &
    neighborhood %in% c(
      "Mott Haven-Port Morris",
      "Concourse-Concourse Village",
      "Melrose"
    )
)
riverdale_bronx <- subset(
  nyc2,
  BoroName == "Bronx" & neighborhood == "Riverdale-Spuyten Duyvil"
)

# Combine the two data sets into a single data frame:
# one row per tract, labelled with the area it belongs to.
# `[[` is used instead of `$` so the column name is matched exactly.
combined_data <- data.frame(
  value = c(
    south_bronx_bronx[["kfr_pooled_pooled_p25"]],
    riverdale_bronx[["kfr_pooled_pooled_p25"]]
  ),
  group = rep(
    c("South Bronx", "Riverdale"),
    times = c(nrow(south_bronx_bronx), nrow(riverdale_bronx))
  )
)

# Drop non-finite values here, once, instead of leaving it to ggplot2.
# Previously every layer removed the offending row by itself and the plot
# emitted three "Removed 1 rows containing ..." warnings (stat_ydensity,
# stat_boxplot, geom_point).
combined_data <- combined_data[is.finite(combined_data$value), , drop = FALSE]

# Generate the violin plot
ggplot(combined_data, aes(x = group, y = value, fill = group)) +
  geom_violin(alpha = 0.5) +
  geom_jitter(width = 0.2, size = 1, alpha = 0.5, color = "black") +
  # Box plot outliers are hidden because the jitter layer already shows
  # every observation.
  geom_boxplot(
    width = 0.1,
    fill = "white",
    alpha = 0.5,
    outlier.shape = NA
  ) +
  labs(
    x = NULL,
    y = "Value",
    title = "kfr_pooled_pooled_p25",
    fill = NULL
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5), # Center the title
    axis.line = element_line(color = 'black') # Add x and y axis lines
  )
# Violin Plots -- all kfr
# Load required libraries
library(tidyverse)
library(tidyr) # Load tidyr specifically
# Fresh wide-format tract subsets for the two comparison areas.
south_bronx_bronx <- nyc2[
  which(
    nyc2$BoroName == "Bronx" &
      nyc2$neighborhood %in% c(
        "Mott Haven-Port Morris",
        "Concourse-Concourse Village",
        "Melrose"
      )
  ),
  ,
  drop = FALSE
]
riverdale_bronx <- nyc2[
  which(
    nyc2$BoroName == "Bronx" &
      nyc2$neighborhood == "Riverdale-Spuyten Duyvil"
  ),
  ,
  drop = FALSE
]

# Columns to plot, one per line.
variables <- c(
  "kfr_pooled_pooled_p25",
  "kfr_black_pooled_p25",
  "kfr_hisp_pooled_p25",
  "kfr_white_pooled_p25",
  "kfr_pooled_female_p25",
  "kfr_pooled_male_p25",
  "kfr_black_female_p25",
  "kfr_hisp_female_p25",
  "kfr_white_female_p25",
  "kfr_black_male_p25",
  "kfr_hisp_male_p25",
  "kfr_white_male_p25"
)
# Reshape one neighborhood subset from wide to long format for plotting.
#
# Arguments:
#   data        data frame containing (some of) the columns named in `vars`.
#   group_name  label written to the `group` column of every output row.
#   vars        character vector of column names to keep; defaults to the
#               script-level `variables` vector.
#
# Returns a plain data frame with three columns: `variable` (the source
# column name), `value` (that column's value) and `group`. There is one
# output row per input row per selected column, ordered by input row.
#
# Columns listed in `vars` but absent from `data` are skipped with a warning
# rather than an error.
reshape_data <- function(data, group_name, vars = variables) {
  missing_vars <- setdiff(vars, names(data))
  if (length(missing_vars) > 0) {
    warning(
      "Unknown columns: ", paste(missing_vars, collapse = ", "),
      call. = FALSE
    )
  }

  data %>%
    select(any_of(vars)) %>% # keep only the requested columns
    pivot_longer(
      cols = everything(),
      names_to = "variable",
      values_to = "value"
    ) %>% # convert to long format
    mutate(group = group_name) %>% # add group column
    as.data.frame() # callers rbind() the results as plain data frames
}
# Convert both subsets to long format (this overwrites the wide subsets).
south_bronx_bronx <- reshape_data(south_bronx_bronx, "South Bronx")
riverdale_bronx <- reshape_data(riverdale_bronx, "Riverdale")

# Stack the two areas and discard rows with a missing value.
combined_data <- filter(
  rbind(south_bronx_bronx, riverdale_bronx),
  !is.na(value)
)

# Left-to-right order of the violins: one Riverdale / South Bronx pair per
# variable.
x_order <- c(
  "kfr_pooled_pooled_p25_Riverdale", "kfr_pooled_pooled_p25_South Bronx",
  "kfr_white_pooled_p25_Riverdale", "kfr_white_pooled_p25_South Bronx",
  "kfr_black_pooled_p25_Riverdale", "kfr_black_pooled_p25_South Bronx",
  "kfr_hisp_pooled_p25_Riverdale", "kfr_hisp_pooled_p25_South Bronx",
  "kfr_pooled_female_p25_Riverdale", "kfr_pooled_female_p25_South Bronx",
  "kfr_pooled_male_p25_Riverdale", "kfr_pooled_male_p25_South Bronx",
  "kfr_white_female_p25_Riverdale", "kfr_white_female_p25_South Bronx",
  "kfr_white_male_p25_Riverdale", "kfr_white_male_p25_South Bronx",
  "kfr_black_female_p25_Riverdale", "kfr_black_female_p25_South Bronx",
  "kfr_black_male_p25_Riverdale", "kfr_black_male_p25_South Bronx",
  "kfr_hisp_female_p25_Riverdale", "kfr_hisp_female_p25_South Bronx",
  "kfr_hisp_male_p25_Riverdale", "kfr_hisp_male_p25_South Bronx"
)

# Key each row as "<variable>_<group>" and store it as a factor whose level
# order is x_order, which fixes the x-axis order.
combined_data$variable_group <- factor(
  with(combined_data, paste(variable, group, sep = "_")),
  levels = x_order
)

# Assemble the figure in stages: base mapping, geometry layers, then
# labels and theme.
plot <- ggplot(
  combined_data,
  aes(x = variable_group, y = value, fill = group)
)
plot <- plot +
  geom_violin(alpha = 0.5, position = position_dodge(0.8)) +
  geom_jitter(width = 0.2, size = 1, alpha = 0.7) + # raw data points
  geom_boxplot(
    width = 0.2,
    fill = "white",
    position = position_dodge(0.8),
    outlier.shape = NA
  )
plot <- plot +
  labs(
    title = "Social Mobility: Riverdale vs. South Bronx",
    x = NULL,
    y = "Value",
    fill = NULL
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_text(angle = 90, hjust = 1), # vertical x labels
    plot.title = element_text(hjust = 0.5), # centered title
    axis.line = element_line(color = 'black') # visible x and y axis lines
  )

# Render the figure
print(plot)
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.