#Load dependencies
library(tidyverse)
library(tidytuesdayR)
Seismic Events at Mount Vesuvius
Heatmap
R
Load Data
# Load data
<- tidytuesdayR::tt_load(2025, week = 19)
tuesdata <- tuesdata$vesuvius
vesuvius rm(tuesdata)
#View(vesuvius)
Cleaning
# Get summary stats
summary(vesuvius)
event_id time latitude
Min. : 102 Min. :2011-04-20 00:27:24.00 Min. :40.80
1st Qu.: 5966 1st Qu.:2016-09-10 06:33:28.00 1st Qu.:40.82
Median :14981 Median :2019-05-11 22:23:43.00 Median :40.82
Mean :15494 Mean :2019-05-20 13:42:07.43 Mean :40.82
3rd Qu.:21388 3rd Qu.:2022-01-13 19:54:11.50 3rd Qu.:40.82
Max. :40802 Max. :2024-12-31 17:02:32.00 Max. :40.86
NA's :3433
longitude depth_km duration_magnitude_md md_error
Min. :14.35 Min. :0.010 Min. :-2.0000 Min. :0.3
1st Qu.:14.43 1st Qu.:0.140 1st Qu.:-0.2000 1st Qu.:0.3
Median :14.43 Median :0.240 Median : 0.1000 Median :0.3
Mean :14.43 Mean :0.407 Mean : 0.1771 Mean :0.3
3rd Qu.:14.43 3rd Qu.:0.428 3rd Qu.: 0.5000 3rd Qu.:0.3
Max. :14.47 Max. :9.350 Max. : 3.1000 Max. :0.3
NA's :3433 NA's :3433 NA's :399 NA's :399
area type review_level year
Length:12027 Length:12027 Length:12027 Min. :2011
Class :character Class :character Class :character 1st Qu.:2016
Mode :character Mode :character Mode :character Median :2019
Mean :2019
3rd Qu.:2022
Max. :2024
# Value counts
table(vesuvius$area)
Mount Vesuvius
12027
table(vesuvius$type)
earthquake
12027
table(vesuvius$review_level)
preliminary revised
12025 2
Magnitude and depth are the variables of interest. There are many null values and it’s unclear what it means when there was an earthquake event but no magnitude/depth information so I will remove them.
# Drop NAs in columns of interest
<- drop_na(vesuvius, c(duration_magnitude_md, depth_km))
vesuvius_clean
<- vesuvius_clean |>
vesuvius_clean filter(year >= 2015)
#View(vesuvius_clean)
Explore Data
Univariate
ggplot(data = vesuvius_clean, aes(x = depth_km)) +
geom_histogram(binwidth = 0.1)
ggplot(data = vesuvius_clean, aes(x = duration_magnitude_md)) +
geom_histogram(binwidth = 0.1)
Multivariate
# Create scatterplot
ggplot(data = vesuvius_clean, aes(x = depth_km, y = duration_magnitude_md)) +
geom_point()
# Cross-tab of events per year
<- vesuvius_clean |>
vesuvius_events_per_year group_by(year) |>
summarize(event_cnt = n(), .groups = "drop")
vesuvius_events_per_year
# Mean events per year
mean(vesuvius_events_per_year$event_cnt)
[1] 831.4
# Cross-tab of events per month
<- vesuvius_clean |>
vesuvius_events_per_mo group_by(year, month(vesuvius_clean$time, label = TRUE, abbr = TRUE)) |>
summarize(event_cnt = n(), .groups = "drop")
colnames(vesuvius_events_per_mo) <- c("year", "month", "event_cnt")
vesuvius_events_per_mo
# Compute mean events by month
<- vesuvius_events_per_mo |>
mean_events_per_mo group_by(month) |>
summarize(avg_events = mean(event_cnt))
mean_events_per_mo
Modeling
attach(vesuvius)
<- lm(duration_magnitude_md ~ depth_km, vesuvius)
model1 summary(model1)
Call:
lm(formula = duration_magnitude_md ~ depth_km, data = vesuvius)
Residuals:
Min 1Q Median 3Q Max
-1.4884 -0.3374 -0.1066 0.2534 2.7279
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.191467 0.007036 27.21 <2e-16 ***
depth_km 0.306100 0.010954 27.95 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.5009 on 8473 degrees of freedom
(3552 observations deleted due to missingness)
Multiple R-squared: 0.08439, Adjusted R-squared: 0.08428
F-statistic: 780.9 on 1 and 8473 DF, p-value: < 2.2e-16
Model has utility and depth has a significant p-value so there does appear to be a relationship between depth and magnitude. However, the \(R^2_a\) is low, indicating that depth is only explaining about 8% of the variability in magnitude.
Visualization
# Make bar plot
ggplot(vesuvius_events_per_mo, aes(x = factor(month), y = event_cnt)) +
geom_col(fill = "steelblue") +
facet_wrap(~ year, ncol = 2) +
labs(
title = "Monthly Event Counts by Year",
x = "Month",
y = "Event Count"
+
) theme_minimal() +
theme(
strip.text = element_text(size = 12, face = "bold"),
axis.text.x = element_text(angle = 45, hjust = 1)
)
library(viridis)
Loading required package: viridisLite
library(showtext)
Loading required package: sysfonts
Loading required package: showtextdb
# Load Lato font
font_add_google("Lato", "lato")
showtext_auto()
# Make plot
ggplot(vesuvius_events_per_mo, aes(x = factor(month), y = factor(year, levels = sort(unique(year), decreasing = TRUE)), fill = event_cnt)) +
geom_tile(color = "white") +
scale_fill_viridis(option = "plasma", direction = -1) +
# Labels
labs(
title = "Micro-Earthquakes at Mount Vesuvius 2015-2024",
subtitle = "2024 was the most active year for seismic events in the past decade (1.1k vs mean of 0.8k)",
caption = "\nChart produced by Steven Villalon for Tidy Tuesday exercise on May 13, 2025",
x = "Month",
y = "Year",
fill = "Seismic Events"
+
)
# Fine details
theme_minimal() +
theme(
plot.title = element_text(size = 16, face = "bold"),
axis.title = element_text(size = 12),
text = element_text(family = "lato"),
legend.position = "right",
plot.caption = element_text(hjust = 0))