Seismic Events at Mount Vesuvius

Heatmap
R
Author

Steven Villalon

Published

May 13, 2025

#Load dependencies
library(tidyverse)
library(tidytuesdayR)

Load Data

# Load data
tuesdata <- tidytuesdayR::tt_load(2025, week = 19)
vesuvius <- tuesdata$vesuvius
rm(tuesdata)

#View(vesuvius)

Cleaning

# Get summary stats
summary(vesuvius)
    event_id          time                           latitude    
 Min.   :  102   Min.   :2011-04-20 00:27:24.00   Min.   :40.80  
 1st Qu.: 5966   1st Qu.:2016-09-10 06:33:28.00   1st Qu.:40.82  
 Median :14981   Median :2019-05-11 22:23:43.00   Median :40.82  
 Mean   :15494   Mean   :2019-05-20 13:42:07.43   Mean   :40.82  
 3rd Qu.:21388   3rd Qu.:2022-01-13 19:54:11.50   3rd Qu.:40.82  
 Max.   :40802   Max.   :2024-12-31 17:02:32.00   Max.   :40.86  
                                                  NA's   :3433   
   longitude        depth_km     duration_magnitude_md    md_error  
 Min.   :14.35   Min.   :0.010   Min.   :-2.0000       Min.   :0.3  
 1st Qu.:14.43   1st Qu.:0.140   1st Qu.:-0.2000       1st Qu.:0.3  
 Median :14.43   Median :0.240   Median : 0.1000       Median :0.3  
 Mean   :14.43   Mean   :0.407   Mean   : 0.1771       Mean   :0.3  
 3rd Qu.:14.43   3rd Qu.:0.428   3rd Qu.: 0.5000       3rd Qu.:0.3  
 Max.   :14.47   Max.   :9.350   Max.   : 3.1000       Max.   :0.3  
 NA's   :3433    NA's   :3433    NA's   :399           NA's   :399  
     area               type           review_level            year     
 Length:12027       Length:12027       Length:12027       Min.   :2011  
 Class :character   Class :character   Class :character   1st Qu.:2016  
 Mode  :character   Mode  :character   Mode  :character   Median :2019  
                                                          Mean   :2019  
                                                          3rd Qu.:2022  
                                                          Max.   :2024  
                                                                        
# Value counts
table(vesuvius$area)

Mount Vesuvius 
         12027 
table(vesuvius$type)

earthquake 
     12027 
table(vesuvius$review_level)

preliminary     revised 
      12025           2 

Magnitude and depth are the variables of interest. There are many null values and it’s unclear what it means when there was an earthquake event but no magnitude/depth information so I will remove them.

# Drop NAs in columns of interest
vesuvius_clean <- drop_na(vesuvius, c(duration_magnitude_md, depth_km))

vesuvius_clean <- vesuvius_clean |> 
  filter(year >= 2015)

#View(vesuvius_clean)

Explore Data

Univariate

ggplot(data = vesuvius_clean, aes(x = depth_km)) +
  geom_histogram(binwidth = 0.1)

ggplot(data = vesuvius_clean, aes(x = duration_magnitude_md)) +
  geom_histogram(binwidth = 0.1)

Multivariate

# Create scatterplot
ggplot(data = vesuvius_clean, aes(x = depth_km, y = duration_magnitude_md)) +
  geom_point()

# Cross-tab of events per year
vesuvius_events_per_year <- vesuvius_clean |> 
  group_by(year) |> 
  summarize(event_cnt = n(), .groups = "drop")

vesuvius_events_per_year
# Mean events per year
mean(vesuvius_events_per_year$event_cnt)
[1] 831.4
# Cross-tab of events per month
vesuvius_events_per_mo <- vesuvius_clean |> 
  group_by(year, month(vesuvius_clean$time, label = TRUE, abbr = TRUE)) |> 
  summarize(event_cnt = n(), .groups = "drop")

colnames(vesuvius_events_per_mo) <- c("year", "month", "event_cnt")

vesuvius_events_per_mo
# Compute mean events by month
mean_events_per_mo <- vesuvius_events_per_mo |> 
  group_by(month) |> 
  summarize(avg_events = mean(event_cnt))

mean_events_per_mo

Modeling

attach(vesuvius)
model1 <- lm(duration_magnitude_md ~ depth_km, vesuvius)
summary(model1)

Call:
lm(formula = duration_magnitude_md ~ depth_km, data = vesuvius)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.4884 -0.3374 -0.1066  0.2534  2.7279 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 0.191467   0.007036   27.21   <2e-16 ***
depth_km    0.306100   0.010954   27.95   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.5009 on 8473 degrees of freedom
  (3552 observations deleted due to missingness)
Multiple R-squared:  0.08439,   Adjusted R-squared:  0.08428 
F-statistic: 780.9 on 1 and 8473 DF,  p-value: < 2.2e-16

Model has utility and depth has a significant p-value so there does appear to be a relationship between depth and magnitude. However, the \(R^2_a\) is low, indicating that depth is only explaining about 8% of the variability in magnitude.

Visualization

# Make bar plot
ggplot(vesuvius_events_per_mo, aes(x = factor(month), y = event_cnt)) +
  geom_col(fill = "steelblue") +
  facet_wrap(~ year, ncol = 2) +
  labs(
    title = "Monthly Event Counts by Year",
    x = "Month",
    y = "Event Count"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 12, face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

library(viridis)
Loading required package: viridisLite
library(showtext)
Loading required package: sysfonts
Loading required package: showtextdb
# Load Lato font
font_add_google("Lato", "lato")
showtext_auto()

# Make plot
ggplot(vesuvius_events_per_mo, aes(x = factor(month), y = factor(year, levels = sort(unique(year), decreasing = TRUE)), fill = event_cnt)) +
  geom_tile(color = "white") +
  scale_fill_viridis(option = "plasma", direction = -1) +
  
  # Labels
  labs(
    title = "Micro-Earthquakes at Mount Vesuvius 2015-2024",
    subtitle = "2024 was the most active year for seismic events in the past decade (1.1k vs mean of 0.8k)",
    caption = "\nChart produced by Steven Villalon for Tidy Tuesday exercise on May 13, 2025",
    x = "Month",
    y = "Year",
    fill = "Seismic Events"
  ) +
  
    # Fine details
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    axis.title = element_text(size = 12),
    text = element_text(family = "lato"),
    legend.position = "right",
    plot.caption = element_text(hjust = 0))

Back to top