# Download this week's datasets with the tidytuesdayR package
tuesdata <- tidytuesdayR::tt_load(tt_date)

# Unpack each table from the downloaded bundle into its own object
gutenberg_authors <- tuesdata$gutenberg_authors
gutenberg_languages <- tuesdata$gutenberg_languages
gutenberg_metadata <- tuesdata$gutenberg_metadata
gutenberg_subjects <- tuesdata$gutenberg_subjects

# The bundle is not needed once the individual tables are extracted
rm(tuesdata)
3. Examine Data
Show Code
# Preview the first rows of the authors table
gutenberg_authors |>
  head()
# A tibble: 6 × 7
gutenberg_author_id author alias birthdate deathdate wikipedia aliases
<dbl> <chr> <chr> <dbl> <dbl> <chr> <chr>
1 1 United States U.S.… NA NA https://… U.S.A.
2 3 Lincoln, Abra… <NA> 1809 1865 https://… United…
3 4 Henry, Patrick <NA> 1736 1799 https://… <NA>
4 5 Adam, Paul <NA> 1849 1931 https://… <NA>
5 7 Carroll, Lewis Dodg… 1832 1898 https://… Dodgso…
6 8 United States… <NA> NA NA https://… Agency…
Show Code
# Preview the first rows of the languages table
gutenberg_languages |>
  head()
# A tibble: 6 × 3
gutenberg_id language total_languages
<dbl> <chr> <dbl>
1 1 en 1
2 2 en 1
3 3 en 1
4 4 en 1
5 5 en 1
6 6 en 1
Show Code
# Preview the first rows of the metadata table
gutenberg_metadata |>
  head()
# A tibble: 6 × 8
gutenberg_id title author gutenberg_author_id language gutenberg_bookshelf
<dbl> <chr> <chr> <dbl> <chr> <chr>
1 1 "The Dec… Jeffe… 1638 en Politics/American …
2 2 "The Uni… Unite… 1 en Politics/American …
3 3 "John F.… Kenne… 1666 en Browsing: History …
4 4 "Lincoln… Linco… 3 en US Civil War/Brows…
5 5 "The Uni… Unite… 1 en United States/Poli…
6 6 "Give Me… Henry… 4 en American Revolutio…
# ℹ 2 more variables: rights <chr>, has_text <lgl>
Show Code
# Preview the first rows of the subjects table
gutenberg_subjects |>
  head()
# A tibble: 6 × 3
gutenberg_id subject_type subject
<dbl> <chr> <chr>
1 1 lcsh United States -- History -- Revolution, 1775-1783 -…
2 1 lcsh United States. Declaration of Independence
3 1 lcc E201
4 1 lcc JK
5 2 lcsh Civil rights -- United States -- Sources
6 2 lcsh United States. Constitution. 1st-10th Amendments
4. Cleaning
Show Code
# Reduce the subjects table to one row per book, with all of that book's
# subject headings joined into a single " | "-separated string
pg_subjects <- gutenberg_subjects |>
  group_by(gutenberg_id) |>
  summarise(subjects = str_flatten(subject, collapse = " | "))

# Preview the collapsed table
pg_subjects |>
  head()
# A tibble: 6 × 2
gutenberg_id subjects
<dbl> <chr>
1 1 United States -- History -- Revolution, 1775-1783 -- Sources | U…
2 2 Civil rights -- United States -- Sources | United States. Consti…
3 3 United States -- Foreign relations -- 1961-1963 | Presidents -- …
4 4 Consecration of cemeteries -- Pennsylvania -- Gettysburg | Soldi…
5 5 United States -- Politics and government -- 1783-1789 -- Sources…
6 6 Speeches, addresses, etc., American | United States -- Politics …
Show Code
# Attach the collapsed subject string to each book's metadata, keep only the
# four columns used downstream, and drop every row with a missing value in
# any of them
pg_clean <- gutenberg_metadata |>
  left_join(pg_subjects, by = "gutenberg_id") |>
  select(gutenberg_id, title, language, subjects) |>
  na.omit()

# Preview the cleaned table
pg_clean |>
  head()
# A tibble: 6 × 4
gutenberg_id title language subjects
<dbl> <chr> <chr> <chr>
1 1 "The Declaration of Independence of the United… en United …
2 2 "The United States Bill of Rights\r\nThe Ten O… en Civil r…
3 3 "John F. Kennedy's Inaugural Address" en United …
4 4 "Lincoln's Gettysburg Address\r\nGiven Novembe… en Consecr…
5 5 "The United States Constitution" en United …
6 6 "Give Me Liberty or Give Me Death" en Speeche…
Show Code
# Country names to search for in the subject headings, grouped by region
latam_caribbean_countries <- c(
  # South America
  "Argentina", "Bolivia", "Brazil", "Chile", "Colombia", "Ecuador",
  "Guyana", "Paraguay", "Peru", "Suriname", "Uruguay", "Venezuela",
  # Central America
  "Costa Rica", "El Salvador", "Guatemala",
  "Honduras", "Nicaragua", "Panama",
  # Caribbean
  "Cuba", "Dominican Republic", "Puerto Rico",
  # North America (Spanish-speaking)
  "Mexico"
)
Show Code
# Count, for every country, the books whose collapsed subject string mentions
# that country.
#
# Matching is a case-sensitive substring search on the literal country name
# (the name is quoted with \Q...\E so it is never read as regex syntax).
# A negative lookbehind skips occurrences preceded by "New ", because a plain
# substring search counts the US state "New Mexico" as a hit for "Mexico".
# NOTE(review): substring matching can still produce other false positives
# (e.g. "Peru" inside "Perugia"); confirm whether stricter matching is wanted.
book_counts <- vapply(
  latam_caribbean_countries,
  \(country) {
    country_pattern <- regex(paste0("(?<!New )\\Q", country, "\\E"))
    sum(str_detect(pg_clean$subjects, country_pattern))
  },
  integer(1),
  # Unnamed result so data.frame() does not turn the names into row names
  USE.NAMES = FALSE
)

# One row per country, with an ordered size bucket used for the label fill,
# sorted from most to fewest books
cnts_by_country <- data.frame(
  country = latam_caribbean_countries,
  book_count = book_counts
) |>
  mutate(
    # Left-closed intervals: [0, 50), [50, 100), [100, Inf)
    cnt_group = cut(
      book_count,
      breaks = c(-Inf, 50, 100, Inf),
      labels = c("0 - 49", "50 - 99", "100+"),
      right = FALSE,
      ordered_result = TRUE
    )
  ) |>
  arrange(desc(book_count))

cnts_by_country
# Bounding box of the plotting area, in degrees of longitude / latitude
long_min <- -125
long_max <- -30
lat_min <- -60
lat_max <- 35

# Load country shapes as an sf object
world <- ne_countries(scale = "medium", returnclass = "sf")

# Keep only the countries that have a book count and add a label anchor point.
#
# The join uses `name_long` rather than `name`: Natural Earth's `name` column
# holds abbreviated names (e.g. "Dominican Rep."), so joining on it silently
# drops "Dominican Republic" from the map.
world_counts <- world |>
  inner_join(cnts_by_country, by = c("name_long" = "country")) |>
  mutate(
    # A point guaranteed to lie inside each country's shape; the warning
    # emitted for longitude/latitude data is suppressed, as in the original
    label_point = suppressWarnings(st_point_on_surface(geometry)),
    lon = st_coordinates(label_point)[, 1],
    lat = st_coordinates(label_point)[, 2]
  )
6. Visualization
Show Code
# Register the Lato font from Google Fonts and render text through showtext
# at the same resolution used for export
font_add_google("Lato", "lato")
showtext_auto()
showtext_opts(dpi = 300)

# Map of the selected countries; countries with at least 10 books get a
# repelled label showing the name and the count, filled by count bucket
final_plot <- ggplot(world_counts) +
  geom_sf(color = "gray40") +
  coord_sf(
    xlim = c(long_min, long_max),
    ylim = c(lat_min, lat_max)
  ) +
  geom_label_repel(
    data = world_counts |> filter(book_count >= 10),
    aes(
      x = lon,
      y = lat,
      # paste0() instead of paste(): the default " " separator left a stray
      # space on either side of the line break, shifting both label lines
      label = paste0(name, "\n", book_count),
      fill = cnt_group
    ),
    family = "lato",
    size = 3,
    alpha = 0.9,
    fontface = "bold",
    label.size = 0.2,
    max.overlaps = 30
  ) +
  scale_fill_manual(
    values = c(
      "0 - 49" = "white",
      "50 - 99" = "#E6C36D",
      "100+" = "#A8C7A1"
    )
  ) +
  labs(
    title = "How many books in the Gutenberg online \nlibrary are about Latin America?",
    subtitle = "Mexico was the sure bet to have the most books, but surprised to see so few for Puerto Rico, \nColombia, and Venezuela. Books in the library are at least 95 years old and are not currently \nunder copyright.",
    caption = "Chart produced by Steven Villalon for Tidy Tuesday exercise on June 3, 2025.",
    fill = "Book Counts"
  ) +
  theme_minimal(base_family = "lato") +
  theme(
    # Black canvas everywhere, including the legend
    plot.background = element_rect(fill = "black", color = NA),
    panel.background = element_rect(fill = "black", color = NA),
    legend.background = element_rect(fill = "black", color = NA),
    legend.key = element_rect(fill = "black", color = NA),
    # Legend placed inside the panel, left of centre
    legend.position = "inside",
    legend.position.inside = c(0.15, 0.5),
    legend.justification = c("left", "center"),
    text = element_text(color = "white"),
    # No axes or graticule on the map
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank(),
    plot.caption = element_text(color = "white", hjust = 0),
    plot.title = element_text(color = "#E8A6A1", face = "bold", size = 20),
    plot.subtitle = element_text(color = "gray90", size = 10)
  ) +
  guides(fill = guide_legend(override.aes = list(label = ""))) # Remove "a" from legend
7. Export Visualization
Show Code
# File formats to write out
formats_to_export <- c("png", "svg")

# Write the plot to the output folder in each format
# (save_tt_plots() comes from a custom R script)
save_tt_plots(
  plot = final_plot,
  title = title,
  date = tt_date,
  output_folder = "output",
  formats = formats_to_export,
  height = 8,
  width = 6,
  dpi = 300
)