Extract Type 23 Data

Extract Type 23 Performance Data
wikipedia
scrape
Author

Defence Economist

Published

April 1, 2023

Objective

Extract Type 23 frigate performance data from Wikipedia.

Data

# Download the Type 23 frigate article; `req` holds the body as returned by
# httr::content().
url <- "https://en.wikipedia.org/wiki/Type_23_frigate"
req <- httr::GET(url) |>
  # Stop on an HTTP error status instead of handing an error page to the
  # table-extraction steps that follow.
  httr::stop_for_status() |>
  httr::content()

# Infobox row labels to keep; "Name" is renamed to `id` further down.
vars <- c("Name", "Displacement", "Length", "Beam", "Draught")

# Build a long table (id, name, value, unit) from the first table on the page
# whose class is exactly "infobox". The outer parentheses print `df` as well
# as assigning it.
(df <- req |>
  xml2::xml_find_all("//table[@class = 'infobox']") |>
  rvest::html_table() |>
  purrr::pluck(1) |>
  dplyr::filter(X1 %in% vars) |>
  # Transpose so every kept row becomes a column, then promote the first row
  # (the X1 labels) to column names.
  t() |>
  dplyr::as_tibble(.name_repair = "minimal") |>
  janitor::row_to_names(1) |>
  dplyr::mutate(dplyr::across(-Name, \(x) {
    # Keep the text that precedes the first "("; the `- 2` also drops the
    # character immediately before the parenthesis. Values without a
    # parenthesis are kept whole instead of being turned into NA.
    paren_start <- stringr::str_locate(x, "\\(")[, 1]
    dplyr::if_else(
      is.na(paren_start),
      x,
      stringr::str_sub(x, 1, paren_start - 2)
    )
  })) |>
  tidyr::pivot_longer(-Name) |>
  dplyr::rename(id = Name) |>
  dplyr::mutate(
    # Unit: everything after the first whitespace character.
    unit = substr(
      value,
      stringr::str_locate(value, "\\s")[, 1] + 1,
      nchar(value)
    ),
    # Number: strip lowercase letters and commas, trim, then convert.
    value = value |>
      stringr::str_remove_all("[a-z]|,") |>
      stringr::str_trim() |>
      as.numeric()
  )
)
# A tibble: 4 × 4
  id              name          value unit 
  <chr>           <chr>         <dbl> <chr>
1 Type 23 frigate Displacement 4900   t    
2 Type 23 frigate Length        133   m    
3 Type 23 frigate Beam           16.1 m    
4 Type 23 frigate Draught         7.3 m    
# Drop the unit column and spread the measurements back out to one column per
# characteristic.
df |>
  dplyr::select(!unit) |>
  tidyr::pivot_wider(names_from = name, values_from = value)
# A tibble: 1 × 5
  id              Displacement Length  Beam Draught
  <chr>                  <dbl>  <dbl> <dbl>   <dbl>
1 Type 23 frigate         4900    133  16.1     7.3
library(GGally)
Warning: package 'GGally' was built under R version 4.2.3
Loading required package: ggplot2
Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2
# Pairs-plot example from the help page: columns 2 to 4 of the `flea` data,
# coloured by species.
data(flea)
ggpairs(
  data = flea,
  mapping = ggplot2::aes(colour = species),
  columns = 2:4
)