# Search terms: one per Royal Navy frigate type/class, each suffixed with
# "Frigate" (paste() recycles the suffix across the vector).
frigates <- c(
  "Type 26", "Type 23", "Type 22",
  "Type 21", "Leander Class", "Rothesay Class",
  "Whitby Class", "Salisbury Class"
) |>
  paste("Frigate")
# Request components for the Wikidata search endpoint: host, API path and
# the fixed query parameters. The per-request search term (`srsearch`) is
# added when the requests are issued.
base <- "https://www.wikidata.org"
path <- "/w/api.php"
query <- list(
  action = "query",
  list = "search",
  format = "json"
)
# Issue one search request per frigate term and keep the parsed response
# bodies. Each request uses the shared `base`, `path` and `query`, plus the
# term as the `srsearch` parameter.
search_results <- frigates |>
  purrr::map(\(term) {
    httr::GET(
      base,
      path = path,
      query = c(query, list(srsearch = term))
    ) |>
      httr::content()
  })
# Keep only the first search hit of every response and bind the hits into a
# single data frame with the originating search term as the first column.
# NOTE(review): `mutate(search = frigates)` assumes every search returned at
# least one hit; a term with no hits would make the row count differ from
# length(frigates).
search_results_tidy <- search_results |>
  purrr::map_df(\(res) res$query$search[1]) |> # extract first response
  dplyr::select(title, snippet) |>
  dplyr::mutate(search = frigates) |>
  dplyr::relocate(search)
Objective
Search Wikidata for different UK frigates.
Data
Type 26, Type 23, Type 22, Type 21, Leander class, Rothesay class, Valour class, Whitby class and Salisbury class
Get the Wikipedia URL for each class
# Fetch the Special:EntityData JSON document for every title found by the
# search step (one request per title, using the "simple" flavor).
base <- "https://www.wikidata.org/"
req <- search_results_tidy$title |>
  purrr::map(\(entity_id) {
    path <- glue::glue("wiki/Special:EntityData/{entity_id}.json")
    httr::GET(url = base, path = path, query = list(flavor = "simple"))
  })
# Pull the English Wikipedia sitelink URL out of each entity response.
# The outer parentheses print the resulting character vector as well as
# assigning it.
(wiki_url <- req |>
  purrr::map_chr(\(response) {
    cnt <- httr::content(response)
    cnt$entities[[1]]$sitelinks$enwiki$url
  }))
[1] "https://en.wikipedia.org/wiki/Type_26_frigate"
[2] "https://en.wikipedia.org/wiki/Type_23_frigate"
[3] "https://en.wikipedia.org/wiki/Type_22_frigate"
[4] "https://en.wikipedia.org/wiki/Type_21_frigate"
[5] "https://en.wikipedia.org/wiki/Leander-class_frigate"
[6] "https://en.wikipedia.org/wiki/Rothesay-class_frigate"
[7] "https://en.wikipedia.org/wiki/Whitby-class_frigate"
[8] "https://en.wikipedia.org/wiki/Salisbury-class_frigate"
# Download each Wikipedia page, take the first <table class="infobox">,
# convert it to a data frame and tag every row with the page URL it came
# from. The per-page tables are row-bound into one data frame.
infobox <- wiki_url |>
  purrr::map_df(\(page_url) {
    httr::GET(page_url) |>
      httr::content() |>
      xml2::xml_find_all("//table[@class='infobox']") |>
      rvest::html_table() |>
      purrr::pluck(1) |>
      dplyr::mutate(source = page_url)
  })
# Infobox row labels (values of column X1) that are kept for the analysis.
vars <- c("Displacement", "Length", "Beam", "Draught")
# Reshape the scraped infobox rows into one row per ship class (`label`) and
# measurement index (`increment`), with the measurements named in `vars` as
# numeric columns in metric units.
# Relies on `infobox` (columns X1, X2, source) and `vars` defined earlier.
performance <- infobox |>
  dplyr::group_by(source) |>
  # Rows whose two cells are identical are copied into X0 and carried down,
  # then dropped; presumably these are section headers spanning the table
  # (TODO confirm). X0 is not read by any later step.
  dplyr::mutate(X0 = ifelse(X1 == X2, X1, NA)) |>
  tidyr::fill(X0) |>
  dplyr::filter(X1 != X2) |>
  dplyr::filter(X1 %in% vars) |>
  dplyr::ungroup() |>
  # One key per retained infobox row, assigned before multi-line cells are
  # split so the pieces of a cell can be grouped again below.
  # row_number() is used instead of 1L:n(), which misbehaves on zero rows.
  dplyr::mutate(key = dplyr::row_number()) |>
  dplyr::group_by(source) |>
  dplyr::mutate(X2 = stringr::str_split(X2, "\n")) |>
  tidyr::unnest(cols = "X2") |>
  dplyr::ungroup() |>
  # Entries that do not start with a digit are split at the first ":" into
  # a prefix (X3) and the remainder (X2). X3 is not read by any later step.
  dplyr::mutate(
    X3 = ifelse(
      test = stringr::str_detect(X2, "^[0-9]", negate = TRUE),
      yes = substr(X2, 1, stringr::str_locate(X2, ":")[, 1] - 1),
      no = NA
    )
  ) |>
  dplyr::mutate(
    X2 = ifelse(
      test = stringr::str_detect(X2, "^[0-9]", negate = TRUE),
      yes = substr(X2, stringr::str_locate(X2, ":")[, 1] + 2, nchar(X2)),
      no = X2
    )
  ) |>
  # Numeric value: the text before the first whitespace, thousands
  # separators removed.
  dplyr::mutate(
    value = substr(X2, 1, stringr::str_locate(X2, "\\s")[, 1] - 1) |>
      stringr::str_remove_all(",") |>
      as.numeric()
  ) |>
  # Unit: the first word after the value. str_extract() also handles a unit
  # that is the last word of the entry; truncating at the next whitespace
  # returned NA in that case, which skipped the unit conversions below.
  dplyr::mutate(
    units = substr(X2, stringr::str_locate(X2, "\\s")[, 1] + 1, nchar(X2)) |>
      stringr::str_extract("^\\S+")
  ) |>
  dplyr::mutate(
    label = stringr::str_remove_all(source, "https://en.wikipedia.org/wiki/") |>
      stringr::str_replace_all("_", " ")
  ) |>
  dplyr::filter(!is.na(value)) |>
  dplyr::group_by(key) |>
  dplyr::mutate(increment = dplyr::row_number()) |>
  dplyr::select(key, increment, label, name = X1, value, units) |>
  # Normalise unit spellings, then convert long tons to tonnes and feet to
  # metres so that every retained value is metric.
  dplyr::mutate(units = dplyr::case_when(
    units %in% c("tonnes", "ton", "tonnes,", "tons") ~ "t",
    units %in% c("long") ~ "lt",
    units %in% c("metres") ~ "m",
    TRUE ~ units
  )) |>
  dplyr::mutate(
    value = dplyr::case_when(
      units %in% "lt" ~ (value * 1.016047),
      units %in% "ft" ~ (value * 0.3048),
      TRUE ~ value
    )
  ) |>
  dplyr::mutate(
    units = dplyr::case_when(
      units %in% "lt" ~ "t",
      units %in% "ft" ~ "m",
      TRUE ~ units
    )
  ) |>
  dplyr::ungroup() |>
  dplyr::select(-units, -key, -increment) |>
  # Number repeated measurements of the same quantity per ship class so that
  # pivot_wider() gets a unique row identifier.
  dplyr::group_by(label, name) |>
  dplyr::mutate(increment = dplyr::row_number()) |>
  tidyr::pivot_wider(names_from = name, values_from = value) |>
  tidyr::fill(c(Length, Beam, Draught)) |>
  # Drop any remaining grouping so downstream code gets a plain tibble.
  dplyr::ungroup()
# Pairs plot of columns 3 to 6 of `performance`, with the lower-triangle
# panels coloured by ship class (`label`).
x <- GGally::ggpairs(
  performance,
  lower = list(mapping = ggplot2::aes(colour = label)),
  columns = 3:6
)
Registered S3 method overwritten by 'GGally':
method from
+.gg ggplot2
# Render the pairs plot as an interactive plotly widget.
plotly::ggplotly(x)
Warning: Can only have one: highlight
Warning: Can only have one: highlight
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 3 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 3 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 3 rows containing missing values
Warning: Removed 3 rows containing non-finite values (stat_density).
Warning: Can only have one: highlight