Code
library(ggplot2)
Warning: package 'ggplot2' was built under R version 4.3.2
Code
# This script calculates voluntary and involuntary job separations from the
# labour market.
#+ Setup ----
# Connect to Database
<- DBI::dbConnect(
con ::odbc(), driver = "PostgreSQL ODBC Driver(Unicode)",
odbcdatabase = "lfs", uid = "lheley", host = "localhost", pwd = "lheley",
port = 5432, maxvarcharsize = 0
)
# Extract Meta Data
<- con |>
study_meta ::tbl("2qlfs_index") |>
dplyr::select(1:4) |>
dplyr::collect() |>
dplyr::left_join(
dplyr|>
con ::tbl("study_filename_lu") |>
dplyr::collect() |>
dplyr::rename(SN = study) |>
dplyr::mutate(SN = as.numeric(SN)),
dplyrby = "SN"
)
#+ Define Variables ----
# These variables were defined through comparison of the previous
# ONS publications.
# Where the question or possible responses to a question have changed
# the variable is updated.
<- c("WNEFT112","WNLEFT2")
wnleft <- c("REDYL112","REDYL132","REDYLFT2")
relft <- "PUBLICR1"
sector <- "ILODEFR1"
employment <- "AGE1"
age <- c("PERSID")
id <- c("LGWT","LGWT18", "LGWT20", "LGWT22") # This responds to different population weights.
lgwt <- c(id, lgwt, wnleft, relft, sector, employment, age)
vars
<- DBI::dbListTables(con)
tbls <- tbls[grepl("sn_", tbls)]
tbls
<- tbls |>
variables ::map_df(function(tbl){
purrr<- con |>
variables ::tbl(tbl) |>
dplyrhead(n = 100) |>
::collect()
dplyr
<- names(variables)
nm names(variables) <- toupper(nm)
<- variables |>
variables ::select(tidyselect::any_of(vars)) |>
dplyrnames()
::tibble(study = substr(tbl, 4, 9), variables)
tibble
})
|>
variables ::filter(variables %in% vars) |>
dplyr::mutate(variables2 = dplyr::case_when(
dplyr%in% relft ~ "REDYLFT",
variables %in% wnleft ~ "WNLEFT",
variables %in% lgwt ~ "LGWT",
variables TRUE ~ variables
|>
)) ::group_by(study, variables2) |>
dplyr::summarise(n = dplyr::n(), .groups = "drop") |>
dplyr::filter(n > 1L) dplyr
# A tibble: 1 × 3
study variables2 n
<chr> <chr> <int>
1 8958 LGWT 2
Code
#+ Select tables the contain the variables we need -----
<- paste0("sn_", variables |>
tbls ::filter(variables %in% vars) |>
dplyr::mutate(variables2 = dplyr::case_when(
dplyr%in% relft ~ "REDYLFT",
variables %in% wnleft ~ "WNLEFT",
variables %in% lgwt ~ "LGWT",
variables TRUE ~ variables
|>
)) ::select(-variables) |>
dplyr::mutate(value = 1) |>
dplyr::pivot_wider(names_from = variables2, values_from = value,
tidyrvalues_fn = function(x) x[1]) |>
na.omit() |>
::select(study) |> dplyr::pull())
dplyr
#+ Calculate Statistics ------
# Loop through the table.
# Calculate the number of people that reason for leaving was voluntary separation
# Calculate the overall people
# This can be extended to include involuntary job separations
# This code chunk loops through the selected tables
# It selects variables which match our specified variables in 'vars'
# It then renames variables with inconsistent names
# Then filters between 16 and 65
# It recode reason left to determine voluntary job separations
# It calculates whether individual left employmnet in last three months
# It then groups by sector and calculate the weighted and unweighted number of
# voluntary job separates by total sector size
<- tbls |>
vjs ::map_df(function(tbl){
purrr<- con |> dplyr::tbl(tbl)
sql_tbl
<- sql_tbl |>
sql_tbl ::select(tidyselect::any_of(vars))
dplyr
if(length(which(colnames(sql_tbl) %in% lgwt))>1){
<- sql_tbl |> dplyr::select(-LGWT20)
sql_tbl
}
|>
sql_tbl ::rename(REDYLFT = tidyselect::any_of(relft)) |>
dplyr::rename(WNLEFT = tidyselect::any_of(wnleft)) |>
dplyr::rename(LGWT = tidyselect::any_of(lgwt))|>
dplyr::filter(AGE1 >= 16 & AGE1 < 65) |>
dplyr::collect() |>
dplyr::mutate(VJS = dplyr::case_when(
dplyr"REDYLFT2" %in% dplyr::tbl_vars(sql_tbl) ~ REDYLFT %in% 4:9,
"REDYL112" %in% dplyr::tbl_vars(sql_tbl) ~ REDYLFT %in% 4:10,
"REDYL132" %in% dplyr::tbl_vars(sql_tbl) ~ REDYLFT %in% c(3, 5:11)
|>
))
::mutate(LFT3M = WNLEFT == 1 & ILODEFR1 == 1) |>
dplyr::mutate(VJS_3M = VJS & LFT3M) |>
dplyr::mutate(EMP = ILODEFR1 == 1) |>
dplyr::mutate(PUBLIC = PUBLICR1 == 2) |>
dplyr::mutate(PRIVATE = PUBLICR1 == 1) |>
dplyr::mutate(SECTOR = ifelse(PUBLIC, "Public", ifelse(PRIVATE, "Private", NA))) |>
dplyr::group_by(SECTOR) |>
dplyr::summarise(vjs_3m_w = crossprod(LGWT, VJS_3M)[1],
dplyrvjs_3m = sum(VJS_3M),
n_w = crossprod(EMP, LGWT)[1],
n = sum(EMP), tbl = tbl)
})
<- study_meta |>
vjs_total ::select(tbl = SN, sitdate = End) |>
dplyr::mutate(tbl = paste("sn", tbl, sep = "_")) |>
dplyr::collect() |>
dplyr::left_join(vjs, by = "tbl") |>
dplyr::group_by(sitdate) |>
dplyr::summarise(vjs_3m_w = sum(vjs_3m_w),
dplyrn_w = sum(n_w)) |>
::mutate(vjs_rate = vjs_3m_w / n_w)
dplyr
<- study_meta |>
vjs_sector ::select(tbl = SN, sitdate = End) |>
dplyr::mutate(tbl = paste("sn", tbl, sep = "_")) |>
dplyr::collect() |>
dplyr::left_join(vjs, by = "tbl") |>
dplyr::filter(!is.na(SECTOR)) |>
dplyr::group_by(sitdate, sector = SECTOR) |>
dplyr::summarise(vjs_3m_w = sum(vjs_3m_w),
dplyrn_w = sum(n_w)) |>
::mutate(vjs_rate = vjs_3m_w / n_w) dplyr
`summarise()` has grouped output by 'sitdate'. You can override using the
`.groups` argument.
Code
$vjs_rate[vjs_total$vjs_rate == 0] <- NA
vjs_total$vjs_rate[vjs_sector$vjs_rate == 0] <- NA
vjs_sector
ggplot(vjs_total) +
geom_smooth(aes(sitdate, vjs_rate)) +
geom_line(aes(sitdate, vjs_rate))
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 11 rows containing non-finite values (`stat_smooth()`).
Warning: Removed 8 rows containing missing values (`geom_line()`).
Code
ggplot(vjs_sector) +
geom_smooth(aes(sitdate, vjs_rate)) +
geom_line(aes(sitdate, vjs_rate)) +
facet_wrap(~sector)
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 4 rows containing non-finite values (`stat_smooth()`).
Code
::dbDisconnect(con)
DBI
write.csv(vjs_total, "vjs_total.csv", row.names=FALSE)
write.csv(vjs_sector, "vjs_sector.csv", row.names = FALSE)