Code
library(ggplot2)Warning: package 'ggplot2' was built under R version 4.3.2
Code
# This script calculates voluntary and involuntary job separations from the
# labour market.
#+ Setup ----
# Connect to Database
con <- DBI::dbConnect(
odbc::odbc(), driver = "PostgreSQL ODBC Driver(Unicode)",
database = "lfs", uid = "lheley", host = "localhost", pwd = "lheley",
port = 5432, maxvarcharsize = 0
)
# Extract Meta Data
study_meta <- con |>
dplyr::tbl("2qlfs_index") |>
dplyr::select(1:4) |>
dplyr::collect() |>
dplyr::left_join(
con |>
dplyr::tbl("study_filename_lu") |>
dplyr::collect() |>
dplyr::rename(SN = study) |>
dplyr::mutate(SN = as.numeric(SN)),
by = "SN"
)
#+ Define Variables ----
# These variables were defined through comparison of the previous
# ONS publications.
# Where the question or possible responses to a question have changed
# the variable is updated.
wnleft <- c("WNEFT112","WNLEFT2")
relft <- c("REDYL112","REDYL132","REDYLFT2")
sector <- "PUBLICR1"
employment <- "ILODEFR1"
age <- "AGE1"
id <- c("PERSID")
lgwt <- c("LGWT","LGWT18", "LGWT20", "LGWT22") # This responds to different population weights.
vars <- c(id, lgwt, wnleft, relft, sector, employment, age)
tbls <- DBI::dbListTables(con)
tbls <- tbls[grepl("sn_", tbls)]
variables <- tbls |>
purrr::map_df(function(tbl){
variables <- con |>
dplyr::tbl(tbl) |>
head(n = 100) |>
dplyr::collect()
nm <- names(variables)
names(variables) <- toupper(nm)
variables <- variables |>
dplyr::select(tidyselect::any_of(vars)) |>
names()
tibble::tibble(study = substr(tbl, 4, 9), variables)
})
variables |>
dplyr::filter(variables %in% vars) |>
dplyr::mutate(variables2 = dplyr::case_when(
variables %in% relft ~ "REDYLFT",
variables %in% wnleft ~ "WNLEFT",
variables %in% lgwt ~ "LGWT",
TRUE ~ variables
)) |>
dplyr::group_by(study, variables2) |>
dplyr::summarise(n = dplyr::n(), .groups = "drop") |>
dplyr::filter(n > 1L) # A tibble: 1 × 3
study variables2 n
<chr> <chr> <int>
1 8958 LGWT 2
Code
#+ Select tables the contain the variables we need -----
tbls <- paste0("sn_", variables |>
dplyr::filter(variables %in% vars) |>
dplyr::mutate(variables2 = dplyr::case_when(
variables %in% relft ~ "REDYLFT",
variables %in% wnleft ~ "WNLEFT",
variables %in% lgwt ~ "LGWT",
TRUE ~ variables
)) |>
dplyr::select(-variables) |>
dplyr::mutate(value = 1) |>
tidyr::pivot_wider(names_from = variables2, values_from = value,
values_fn = function(x) x[1]) |>
na.omit() |>
dplyr::select(study) |> dplyr::pull())
#+ Calculate Statistics ------
# Loop through the table.
# Calculate the number of people that reason for leaving was voluntary separation
# Calculate the overall people
# This can be extended to include involuntary job separations
# This code chunk loops through the selected tables
# It selects variables which match our specified variables in 'vars'
# It then renames variables with inconsistent names
# Then filters between 16 and 65
# It recode reason left to determine voluntary job separations
# It calculates whether individual left employmnet in last three months
# It then groups by sector and calculate the weighted and unweighted number of
# voluntary job separates by total sector size
vjs <- tbls |>
purrr::map_df(function(tbl){
sql_tbl <- con |> dplyr::tbl(tbl)
sql_tbl <- sql_tbl |>
dplyr::select(tidyselect::any_of(vars))
if(length(which(colnames(sql_tbl) %in% lgwt))>1){
sql_tbl <- sql_tbl |> dplyr::select(-LGWT20)
}
sql_tbl |>
dplyr::rename(REDYLFT = tidyselect::any_of(relft)) |>
dplyr::rename(WNLEFT = tidyselect::any_of(wnleft)) |>
dplyr::rename(LGWT = tidyselect::any_of(lgwt))|>
dplyr::filter(AGE1 >= 16 & AGE1 < 65) |>
dplyr::collect() |>
dplyr::mutate(VJS = dplyr::case_when(
"REDYLFT2" %in% dplyr::tbl_vars(sql_tbl) ~ REDYLFT %in% 4:9,
"REDYL112" %in% dplyr::tbl_vars(sql_tbl) ~ REDYLFT %in% 4:10,
"REDYL132" %in% dplyr::tbl_vars(sql_tbl) ~ REDYLFT %in% c(3, 5:11)
)) |>
dplyr::mutate(LFT3M = WNLEFT == 1 & ILODEFR1 == 1) |>
dplyr::mutate(VJS_3M = VJS & LFT3M) |>
dplyr::mutate(EMP = ILODEFR1 == 1) |>
dplyr::mutate(PUBLIC = PUBLICR1 == 2) |>
dplyr::mutate(PRIVATE = PUBLICR1 == 1) |>
dplyr::mutate(SECTOR = ifelse(PUBLIC, "Public", ifelse(PRIVATE, "Private", NA))) |>
dplyr::group_by(SECTOR) |>
dplyr::summarise(vjs_3m_w = crossprod(LGWT, VJS_3M)[1],
vjs_3m = sum(VJS_3M),
n_w = crossprod(EMP, LGWT)[1],
n = sum(EMP), tbl = tbl)
})
vjs_total <- study_meta |>
dplyr::select(tbl = SN, sitdate = End) |>
dplyr::mutate(tbl = paste("sn", tbl, sep = "_")) |>
dplyr::collect() |>
dplyr::left_join(vjs, by = "tbl") |>
dplyr::group_by(sitdate) |>
dplyr::summarise(vjs_3m_w = sum(vjs_3m_w),
n_w = sum(n_w)) |>
dplyr::mutate(vjs_rate = vjs_3m_w / n_w)
vjs_sector <- study_meta |>
dplyr::select(tbl = SN, sitdate = End) |>
dplyr::mutate(tbl = paste("sn", tbl, sep = "_")) |>
dplyr::collect() |>
dplyr::left_join(vjs, by = "tbl") |>
dplyr::filter(!is.na(SECTOR)) |>
dplyr::group_by(sitdate, sector = SECTOR) |>
dplyr::summarise(vjs_3m_w = sum(vjs_3m_w),
n_w = sum(n_w)) |>
dplyr::mutate(vjs_rate = vjs_3m_w / n_w) `summarise()` has grouped output by 'sitdate'. You can override using the
`.groups` argument.
Code
vjs_total$vjs_rate[vjs_total$vjs_rate == 0] <- NA
vjs_sector$vjs_rate[vjs_sector$vjs_rate == 0] <- NA
ggplot(vjs_total) +
geom_smooth(aes(sitdate, vjs_rate)) +
geom_line(aes(sitdate, vjs_rate))`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 11 rows containing non-finite values (`stat_smooth()`).
Warning: Removed 8 rows containing missing values (`geom_line()`).

Code
ggplot(vjs_sector) +
geom_smooth(aes(sitdate, vjs_rate)) +
geom_line(aes(sitdate, vjs_rate)) +
facet_wrap(~sector)`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 4 rows containing non-finite values (`stat_smooth()`).

Code
DBI::dbDisconnect(con)
write.csv(vjs_total, "vjs_total.csv", row.names=FALSE)
write.csv(vjs_sector, "vjs_sector.csv", row.names = FALSE)