Calculate Voluntary Job Separation Rates

Calculate voluntary job separation rates from the two quarter labour force survey
Labour Markets
Labour Force Survey
Author

Luke Heley

Published

January 29, 2024

Code
library(ggplot2)
Warning: package 'ggplot2' was built under R version 4.3.2
Code
# This script calculates voluntary and involuntary job separations from the 
# labour market. 

#+ Setup ----

# Connect to Database
con <- DBI::dbConnect(
  odbc::odbc(), driver = "PostgreSQL ODBC Driver(Unicode)", 
  database = "lfs", uid = "lheley", host = "localhost", pwd = "lheley", 
  port = 5432, maxvarcharsize = 0
)

# Extract Meta Data
study_meta <- con |> 
    dplyr::tbl("2qlfs_index") |>
    dplyr::select(1:4) |>
    dplyr::collect() |>
    dplyr::left_join(
      con |>
        dplyr::tbl("study_filename_lu") |>
        dplyr::collect() |>
        dplyr::rename(SN = study) |>
        dplyr::mutate(SN = as.numeric(SN)),
      by = "SN"
    )


#+ Define Variables ----
# These variables were defined through comparison of the previous
# ONS publications. 

# Where the question or possible responses to a question have changed
# the variable is updated. 

wnleft <- c("WNEFT112","WNLEFT2")
relft <- c("REDYL112","REDYL132","REDYLFT2")
sector <- "PUBLICR1"
employment <- "ILODEFR1"
age <- "AGE1"
id <- c("PERSID")
lgwt <- c("LGWT","LGWT18", "LGWT20", "LGWT22") # This responds to different population weights.
vars <- c(id, lgwt, wnleft, relft, sector, employment, age)

tbls <- DBI::dbListTables(con)
tbls <- tbls[grepl("sn_", tbls)]

variables <- tbls |>
  purrr::map_df(function(tbl){
    variables <- con |>
      dplyr::tbl(tbl) |>
      head(n = 100) |>
      dplyr::collect() 
    
    nm <- names(variables)
    names(variables) <- toupper(nm)
    
  variables <- variables |>  
    dplyr::select(tidyselect::any_of(vars)) |>
      names()
    
    tibble::tibble(study = substr(tbl, 4, 9), variables)
  })


variables |> 
  dplyr::filter(variables %in% vars) |>
  dplyr::mutate(variables2 = dplyr::case_when(
                   variables %in% relft ~ "REDYLFT",
                   variables %in% wnleft ~ "WNLEFT",
                   variables %in% lgwt ~ "LGWT",
                   TRUE ~ variables
                 )) |>
  dplyr::group_by(study, variables2) |>
  dplyr::summarise(n = dplyr::n(), .groups = "drop") |>
  dplyr::filter(n > 1L) 
# A tibble: 1 × 3
  study variables2     n
  <chr> <chr>      <int>
1 8958  LGWT           2
Code
#+ Select tables the contain the variables we need -----
tbls <- paste0("sn_", variables |> 
                 dplyr::filter(variables %in% vars) |>
                 dplyr::mutate(variables2 = dplyr::case_when(
                   variables %in% relft ~ "REDYLFT",
                   variables %in% wnleft ~ "WNLEFT",
                   variables %in% lgwt ~ "LGWT",
                   TRUE ~ variables
                 )) |>
                 dplyr::select(-variables) |>
                 dplyr::mutate(value = 1) |>
                 tidyr::pivot_wider(names_from = variables2, values_from = value,
                                    values_fn = function(x) x[1])  |>
                 na.omit() |>
                 dplyr::select(study) |> dplyr::pull())


#+ Calculate Statistics ------
# Loop through the table. 
# Calculate the number of people that reason for leaving was voluntary separation
# Calculate the overall people
# This can be extended to include involuntary job separations


# This code chunk loops through the selected tables
# It selects variables which match our specified variables in 'vars'
# It then renames variables with inconsistent names
# Then filters between 16 and 65
# It recode reason left to determine voluntary job separations
# It calculates whether individual left employmnet in last three months
# It then groups by sector and calculate the weighted and unweighted number of 
#  voluntary job separates by total sector size
vjs <- tbls |>
  purrr::map_df(function(tbl){
    sql_tbl <- con |> dplyr::tbl(tbl) 
    
    sql_tbl <- sql_tbl |>
      dplyr::select(tidyselect::any_of(vars)) 
    
    if(length(which(colnames(sql_tbl) %in% lgwt))>1){
      sql_tbl <- sql_tbl |> dplyr::select(-LGWT20)
    }
    
    sql_tbl |>  
    dplyr::rename(REDYLFT = tidyselect::any_of(relft)) |>
      dplyr::rename(WNLEFT = tidyselect::any_of(wnleft)) |>
      dplyr::rename(LGWT = tidyselect::any_of(lgwt))|>
      dplyr::filter(AGE1 >= 16 & AGE1 < 65) |>
      dplyr::collect() |>
      dplyr::mutate(VJS = dplyr::case_when(
        "REDYLFT2" %in% dplyr::tbl_vars(sql_tbl) ~  REDYLFT %in% 4:9,
        "REDYL112" %in% dplyr::tbl_vars(sql_tbl) ~  REDYLFT %in% 4:10,
        "REDYL132" %in% dplyr::tbl_vars(sql_tbl) ~  REDYLFT %in% c(3, 5:11)
      )) |>
     
      dplyr::mutate(LFT3M = WNLEFT == 1 & ILODEFR1 == 1) |> 
      dplyr::mutate(VJS_3M =  VJS & LFT3M) |>
      dplyr::mutate(EMP = ILODEFR1 == 1) |>
      dplyr::mutate(PUBLIC = PUBLICR1 == 2) |>
      dplyr::mutate(PRIVATE = PUBLICR1 == 1) |>
      dplyr::mutate(SECTOR = ifelse(PUBLIC, "Public", ifelse(PRIVATE, "Private", NA))) |>
      dplyr::group_by(SECTOR) |>
      dplyr::summarise(vjs_3m_w = crossprod(LGWT, VJS_3M)[1],
                       vjs_3m = sum(VJS_3M),
                       n_w = crossprod(EMP, LGWT)[1],
                       n = sum(EMP), tbl = tbl)
  })

vjs_total <- study_meta |> 
  dplyr::select(tbl = SN, sitdate = End) |>
  dplyr::mutate(tbl = paste("sn", tbl, sep = "_")) |>
  dplyr::collect() |>
  dplyr::left_join(vjs, by = "tbl")  |>
  dplyr::group_by(sitdate) |>
  dplyr::summarise(vjs_3m_w = sum(vjs_3m_w),
                   n_w = sum(n_w)) |>
  dplyr::mutate(vjs_rate = vjs_3m_w / n_w) 

vjs_sector <- study_meta |> 
  dplyr::select(tbl = SN, sitdate = End) |>
  dplyr::mutate(tbl = paste("sn", tbl, sep = "_")) |>
  dplyr::collect() |>
  dplyr::left_join(vjs, by = "tbl")  |>
  dplyr::filter(!is.na(SECTOR)) |>
  dplyr::group_by(sitdate, sector = SECTOR) |>
  dplyr::summarise(vjs_3m_w = sum(vjs_3m_w),
                   n_w = sum(n_w)) |>
  dplyr::mutate(vjs_rate = vjs_3m_w / n_w) 
`summarise()` has grouped output by 'sitdate'. You can override using the
`.groups` argument.
Code
vjs_total$vjs_rate[vjs_total$vjs_rate == 0] <- NA
vjs_sector$vjs_rate[vjs_sector$vjs_rate == 0] <- NA


ggplot(vjs_total) + 
  geom_smooth(aes(sitdate, vjs_rate)) +
  geom_line(aes(sitdate, vjs_rate))
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 11 rows containing non-finite values (`stat_smooth()`).
Warning: Removed 8 rows containing missing values (`geom_line()`).

Code
ggplot(vjs_sector) +
  geom_smooth(aes(sitdate, vjs_rate)) +
  geom_line(aes(sitdate, vjs_rate)) +
  facet_wrap(~sector)
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 4 rows containing non-finite values (`stat_smooth()`).

Code
DBI::dbDisconnect(con)

write.csv(vjs_total, "vjs_total.csv", row.names=FALSE)
write.csv(vjs_sector, "vjs_sector.csv", row.names = FALSE)