Determine patient status at specific end of follow-up - tidyverse version

  fu_end = NULL,
  dattype = NULL,
  status_var = "p_status",
  life_var = NULL,
  spc_var = NULL,
  birthdat_var = NULL,
  lifedat_var = NULL,
  lifedatmin_var = NULL,
  fcdat_var = NULL,
  spcdat_var = NULL,
  life_stat_alive = NULL,
  life_stat_dead = NULL,
  spc_stat_yes = NULL,
  spc_stat_no = NULL,
  lifedat_fu_end = NULL,
  use_lifedatmin = FALSE,
  check = TRUE,
  as_labelled_factor = FALSE



Data frame in wide format.


End of follow-up as a date in the format YYYY-MM-DD.


Can be "zfkd", "seer", or NULL. If dattype is "seer" or "zfkd", the default variable names for that data type are preset. Default is NULL.


Name of the newly calculated variable for patient status. Default is "p_status".


Name of variable containing life status. Will override dattype preset.


Name of variable containing SPC status. Will override dattype preset.


Name of variable containing Date of Birth. Will override dattype preset.


Name of variable containing Date of Death. Will override dattype preset.


Name of variable containing the minimum Date of Death when true DoD is missing. Will override dattype preset. Will only be used if use_lifedatmin = TRUE.


Name of variable containing Date of Primary Cancer diagnosis. Will override dattype preset.


Name of variable containing Date of SPC diagnosis. Will override dattype preset.


Value for alive status in life_var. Will override dattype preset.


Value for dead status in life_var. Will override dattype preset.


Value for SPC occurred in spc_var. Will override dattype preset.


Value for no SPC in spc_var. Will override dattype preset.


Date of last follow-up (FU) of alive status in the registry data. Will override dattype preset (2017-03-31 for zfkd; 2018-12-31 for seer).


If TRUE, the Date of Death is taken from lifedatmin_var when the true Date of Death is missing. Default is FALSE.


If TRUE, check the newly calculated status variable (named by status_var; p_status by default). Default is TRUE.


If TRUE, output status_var as labelled factor variable. Default is FALSE.




# Load the sample data used throughout this example.
# NOTE(review): the loading statement was missing here; restored on the
# assumption that `us_second_cancer` is a dataset shipped with msSPChelpR
# -- confirm.
data("us_second_cancer", package = "msSPChelpR")

# Prep step: reshape to wide format, as this is the required input format.
usdata_wide <- us_second_cancer %>%
  msSPChelpR::reshape_wide_tidyr(
    case_id_var = "fake_id",
    time_id_var = "SEQ_NUM",
    timevar_max = 10
  )

# Prep step: calculate the SPC status variable (p_spc) and an SPC indicator
# (count_spc).
# NOTE(review): the conditions left of `~` were missing. They are
# reconstructed as "no second tumor recorded" (t_site_icd.2 is NA) meaning
# no SPC -- confirm. count_spc is set to 1 for patients WITH an SPC so that
# it agrees with p_spc -- confirm the intended coding.
usdata_wide <- usdata_wide %>%
  dplyr::mutate(
    p_spc = dplyr::case_when(
      is.na(t_site_icd.2) ~ "No SPC",
      !is.na(t_site_icd.2) ~ "SPC developed",
      TRUE ~ NA_character_
    ),
    count_spc = dplyr::case_when(
      !is.na(t_site_icd.2) ~ 1,
      TRUE ~ 0
    )
  )

# Now we can run the function.
# NOTE(review): the function name and the leading data argument were missing;
# restored as msSPChelpR::pat_status_tt(usdata_wide, ...) -- confirm.
msSPChelpR::pat_status_tt(
  usdata_wide,
  fu_end = "2017-12-31",
  dattype = "seer",
  status_var = "p_status",
  life_var = "p_alive.1",
  spc_var = NULL,
  birthdat_var = "datebirth.1",
  lifedat_var = "datedeath.1",
  use_lifedatmin = FALSE,
  check = TRUE,
  as_labelled_factor = FALSE
)
#> # A tibble: 11 × 3
#>    p_alive.1 p_status     n
#>    <chr>        <dbl> <int>
#>  1 Alive            1 16051
#>  2 Alive            2 17816
#>  3 Alive           97    19
#>  4 Alive           98  2523
#>  5 Dead             1  2566
#>  6 Dead             2  2086
#>  7 Dead             3 18169
#>  8 Dead             4  8676
#>  9 Dead            97     2
#> 10 Dead            98   147
#> 11 Dead            NA     5
#> # A tibble: 7 × 2
#>   p_status     n
#>      <dbl> <int>
#> 1        1 18617
#> 2        2 19902
#> 3        3 18169
#> 4        4  8676
#> 5       97    21
#> 6       98  2670
#> 7       NA     5
#> # A tibble: 68,060 × 130
#>    fake_id registry.1 sex.1 race.1 datebirth.1 t_datediag.1 t_site_icd.1 t_dco.1
#>    <chr>   <chr>      <chr> <chr>  <date>      <date>       <chr>        <chr>  
#>  1 100004  SEER Reg … Male  White  1926-01-01  1992-07-15   C50          histol…
#>  2 100034  SEER Reg … Male  White  1979-01-01  2000-06-15   C50          histol…
#>  3 100037  SEER Reg … Fema… White  1938-01-01  1996-01-15   C54          histol…
#>  4 100038  SEER Reg … Male  White  1989-01-01  1991-04-15   C50          histol…
#>  5 100039  SEER Reg … Fema… White  1946-01-01  2003-08-15   C50          histol…
#>  6 100047  SEER Reg … Fema… White  1927-01-01  1998-04-15   C50          histol…
#>  7 100057  SEER Reg … Male  Black  1961-01-01  2010-04-15   C18          histol…
#>  8 100060  SEER Reg … Fema… White  1947-01-01  2003-08-15   C50          histol…
#>  9 100063  SEER Reg … Fema… Black  1938-01-01  1995-12-15   C50          histol…
#> 10 100073  SEER Reg … Male  White  1960-01-01  1993-11-15   C44          histol…
#> # ℹ 68,050 more rows
#> # ℹ 122 more variables: t_hist.1 <int>, fc_age.1 <int>, datedeath.1 <date>,
#> #   p_alive.1 <chr>, p_dodmin.1 <date>, fc_agegroup.1 <chr>,
#> #   t_yeardiag.1 <chr>, registry.2 <chr>, sex.2 <chr>, race.2 <chr>,
#> #   datebirth.2 <date>, t_datediag.2 <date>, t_site_icd.2 <chr>, t_dco.2 <chr>,
#> #   t_hist.2 <int>, fc_age.2 <int>, datedeath.2 <date>, p_alive.2 <chr>,
#> #   p_dodmin.2 <date>, fc_agegroup.2 <chr>, t_yeardiag.2 <chr>, …