Determine patient status at specific end of follow-up - tidyverse version

pat_status(
  wide_df,
  fu_end = NULL,
  dattype = NULL,
  status_var = "p_status",
  life_var = NULL,
  spc_var = NULL,
  birthdat_var = NULL,
  lifedat_var = NULL,
  lifedatmin_var = NULL,
  fcdat_var = NULL,
  spcdat_var = NULL,
  life_stat_alive = NULL,
  life_stat_dead = NULL,
  spc_stat_yes = NULL,
  spc_stat_no = NULL,
  lifedat_fu_end = NULL,
  use_lifedatmin = FALSE,
  check = TRUE,
  as_labelled_factor = FALSE
)

Arguments

wide_df

dataframe in wide format

fu_end

End of follow-up as a date in the format YYYY-MM-DD.

dattype

can be "zfkd" or "seer" or NULL. Will set default variable names if dattype is "seer" or "zfkd". Default is NULL.

status_var

Name of the newly calculated variable for patient status. Default is p_status.

life_var

Name of variable containing life status. Will override dattype preset.

spc_var

Name of variable containing SPC status. Will override dattype preset.

birthdat_var

Name of variable containing Date of Birth. Will override dattype preset.

lifedat_var

Name of variable containing Date of Death. Will override dattype preset.

lifedatmin_var

Name of variable containing the minimum Date of Death when true DoD is missing. Will override dattype preset. Will only be used if use_lifedatmin = TRUE.

fcdat_var

Name of variable containing Date of Primary Cancer diagnosis. Will override dattype preset.

spcdat_var

Name of variable containing Date of SPC diagnosis. Will override dattype preset.

life_stat_alive

Value for alive status in life_var. Will override dattype preset.

life_stat_dead

Value for dead status in life_var. Will override dattype preset.

spc_stat_yes

Value for SPC occurred in spc_var. Will override dattype preset.

spc_stat_no

Value for no SPC in spc_var. Will override dattype preset.

lifedat_fu_end

Date of last FU of alive status in registry data. Will override dattype preset (2017-03-31 for zfkd; 2018-12-31 for seer).

use_lifedatmin

If TRUE, use the Date of Death from lifedatmin_var when the Date of Death is missing. Default is FALSE.

check

Check the newly calculated patient status variable (named by status_var; p_status by default). Default is TRUE.

as_labelled_factor

If TRUE, output status_var as labelled factor variable. Default is FALSE.

Value

wide_df

Examples

# Load the sample data set used throughout this example
data("us_second_cancer")

# Preparation: the function requires wide-format data, so reshape first and
# then derive the SPC helper columns (p_spc, count_spc) in one mutate() call.
# NOTE(review): count_spc is set to 1 when t_site_icd.2 is missing, i.e. for
# the "No SPC" rows - confirm that this coding is intended.
usdata_wide <- us_second_cancer %>%
  msSPChelpR::reshape_wide_tidyr(
    case_id_var = "fake_id",
    time_id_var = "SEQ_NUM",
    timevar_max = 10
  ) %>%
  dplyr::mutate(
    p_spc = dplyr::if_else(is.na(t_site_icd.2), "No SPC", "SPC developed"),
    count_spc = dplyr::if_else(is.na(t_site_icd.2), 1, 0)
  )

# Run the function: determine patient status at the chosen end of follow-up
msSPChelpR::pat_status(
  usdata_wide,
  fu_end = "2017-12-31",
  dattype = "seer",
  status_var = "p_status",
  life_var = "p_alive.1",
  spc_var = NULL,
  birthdat_var = "datebirth.1",
  lifedat_var = "datedeath.1",
  use_lifedatmin = FALSE,
  check = TRUE,
  as_labelled_factor = FALSE
)
#> # A tibble: 11 × 3
#>    p_alive.1 p_status     n
#>    <chr>        <dbl> <int>
#>  1 Alive            1 16051
#>  2 Alive            2 17816
#>  3 Alive           97    19
#>  4 Alive           98  2523
#>  5 Dead             1  2566
#>  6 Dead             2  2086
#>  7 Dead             3 18169
#>  8 Dead             4  8676
#>  9 Dead            97     2
#> 10 Dead            98   147
#> 11 Dead            NA     5
#> # A tibble: 7 × 2
#>   p_status     n
#>      <dbl> <int>
#> 1        1 18617
#> 2        2 19902
#> 3        3 18169
#> 4        4  8676
#> 5       97    21
#> 6       98  2670
#> 7       NA     5
#> # A tibble: 68,060 × 130
#>    fake_id registry.1 sex.1 race.1 datebirth.1 t_datediag.1 t_site_icd.1 t_dco.1
#>    <chr>   <chr>      <chr> <chr>  <date>      <date>       <chr>        <chr>  
#>  1 100004  SEER Reg … Male  White  1926-01-01  1992-07-15   C50          histol…
#>  2 100034  SEER Reg … Male  White  1979-01-01  2000-06-15   C50          histol…
#>  3 100037  SEER Reg … Fema… White  1938-01-01  1996-01-15   C54          histol…
#>  4 100038  SEER Reg … Male  White  1989-01-01  1991-04-15   C50          histol…
#>  5 100039  SEER Reg … Fema… White  1946-01-01  2003-08-15   C50          histol…
#>  6 100047  SEER Reg … Fema… White  1927-01-01  1998-04-15   C50          histol…
#>  7 100057  SEER Reg … Male  Black  1961-01-01  2010-04-15   C18          histol…
#>  8 100060  SEER Reg … Fema… White  1947-01-01  2003-08-15   C50          histol…
#>  9 100063  SEER Reg … Fema… Black  1938-01-01  1995-12-15   C50          histol…
#> 10 100073  SEER Reg … Male  White  1960-01-01  1993-11-15   C44          histol…
#> # ℹ 68,050 more rows
#> # ℹ 122 more variables: t_hist.1 <int>, fc_age.1 <int>, datedeath.1 <date>,
#> #   p_alive.1 <chr>, p_dodmin.1 <date>, fc_agegroup.1 <chr>,
#> #   t_yeardiag.1 <chr>, registry.2 <chr>, sex.2 <chr>, race.2 <chr>,
#> #   datebirth.2 <date>, t_datediag.2 <date>, t_site_icd.2 <chr>, t_dco.2 <chr>,
#> #   t_hist.2 <int>, fc_age.2 <int>, datedeath.2 <date>, p_alive.2 <chr>,
#> #   p_dodmin.2 <date>, fc_agegroup.2 <chr>, t_yeardiag.2 <chr>, …