R/vital_status.R
vital_status.Rd
Determine the vital status at the end of follow-up based on the patient status variable created by pat_status() - tidyverse version
vital_status(
wide_df,
status_var = "p_status",
life_var_new = "p_alive",
check = TRUE,
as_labelled_factor = FALSE
)
Data frame in wide format.
Name of the patient status variable that was previously created. Default is p_status.
Name of the newly calculated variable for patient vital status. Default is p_alive.
If TRUE, check the newly calculated variable life_var_new by printing a frequency table. Default is TRUE.
If TRUE, output life_var_new as a labelled factor variable. Default is FALSE.
wide_df, the data frame in wide format, including the newly calculated variable life_var_new
# load sample data
data("us_second_cancer")
# prep step - reshape to wide format, which is the required input format
usdata_wide <- msSPChelpR::reshape_wide_tidyr(
  us_second_cancer,
  case_id_var = "fake_id",
  time_id_var = "SEQ_NUM",
  timevar_max = 10
)
# prep step - calculate p_spc variable and the matching 0/1 SPC indicator
usdata_wide <- usdata_wide %>%
  dplyr::mutate(
    # a missing second tumor site (t_site_icd.2) means no second primary
    # cancer (SPC) was recorded for this patient
    p_spc = dplyr::case_when(
      is.na(t_site_icd.2) ~ "No SPC",
      TRUE ~ "SPC developed"
    ),
    # count_spc has to agree with p_spc: 1 = SPC developed, 0 = no SPC
    # (the previous version assigned 1 to patients WITHOUT an SPC)
    count_spc = dplyr::case_when(
      is.na(t_site_icd.2) ~ 0,
      TRUE ~ 1
    )
  )
# prep step - create the patient status variable (p_status)
usdata_wide <- msSPChelpR::pat_status(
  usdata_wide,
  fu_end = "2017-12-31",
  dattype = "seer",
  status_var = "p_status",
  life_var = "p_alive.1",
  birthdat_var = "datebirth.1",
  lifedat_var = "datedeath.1"
)
#> # A tibble: 11 × 3
#> p_alive.1 p_status n
#> <chr> <dbl> <int>
#> 1 Alive 1 16051
#> 2 Alive 2 17816
#> 3 Alive 97 19
#> 4 Alive 98 2523
#> 5 Dead 1 2566
#> 6 Dead 2 2086
#> 7 Dead 3 18169
#> 8 Dead 4 8676
#> 9 Dead 97 2
#> 10 Dead 98 147
#> 11 Dead NA 5
#> # A tibble: 7 × 2
#> p_status n
#> <dbl> <int>
#> 1 1 18617
#> 2 2 19902
#> 3 3 18169
#> 4 4 8676
#> 5 97 21
#> 6 98 2670
#> 7 NA 5
# with the prep steps done, run the function itself
msSPChelpR::vital_status(
  wide_df = usdata_wide,
  status_var = "p_status",
  life_var_new = "p_alive_new",
  check = TRUE,
  as_labelled_factor = FALSE
)
#> # A tibble: 7 × 3
#> p_status p_alive_new n
#> <dbl> <dbl> <int>
#> 1 1 10 18617
#> 2 2 10 19902
#> 3 3 11 18169
#> 4 4 11 8676
#> 5 97 97 21
#> 6 98 98 2670
#> 7 NA NA 5
#> # A tibble: 68,060 × 131
#> fake_id registry.1 sex.1 race.1 datebirth.1 t_datediag.1 t_site_icd.1 t_dco.1
#> <chr> <chr> <chr> <chr> <date> <date> <chr> <chr>
#> 1 100004 SEER Reg … Male White 1926-01-01 1992-07-15 C50 histol…
#> 2 100034 SEER Reg … Male White 1979-01-01 2000-06-15 C50 histol…
#> 3 100037 SEER Reg … Fema… White 1938-01-01 1996-01-15 C54 histol…
#> 4 100038 SEER Reg … Male White 1989-01-01 1991-04-15 C50 histol…
#> 5 100039 SEER Reg … Fema… White 1946-01-01 2003-08-15 C50 histol…
#> 6 100047 SEER Reg … Fema… White 1927-01-01 1998-04-15 C50 histol…
#> 7 100057 SEER Reg … Male Black 1961-01-01 2010-04-15 C18 histol…
#> 8 100060 SEER Reg … Fema… White 1947-01-01 2003-08-15 C50 histol…
#> 9 100063 SEER Reg … Fema… Black 1938-01-01 1995-12-15 C50 histol…
#> 10 100073 SEER Reg … Male White 1960-01-01 1993-11-15 C44 histol…
#> # ℹ 68,050 more rows
#> # ℹ 123 more variables: t_hist.1 <int>, fc_age.1 <int>, datedeath.1 <date>,
#> # p_alive.1 <chr>, p_dodmin.1 <date>, fc_agegroup.1 <chr>,
#> # t_yeardiag.1 <chr>, registry.2 <chr>, sex.2 <chr>, race.2 <chr>,
#> # datebirth.2 <date>, t_datediag.2 <date>, t_site_icd.2 <chr>, t_dco.2 <chr>,
#> # t_hist.2 <int>, fc_age.2 <int>, datedeath.2 <date>, p_alive.2 <chr>,
#> # p_dodmin.2 <date>, fc_agegroup.2 <chr>, t_yeardiag.2 <chr>, …