Calculate follow-up time per case until end of follow-up depending on pat_status - tidyverse version

calc_futime(
  wide_df,
  futime_var_new = "p_futimeyrs",
  fu_end,
  dattype = NULL,
  check = TRUE,
  time_unit = "years",
  status_var = "p_status",
  lifedat_var = NULL,
  fcdat_var = NULL,
  spcdat_var = NULL,
  quiet = FALSE
)

Arguments

wide_df: dataframe in wide format
futime_var_new: Name of the newly calculated variable for follow-up time. Default is p_futimeyrs.
fu_end: End of follow-up as a date in the format YYYY-MM-DD (e.g. "2017-12-31").
dattype: can be "zfkd" or "seer" or NULL. Will set default variable names if dattype is "seer" or "zfkd". Default is NULL.
check: Check the newly calculated follow-up time variable by printing summary statistics (mean, min, max, median) grouped by status_var. Default is TRUE.
time_unit: Unit of follow-up time (can be "days", "weeks", "months", "years"). Default is "years".
status_var: Name of the patient status variable that was previously created. Default is p_status.
lifedat_var: Name of variable containing Date of Death. Will override dattype preset.
fcdat_var: Name of variable containing Date of Primary Cancer diagnosis. Will override dattype preset.
spcdat_var: Name of variable containing Date of SPC diagnosis. Will override dattype preset.
quiet: If TRUE, warnings and messages will be suppressed. Default is FALSE.

Value

wide_df with the newly calculated follow-up time variable (named by futime_var_new) added.

Examples

#load sample data
data("us_second_cancer")

#prep step - make wide data as this is the required format
usdata_wide <- us_second_cancer %>%
                    msSPChelpR::reshape_wide_tidyr(case_id_var = "fake_id", 
                    time_id_var = "SEQ_NUM", timevar_max = 10)
                    
#prep step - calculate p_spc variable
usdata_wide <- usdata_wide %>%
                 dplyr::mutate(p_spc = dplyr::case_when(is.na(t_site_icd.2)   ~ "No SPC",
                                                       !is.na(t_site_icd.2)   ~ "SPC developed",
                                                       TRUE ~ NA_character_)) %>%
                 dplyr::mutate(count_spc = dplyr::case_when(is.na(t_site_icd.2)   ~ 1,
                                                              TRUE ~ 0))
                                                              
#prep step - create patient status variable
usdata_wide <- usdata_wide %>%
                  msSPChelpR::pat_status(., fu_end = "2017-12-31", dattype = "seer",
                                         status_var = "p_status", life_var = "p_alive.1",
                                         birthdat_var = "datebirth.1", lifedat_var = "datedeath.1")
#> # A tibble: 11 × 3
#>    p_alive.1 p_status     n
#>    <chr>        <dbl> <int>
#>  1 Alive            1 16051
#>  2 Alive            2 17816
#>  3 Alive           97    19
#>  4 Alive           98  2523
#>  5 Dead             1  2566
#>  6 Dead             2  2086
#>  7 Dead             3 18169
#>  8 Dead             4  8676
#>  9 Dead            97     2
#> 10 Dead            98   147
#> 11 Dead            NA     5
#> # A tibble: 7 × 2
#>   p_status     n
#>      <dbl> <int>
#> 1        1 18617
#> 2        2 19902
#> 3        3 18169
#> 4        4  8676
#> 5       97    21
#> 6       98  2670
#> 7       NA     5
 
#now we can run the function
msSPChelpR::calc_futime(usdata_wide, 
                        futime_var_new = "p_futimeyrs", 
                        fu_end = "2017-12-31",
                        dattype = "seer", 
                        time_unit = "years",
                        status_var = "p_status",
                        lifedat_var = "datedeath.1", 
                        fcdat_var = "t_datediag.1", 
                        spcdat_var = "t_datediag.2")
#> Warning: There were 6 warnings in `dplyr::summarise()`.
#> The first warning was:
#> ℹ In argument: `min_futime = min(.data[["p_futimeyrs"]], na.rm = TRUE)`.
#> ℹ In group 5: `p_status = 97`.
#> Caused by warning in `min()`:
#> ! no non-missing arguments to min; returning Inf
#> ℹ Run `dplyr::last_dplyr_warnings()` to see the 5 remaining warnings.
#> # A tibble: 7 × 5
#>   p_status mean_futime min_futime max_futime median_futime
#>      <dbl>       <dbl>      <dbl>      <dbl>         <dbl>
#> 1        1        9.23     0.0438       27.0          8.04
#> 2        2        8.93     0            26.9          7.76
#> 3        3        8.65     0            25.9          7.54
#> 4        4        6.46     0            25.3          5.33
#> 5       97      NaN      Inf          -Inf           NA   
#> 6       98      NaN      Inf          -Inf           NA   
#> 7       NA      NaN      Inf          -Inf           NA   
#> # A tibble: 68,060 × 131
#>    fake_id registry.1 sex.1 race.1 datebirth.1 t_datediag.1 t_site_icd.1 t_dco.1
#>    <chr>   <chr>      <chr> <chr>  <date>      <date>       <chr>        <chr>  
#>  1 100004  SEER Reg … Male  White  1926-01-01  1992-07-15   C50          histol…
#>  2 100034  SEER Reg … Male  White  1979-01-01  2000-06-15   C50          histol…
#>  3 100037  SEER Reg … Fema… White  1938-01-01  1996-01-15   C54          histol…
#>  4 100038  SEER Reg … Male  White  1989-01-01  1991-04-15   C50          histol…
#>  5 100039  SEER Reg … Fema… White  1946-01-01  2003-08-15   C50          histol…
#>  6 100047  SEER Reg … Fema… White  1927-01-01  1998-04-15   C50          histol…
#>  7 100057  SEER Reg … Male  Black  1961-01-01  2010-04-15   C18          histol…
#>  8 100060  SEER Reg … Fema… White  1947-01-01  2003-08-15   C50          histol…
#>  9 100063  SEER Reg … Fema… Black  1938-01-01  1995-12-15   C50          histol…
#> 10 100073  SEER Reg … Male  White  1960-01-01  1993-11-15   C44          histol…
#> # ℹ 68,050 more rows
#> # ℹ 123 more variables: t_hist.1 <int>, fc_age.1 <int>, datedeath.1 <date>,
#> #   p_alive.1 <chr>, p_dodmin.1 <date>, fc_agegroup.1 <chr>,
#> #   t_yeardiag.1 <chr>, registry.2 <chr>, sex.2 <chr>, race.2 <chr>,
#> #   datebirth.2 <date>, t_datediag.2 <date>, t_site_icd.2 <chr>, t_dco.2 <chr>,
#> #   t_hist.2 <int>, fc_age.2 <int>, datedeath.2 <date>, p_alive.2 <chr>,
#> #   p_dodmin.2 <date>, fc_agegroup.2 <chr>, t_yeardiag.2 <chr>, …