Calculate follow-up time per case until end of follow-up depending on pat_status - tidyverse version

  futime_var_new = "p_futimeyrs",
  dattype = NULL,
  check = TRUE,
  time_unit = "years",
  status_var = "p_status",
  lifedat_var = NULL,
  fcdat_var = NULL,
  spcdat_var = NULL,
  quiet = FALSE



dataframe in wide format


Name of the newly calculated variable for follow-up time. Default is p_futimeyrs.


End of follow-up as a date in the format YYYY-MM-DD.


Can be "zfkd", "seer" or NULL. If dattype is "seer" or "zfkd", default variable names are set accordingly. Default is NULL.


Check the newly calculated follow-up time variable by printing summary statistics (mean, minimum, maximum, median) per patient status. Default is TRUE.


Unit of follow-up time (can be "days", "weeks", "months", "years"). Default is "years".


Name of the patient status variable that was previously created. Default is p_status.


Name of variable containing Date of Death. Will override dattype preset.


Name of variable containing Date of Primary Cancer diagnosis. Will override dattype preset.


Name of variable containing Date of SPC diagnosis. Will override dattype preset.


If TRUE, warnings and messages will be suppressed. Default is FALSE.




#load sample data

#prep step - make wide data as this is the required format
usdata_wide <- us_second_cancer %>%
                    msSPChelpR::reshape_wide_tidyr(case_id_var = "fake_id", 
                    time_id_var = "SEQ_NUM", timevar_max = 10)
#prep step - calculate p_spc variable
usdata_wide <- usdata_wide %>%
                 dplyr::mutate(p_spc = dplyr::case_when(is.na(t_site_icd.2)   ~ "No SPC",
                                                       !is.na(t_site_icd.2)   ~ "SPC developed",
                                                       TRUE ~ NA_character_)) %>%
                 dplyr::mutate(count_spc = dplyr::case_when(is.na(t_site_icd.2)   ~ 1,
                                                              TRUE ~ 0))
#prep step - create patient status variable
usdata_wide <- usdata_wide %>%
                  msSPChelpR::pat_status(., fu_end = "2017-12-31", dattype = "seer",
                                         status_var = "p_status", life_var = "p_alive.1",
                                         birthdat_var = "datebirth.1", lifedat_var = "datedeath.1")
#> # A tibble: 11 × 3
#>    p_alive.1 p_status     n
#>    <chr>        <dbl> <int>
#>  1 Alive            1 16051
#>  2 Alive            2 17816
#>  3 Alive           97    19
#>  4 Alive           98  2523
#>  5 Dead             1  2566
#>  6 Dead             2  2086
#>  7 Dead             3 18169
#>  8 Dead             4  8676
#>  9 Dead            97     2
#> 10 Dead            98   147
#> 11 Dead            NA     5
#> # A tibble: 7 × 2
#>   p_status     n
#>      <dbl> <int>
#> 1        1 18617
#> 2        2 19902
#> 3        3 18169
#> 4        4  8676
#> 5       97    21
#> 6       98  2670
#> 7       NA     5
#now we can run the function
                        futime_var_new = "p_futimeyrs", 
                        fu_end = "2017-12-31",
                        dattype = "seer", 
                        time_unit = "years",
                        status_var = "p_status",
                        lifedat_var = "datedeath.1", 
                        fcdat_var = "t_datediag.1", 
                        spcdat_var = "t_datediag.2")
#> Warning: There were 6 warnings in `dplyr::summarise()`.
#> The first warning was:
#>  In argument: `min_futime = min(.data[["p_futimeyrs"]], na.rm = TRUE)`.
#>  In group 5: `p_status = 97`.
#> Caused by warning in `min()`:
#> ! no non-missing arguments to min; returning Inf
#>  Run `dplyr::last_dplyr_warnings()` to see the 5 remaining warnings.
#> # A tibble: 7 × 5
#>   p_status mean_futime min_futime max_futime median_futime
#>      <dbl>       <dbl>      <dbl>      <dbl>         <dbl>
#> 1        1        9.23     0.0438       27.0          8.04
#> 2        2        8.93     0            26.9          7.76
#> 3        3        8.65     0            25.9          7.54
#> 4        4        6.46     0            25.3          5.33
#> 5       97      NaN      Inf          -Inf           NA   
#> 6       98      NaN      Inf          -Inf           NA   
#> 7       NA      NaN      Inf          -Inf           NA   
#> # A tibble: 68,060 × 131
#>    fake_id registry.1 sex.1 race.1 datebirth.1 t_datediag.1 t_site_icd.1 t_dco.1
#>    <chr>   <chr>      <chr> <chr>  <date>      <date>       <chr>        <chr>  
#>  1 100004  SEER Reg … Male  White  1926-01-01  1992-07-15   C50          histol…
#>  2 100034  SEER Reg … Male  White  1979-01-01  2000-06-15   C50          histol…
#>  3 100037  SEER Reg … Fema… White  1938-01-01  1996-01-15   C54          histol…
#>  4 100038  SEER Reg … Male  White  1989-01-01  1991-04-15   C50          histol…
#>  5 100039  SEER Reg … Fema… White  1946-01-01  2003-08-15   C50          histol…
#>  6 100047  SEER Reg … Fema… White  1927-01-01  1998-04-15   C50          histol…
#>  7 100057  SEER Reg … Male  Black  1961-01-01  2010-04-15   C18          histol…
#>  8 100060  SEER Reg … Fema… White  1947-01-01  2003-08-15   C50          histol…
#>  9 100063  SEER Reg … Fema… Black  1938-01-01  1995-12-15   C50          histol…
#> 10 100073  SEER Reg … Male  White  1960-01-01  1993-11-15   C44          histol…
#> # ℹ 68,050 more rows
#> # ℹ 123 more variables: t_hist.1 <int>, fc_age.1 <int>, datedeath.1 <date>,
#> #   p_alive.1 <chr>, p_dodmin.1 <date>, fc_agegroup.1 <chr>,
#> #   t_yeardiag.1 <chr>, registry.2 <chr>, sex.2 <chr>, race.2 <chr>,
#> #   datebirth.2 <date>, t_datediag.2 <date>, t_site_icd.2 <chr>, t_dco.2 <chr>,
#> #   t_hist.2 <int>, fc_age.2 <int>, datedeath.2 <date>, p_alive.2 <chr>,
#> #   p_dodmin.2 <date>, fc_agegroup.2 <chr>, t_yeardiag.2 <chr>, …