Calculate crude incidence rates and cross-tabulate results by break variables; cumulative follow-up (FU) time groups are used as xbreak_var

ir_crosstab_byfutime(
  df,
  dattype = NULL,
  count_var,
  futime_breaks = c(0, 0.5, 1, 5, 10, Inf),
  ybreak_vars,
  collapse_ci = FALSE,
  add_total = "no",
  futime_var = NULL,
  alpha = 0.05
)

Arguments

df

dataframe in wide format

dattype

can be "zfkd" or "seer" or NULL. Will set default variable names if dattype is "seer" or "zfkd". Default is NULL.

count_var

variable to be counted as observed case. Should be 1 for case to be counted.

futime_breaks

vector that indicates split points for follow-up time groups (in years) that will be used as xbreak_var. Default is c(0, .5, 1, 5, 10, Inf) that will result in 5 groups (up to 6 months, 6-12 months, 1-5 years, 5-10 years, 10+ years).

ybreak_vars

variables from df by which rates should be stratified in rows of result df. Multiple variables will result in appended rows in result df. ybreak_vars is required.

collapse_ci

If TRUE, the upper and lower confidence limits will be collapsed into one column separated by "-". Default is FALSE.

add_total

option to add a row of totals. Can be either "no" for not adding such a row, or "top" or "bottom" for adding it as the first or last row. Default is "no".

futime_var

variable in df that contains follow-up time per person (in years). Default is set if dattype is given.

alpha

significance level for confidence interval calculations. Default is alpha = 0.05 which will give 95 percent confidence intervals.

Value

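A data frame with one row per level of each variable in ybreak_vars (identified by the columns yvar_name and yvar_label). As shown in the example below, each follow-up time group contributes a set of columns prefixed with the group label: n_base, observed, pyar, abs_ir, abs_ir_lci and abs_ir_uci.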

Examples

#load sample data
data("us_second_cancer")

#prep step - make wide data as this is the required format
usdata_wide <- us_second_cancer %>%
                    #only use sample
                    dplyr::filter(as.numeric(fake_id) < 200000) %>%
                    msSPChelpR::reshape_wide_tidyr(case_id_var = "fake_id", 
                    time_id_var = "SEQ_NUM", timevar_max = 2)
#> Long dataset had too many cases per patient. Wide dataset is limited to  2  cases per id as defined in timevar_max option.
                    
#prep step - calculate p_spc and count_spc variables
usdata_wide <- usdata_wide %>%
                 dplyr::mutate(p_spc = dplyr::case_when(is.na(t_site_icd.2)   ~ "No SPC",
                                                       !is.na(t_site_icd.2)   ~ "SPC developed",
                                                       TRUE ~ NA_character_)) %>%
                 dplyr::mutate(count_spc = dplyr::case_when(is.na(t_site_icd.2)   ~ 1,
                                                              TRUE ~ 0))
                                                              
#prep step - create patient status variable
usdata_wide <- usdata_wide %>%
                  msSPChelpR::pat_status(., fu_end = "2017-12-31", dattype = "seer",
                                         status_var = "p_status", life_var = "p_alive.1",
                                         birthdat_var = "datebirth.1", lifedat_var = "datedeath.1")
#> # A tibble: 8 × 3
#>   p_alive.1 p_status     n
#>   <chr>        <dbl> <int>
#> 1 Alive            1  1768
#> 2 Alive            2  1893
#> 3 Alive           98   288
#> 4 Dead             1   278
#> 5 Dead             2   236
#> 6 Dead             3  2009
#> 7 Dead             4  1000
#> 8 Dead            98    14
#> # A tibble: 5 × 2
#>   p_status     n
#>      <dbl> <int>
#> 1        1  2046
#> 2        2  2129
#> 3        3  2009
#> 4        4  1000
#> 5       98   302
 
#prep step - calculate follow-up time per person (in years)
usdata_wide <- usdata_wide %>%
                 msSPChelpR::calc_futime(., 
                        futime_var_new = "p_futimeyrs", 
                        fu_end = "2017-12-31",
                        dattype = "seer", 
                        time_unit = "years",
                        status_var = "p_status",
                        lifedat_var = "datedeath.1", 
                        fcdat_var = "t_datediag.1", 
                        spcdat_var = "t_datediag.2")
#> Warning: There were 2 warnings in `dplyr::summarise()`.
#> The first warning was:
#>  In argument: `min_futime = min(.data[["p_futimeyrs"]], na.rm = TRUE)`.
#>  In group 5: `p_status = 98`.
#> Caused by warning in `min()`:
#> ! no non-missing arguments to min; returning Inf
#>  Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
#> # A tibble: 5 × 5
#>   p_status mean_futime min_futime max_futime median_futime
#>      <dbl>       <dbl>      <dbl>      <dbl>         <dbl>
#> 1        1        9.14     0.0438       27.0          8.04
#> 2        2        8.84     0            26.8          7.67
#> 3        3        8.40     0            25.6          7.29
#> 4        4        6.44     0            24.5          5.08
#> 5       98      NaN      Inf          -Inf           NA   
                    
#for example, you can calculate incidence and summarize by sex and registry
msSPChelpR::ir_crosstab_byfutime(usdata_wide,
      dattype = "seer",
      count_var = "count_spc",
      futime_breaks = c(0, .5, 1, 5, 10, Inf),
      ybreak_vars = c("sex.1", "registry.1"),
      collapse_ci = FALSE,
      add_total = "no",
      futime_var = "p_futimeyrs",
      alpha = 0.05)
#> Warning: The variable for follow-up time has: 309 missings. These will be omitted when creating the crosstabs.
#>    yvar_name                               yvar_label to 6 months_n_base
#> 1      sex.1                                   Female               3609
#> 2      sex.1                                     Male               3568
#> 3 registry.1 SEER Reg 01 - San Francisco-Oakland SMSA               2427
#> 4 registry.1                SEER Reg 02 - Connecticut               1573
#> 5 registry.1     SEER Reg 20 - Detroit (Metropolitan)               2398
#> 6 registry.1                     SEER Reg 21 - Hawaii                779
#>   to 6 months_observed to 6 months_pyar to 6 months_abs_ir
#> 1                  109             1752            6222.35
#> 2                  120             1727            6948.03
#> 3                   75             1178            6369.30
#> 4                   61              760            8031.43
#> 5                   69             1164            5925.41
#> 6                   24              377            6360.26
#>   to 6 months_abs_ir_lci to 6 months_abs_ir_uci 6-12 months_n_base
#> 1                5109.20                7506.01               3425
#> 2                5760.61                8308.14               3372
#> 3                5009.86                7983.98               2306
#> 4                6143.41               10316.71               1477
#> 5                4610.32                7498.98               2282
#> 6                4075.14                9463.57                732
#>   6-12 months_observed 6-12 months_pyar 6-12 months_abs_ir
#> 1                   63             1674            3762.84
#> 2                   65             1650            3939.45
#> 3                   38             1130            3364.15
#> 4                   34              719            4728.90
#> 5                   45             1118            4024.94
#> 6                   11              358            3075.44
#>   6-12 months_abs_ir_lci 6-12 months_abs_ir_uci 1-5 years_n_base
#> 1                2891.47                4814.31             3277
#> 2                3040.39                5021.16             3236
#> 3                2380.67                4617.56             2216
#> 4                3274.90                6608.17             1402
#> 5                2935.82                5385.68             2194
#> 6                1535.25                5502.82              701
#>   1-5 years_observed 1-5 years_pyar 1-5 years_abs_ir 1-5 years_abs_ir_lci
#> 1                503          11045          4554.05              4164.71
#> 2                500          10867          4601.01              4206.51
#> 3                334           7505          4450.20              3985.68
#> 4                221           4713          4689.58              4091.64
#> 5                338           7393          4571.73              4097.28
#> 6                110           2301          4780.17              3928.72
#>   1-5 years_abs_ir_uci 5-10 years_n_base 5-10 years_observed 5-10 years_pyar
#> 1              4969.99              2270                 510            8758
#> 2              5022.55              2263                 511            8776
#> 3              4953.99              1563                 349            6122
#> 4              5350.32               964                 220            3656
#> 5              5086.04              1528                 346            5928
#> 6              5761.40               478                 106            1828
#>   5-10 years_abs_ir 5-10 years_abs_ir_lci 5-10 years_abs_ir_uci
#> 1           5823.20               5328.71               6351.22
#> 2           5822.85               5328.87               6350.31
#> 3           5701.06               5118.55               6331.69
#> 4           6018.15               5249.13               6868.14
#> 5           5836.51               5237.66               6485.07
#> 6           5797.53               4746.54               7011.95
#>   10+ years_n_base 10+ years_observed 10+ years_pyar 10+ years_abs_ir
#> 1             1304                685           7360          9306.76
#> 2             1298                693           7164          9673.73
#> 3              912                479           5110          9374.06
#> 4              532                295           3031          9731.33
#> 5              891                474           4777          9921.71
#> 6              267                130           1605          8098.28
#>   10+ years_abs_ir_lci 10+ years_abs_ir_uci
#> 1              8622.77             10030.58
#> 2              8966.80             10421.57
#> 3              8553.27             10252.36
#> 4              8652.42             10907.58
#> 5              9048.51             10856.44
#> 6              6766.09              9616.05