R/ir_crosstab.R
ir_crosstab.Rd
Calculate crude incidence rates and crosstabulate results by break variables
ir_crosstab(
df,
dattype = NULL,
count_var,
xbreak_var = "none",
ybreak_vars,
collapse_ci = FALSE,
add_total = "no",
add_n_percentages = FALSE,
futime_var = NULL,
alpha = 0.05
)
dataframe in wide format
can be "zfkd" or "seer" or NULL. Will set default variable names if dattype is "seer" or "zfkd". Default is NULL.
variable to be counted as observed case. Should be 1 for case to be counted.
variable from df by which rates should be stratified in columns of result df. Default is "none".
variables from df by which rates should be stratified in rows of result df. Multiple variables will result in appended rows in result df. ybreak_vars is required.
If TRUE, the upper and lower confidence limits will be collapsed into one column, separated by "-". Default is FALSE.
option to add a row of totals. Can be either "no" for not adding such a row or "top" or "bottom" for adding it at the first or last row. Default is "no".
option to add a column of percentages for n_base in its respective yvar_group. Can only be used when xbreak_var = "none". Default is FALSE.
variable in df that contains follow-up time per person (in years). Default is set if dattype is given.
significance level for confidence interval calculations. Default is alpha = 0.05 which will give 95 percent confidence intervals.
df
#load the sample dataset used throughout this example
data("us_second_cancer")
#prep step - df must be in wide format, so reshape the data first
usdata_wide <- us_second_cancer %>%
  msSPChelpR::reshape_wide_tidyr(
    case_id_var = "fake_id",
    time_id_var = "SEQ_NUM",
    timevar_max = 10
  )
#prep step - calculate p_spc variable
#p_spc is "No SPC" when t_site_icd.2 is missing and "SPC developed" otherwise;
#the two conditions are complementary, so the TRUE ~ NA_character_ fallback
#is never reached
usdata_wide <- usdata_wide %>%
dplyr::mutate(p_spc = dplyr::case_when(is.na(t_site_icd.2) ~ "No SPC",
!is.na(t_site_icd.2) ~ "SPC developed",
TRUE ~ NA_character_)) %>%
#NOTE(review): count_spc is 1 where t_site_icd.2 is missing (the rows labelled
#"No SPC" above) and 0 otherwise. count_spc is later passed as count_var, which
#should be 1 for cases to be counted, so this condition looks inverted
#(expected !is.na(t_site_icd.2) ~ 1) - TODO confirm. The results printed at the
#end of this example presumably reflect the definition as written here.
dplyr::mutate(count_spc = dplyr::case_when(is.na(t_site_icd.2) ~ 1,
TRUE ~ 0))
#prep step - create patient status variable
#the pipe passes usdata_wide as the first argument of pat_status()
usdata_wide <- usdata_wide %>%
  msSPChelpR::pat_status(
    fu_end = "2017-12-31",
    dattype = "seer",
    status_var = "p_status",  #used as status_var by calc_futime() below
    life_var = "p_alive.1",
    birthdat_var = "datebirth.1",
    lifedat_var = "datedeath.1"
  )
#> # A tibble: 11 × 3
#> p_alive.1 p_status n
#> <chr> <dbl> <int>
#> 1 Alive 1 16051
#> 2 Alive 2 17816
#> 3 Alive 97 19
#> 4 Alive 98 2523
#> 5 Dead 1 2566
#> 6 Dead 2 2086
#> 7 Dead 3 18169
#> 8 Dead 4 8676
#> 9 Dead 97 2
#> 10 Dead 98 147
#> 11 Dead NA 5
#> # A tibble: 7 × 2
#> p_status n
#> <dbl> <int>
#> 1 1 18617
#> 2 2 19902
#> 3 3 18169
#> 4 4 8676
#> 5 97 21
#> 6 98 2670
#> 7 NA 5
#prep step - calculate follow-up time per patient (in years)
usdata_wide <- usdata_wide %>%
  msSPChelpR::calc_futime(
    futime_var_new = "p_futimeyrs",  #passed to ir_crosstab() as futime_var below
    fu_end = "2017-12-31",
    dattype = "seer",
    time_unit = "years",
    status_var = "p_status",
    lifedat_var = "datedeath.1",
    fcdat_var = "t_datediag.1",
    spcdat_var = "t_datediag.2"
  )
#> Warning: There were 6 warnings in `dplyr::summarise()`.
#> The first warning was:
#> ℹ In argument: `min_futime = min(.data[["p_futimeyrs"]], na.rm = TRUE)`.
#> ℹ In group 5: `p_status = 97`.
#> Caused by warning in `min()`:
#> ! no non-missing arguments to min; returning Inf
#> ℹ Run `dplyr::last_dplyr_warnings()` to see the 5 remaining warnings.
#> # A tibble: 7 × 5
#> p_status mean_futime min_futime max_futime median_futime
#> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 9.23 0.0438 27.0 8.04
#> 2 2 8.93 0 26.9 7.76
#> 3 3 8.65 0 25.9 7.54
#> 4 4 6.46 0 25.3 5.33
#> 5 97 NaN Inf -Inf NA
#> 6 98 NaN Inf -Inf NA
#> 7 NA NaN Inf -Inf NA
#for example, you can calculate incidence and summarize by sex and registry
msSPChelpR::ir_crosstab(
  usdata_wide,
  dattype = "seer",
  count_var = "count_spc",
  xbreak_var = "none",  #no stratification in columns
  ybreak_vars = c("sex.1", "registry.1"),  #rows for each variable are appended
  collapse_ci = FALSE,  #keep lower and upper CI limits in separate columns
  add_total = "no",
  add_n_percentages = FALSE,
  futime_var = "p_futimeyrs",
  alpha = 0.05  #gives 95 percent confidence intervals
)
#> # A tibble: 6 × 8
#> yvar_name yvar_label n_base observed pyar abs_ir abs_ir_lci abs_ir_uci
#> <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 sex.1 Female 33786 18218 279373 6521. 6427. 6616.
#> 2 sex.1 Male 34274 18375 282810 6497. 6404. 6592.
#> 3 registry.1 SEER Reg 01 - … 22738 12194 187123 6517. 6401. 6633.
#> 4 registry.1 SEER Reg 02 - … 15156 8233 124843 6595. 6453. 6739.
#> 5 registry.1 SEER Reg 20 - … 22722 12241 188690 6487. 6373. 6603.
#> 6 registry.1 SEER Reg 21 - … 7444 3925 61526 6379. 6181. 6582.