Calculate crude incidence rates and crosstabulate results by break variables

ir_crosstab(
  df,
  dattype = NULL,
  count_var,
  xbreak_var = "none",
  ybreak_vars,
  collapse_ci = FALSE,
  add_total = "no",
  add_n_percentages = FALSE,
  futime_var = NULL,
  alpha = 0.05
)

Arguments

df

dataframe in wide format

dattype

can be "zfkd" or "seer" or NULL. Will set default variable names if dattype is "seer" or "zfkd". Default is NULL.

count_var

variable in df that flags observed cases. A row is counted as an observed case when this variable is 1.

xbreak_var

variable from df by which rates should be stratified in columns of result df. Default is "none".

ybreak_vars

variables from df by which rates should be stratified in rows of result df. Multiple variables will result in appended rows in result df. ybreak_vars is required.

collapse_ci

If TRUE, the lower and upper confidence limits will be collapsed into one column, separated by "-". Default is FALSE.

add_total

option to add a row of totals. Can be either "no" for not adding such a row or "top" or "bottom" for adding it at the first or last row. Default is "no".

add_n_percentages

option to add a column of percentages for n_base in its respective yvar_group. Can only be used when xbreak_var = "none". Default is FALSE.

futime_var

variable in df that contains follow-up time per person (in years). Default is set if dattype is given.

alpha

significance level for confidence interval calculations. Default is alpha = 0.05 which will give 95 percent confidence intervals.

Value

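A dataframe of crude incidence rates with one row per level of each variable in ybreak_vars (plus a row of totals if add_total is "top" or "bottom"). With xbreak_var = "none", as in the example below, the result contains the columns yvar_name, yvar_label, n_base, observed, pyar, abs_ir, abs_ir_lci and abs_ir_uci; in the example output abs_ir equals observed / pyar × 100,000.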

Examples

#load sample data
#NOTE: the pipe operator %>% used below is not attached anywhere in this snippet;
#it has to be available beforehand (e.g. via library(dplyr) or library(magrittr))
data("us_second_cancer")

#prep step - make wide data as this is the required format
#the later steps reference the resulting suffixed columns (e.g. t_site_icd.2, datebirth.1)
usdata_wide <- us_second_cancer %>%
                    msSPChelpR::reshape_wide_tidyr(case_id_var = "fake_id", 
                    time_id_var = "SEQ_NUM", timevar_max = 10)
                    
#prep step - calculate p_spc variable (text label) and count_spc (0/1 indicator used as count_var)
#p_spc is "No SPC" when t_site_icd.2 is missing and "SPC developed" otherwise;
#the final TRUE branch cannot be reached because the first two conditions are exhaustive
#NOTE(review): count_spc is set to 1 where t_site_icd.2 is missing, i.e. for the rows
#labelled "No SPC", so the `observed` column of the results counts those rows.
#If the intent is to count developed SPCs, the condition should presumably be
#!is.na(t_site_icd.2) - confirm; changing it would also change the printed results below.
usdata_wide <- usdata_wide %>%
                 dplyr::mutate(p_spc = dplyr::case_when(is.na(t_site_icd.2)   ~ "No SPC",
                                                       !is.na(t_site_icd.2)   ~ "SPC developed",
                                                       TRUE ~ NA_character_)) %>%
                 dplyr::mutate(count_spc = dplyr::case_when(is.na(t_site_icd.2)   ~ 1,
                                                              TRUE ~ 0))
                                                              
#prep step - create patient status variable
#the status is written to the column named in status_var ("p_status"), which is
#passed on to calc_futime() in the next step;
#the call also prints the two frequency tables of p_alive.1 / p_status shown below
usdata_wide <- usdata_wide %>%
                  msSPChelpR::pat_status(., fu_end = "2017-12-31", dattype = "seer",
                                         status_var = "p_status", life_var = "p_alive.1",
                                         birthdat_var = "datebirth.1", lifedat_var = "datedeath.1")
#> # A tibble: 11 × 3
#>    p_alive.1 p_status     n
#>    <chr>        <dbl> <int>
#>  1 Alive            1 16051
#>  2 Alive            2 17816
#>  3 Alive           97    19
#>  4 Alive           98  2523
#>  5 Dead             1  2566
#>  6 Dead             2  2086
#>  7 Dead             3 18169
#>  8 Dead             4  8676
#>  9 Dead            97     2
#> 10 Dead            98   147
#> 11 Dead            NA     5
#> # A tibble: 7 × 2
#>   p_status     n
#>      <dbl> <int>
#> 1        1 18617
#> 2        2 19902
#> 3        3 18169
#> 4        4  8676
#> 5       97    21
#> 6       98  2670
#> 7       NA     5
 
#prep step - calculate follow-up time in years and store it in p_futimeyrs
#(this column is passed to ir_crosstab() as futime_var further down)
#the warnings printed below come from the summary table: the p_status groups
#97, 98 and NA have no non-missing follow-up time, hence the NaN/Inf rows
usdata_wide <- usdata_wide %>%
                 msSPChelpR::calc_futime(., 
                        futime_var_new = "p_futimeyrs", 
                        fu_end = "2017-12-31",
                        dattype = "seer", 
                        time_unit = "years",
                        status_var = "p_status",
                        lifedat_var = "datedeath.1", 
                        fcdat_var = "t_datediag.1", 
                        spcdat_var = "t_datediag.2")
#> Warning: There were 6 warnings in `dplyr::summarise()`.
#> The first warning was:
#>  In argument: `min_futime = min(.data[["p_futimeyrs"]], na.rm = TRUE)`.
#>  In group 5: `p_status = 97`.
#> Caused by warning in `min()`:
#> ! no non-missing arguments to min; returning Inf
#>  Run `dplyr::last_dplyr_warnings()` to see the 5 remaining warnings.
#> # A tibble: 7 × 5
#>   p_status mean_futime min_futime max_futime median_futime
#>      <dbl>       <dbl>      <dbl>      <dbl>         <dbl>
#> 1        1        9.23     0.0438       27.0          8.04
#> 2        2        8.93     0            26.9          7.76
#> 3        3        8.65     0            25.9          7.54
#> 4        4        6.46     0            25.3          5.33
#> 5       97      NaN      Inf          -Inf           NA   
#> 6       98      NaN      Inf          -Inf           NA   
#> 7       NA      NaN      Inf          -Inf           NA   
                    
#now we can run the function
#for example, you can calculate incidence and summarize by sex and registry
#rows for the levels of sex.1 and registry.1 are appended below each other in the result;
#xbreak_var, collapse_ci, add_total, add_n_percentages and alpha are written out
#with their default values here for illustration
msSPChelpR::ir_crosstab(usdata_wide,
      dattype = "seer",
      count_var = "count_spc",
      xbreak_var = "none",
      ybreak_vars = c("sex.1", "registry.1"),
      collapse_ci = FALSE,
      add_total = "no",
      add_n_percentages = FALSE,
      futime_var = "p_futimeyrs",
      alpha = 0.05)
#> # A tibble: 6 × 8
#>   yvar_name  yvar_label      n_base observed   pyar abs_ir abs_ir_lci abs_ir_uci
#>   <chr>      <chr>            <int>    <dbl>  <dbl>  <dbl>      <dbl>      <dbl>
#> 1 sex.1      Female           33786    18218 279373  6521.      6427.      6616.
#> 2 sex.1      Male             34274    18375 282810  6497.      6404.      6592.
#> 3 registry.1 SEER Reg 01 - …  22738    12194 187123  6517.      6401.      6633.
#> 4 registry.1 SEER Reg 02 - …  15156     8233 124843  6595.      6453.      6739.
#> 5 registry.1 SEER Reg 20 - …  22722    12241 188690  6487.      6373.      6603.
#> 6 registry.1 SEER Reg 21 - …   7444     3925  61526  6379.      6181.      6582.