Fitting epicurves

Tim Taylor

Example

To illustrate the trend fitting functionality of i2extras we will use the simulated Ebola Virus Disease (EVD) outbreak data from the outbreaks package.

Loading relevant packages and data

library(outbreaks)
library(i2extras)
#> Loading required package: incidence2
#> Loading required package: grates
library(ggplot2)

raw_dat <- ebola_sim_clean$linelist

For this example we will restrict ourselves to the first 20 weeks of data:

dat <- incidence(
    raw_dat, 
    date_index = "date_of_onset",
    interval = "week",
    groups = "gender"
)[1:20, ]
dat
#> # incidence:  20 x 4
#> # count vars: date_of_onset
#> # groups:     gender
#>    date_index gender count_variable count
#>  * <isowk>    <fct>  <chr>          <int>
#>  1 2014-W15   f      date_of_onset      1
#>  2 2014-W16   m      date_of_onset      1
#>  3 2014-W17   f      date_of_onset      4
#>  4 2014-W17   m      date_of_onset      1
#>  5 2014-W18   f      date_of_onset      4
#>  6 2014-W19   f      date_of_onset      9
#>  7 2014-W19   m      date_of_onset      3
#>  8 2014-W20   f      date_of_onset      7
#>  9 2014-W20   m      date_of_onset     10
#> 10 2014-W21   f      date_of_onset      8
#> 11 2014-W21   m      date_of_onset      7
#> 12 2014-W22   f      date_of_onset      9
#> 13 2014-W22   m      date_of_onset     10
#> 14 2014-W23   f      date_of_onset     13
#> 15 2014-W23   m      date_of_onset     10
#> 16 2014-W24   f      date_of_onset      7
#> 17 2014-W24   m      date_of_onset     14
#> 18 2014-W25   f      date_of_onset     15
#> 19 2014-W25   m      date_of_onset     15
#> 20 2014-W26   f      date_of_onset     12
plot(dat, angle = 45, border_colour = "white")

Modeling incidence

We can use fit_curve() to fit the data with either a poisson or negative binomial regression model. The output from this will be a nested object with class incidence2_fit which has methods available for both automatic plotting and the calculation of growth (decay) rates and doubling (halving) times.

out <- fit_curve(dat, model = "poisson", alpha = 0.05)
out
#> # A tibble: 2 × 9
#>   count_varia…¹ gender     data model estimates  fitti…² fitti…³ predi…⁴ predi…⁵
#>   <chr>         <fct>  <list<t> <lis> <list>     <list>  <list>  <list>  <list> 
#> 1 date_of_onset f      [11 × 2] <glm> <trndng_p> <NULL>  <NULL>  <NULL>  <NULL> 
#> 2 date_of_onset m       [9 × 2] <glm> <trndng_p> <NULL>  <NULL>  <NULL>  <NULL> 
#> # … with abbreviated variable names ¹​count_variable, ²​fitting_warning,
#> #   ³​fitting_error, ⁴​prediction_warning, ⁵​prediction_error
plot(out, angle = 45, border_colour = "white")

growth_rate(out)
#> # A tibble: 2 × 10
#>   count_varia…¹ gender model     r r_lower r_upper growt…²  time time_…³ time_…⁴
#>   <chr>         <fct>  <lis> <dbl>   <dbl>   <dbl> <chr>   <dbl>   <dbl>   <dbl>
#> 1 date_of_onset f      <glm> 0.137  0.0698   0.206 doubli…  5.07    3.36    9.93
#> 2 date_of_onset m      <glm> 0.240  0.146    0.341 doubli…  2.89    2.03    4.75
#> # … with abbreviated variable names ¹​count_variable, ²​growth_or_decay,
#> #   ³​time_lower, ⁴​time_upper

To unnest the data we can use unnest() (a function that has been reexported from the tidyr package.

unnest(out, estimates)
#> # A tibble: 20 × 15
#>    count_v…¹ gender     data model count date_…² estim…³ lower…⁴ upper…⁵ lower…⁶
#>    <chr>     <fct>  <list<t> <lis> <int> <isowk>   <dbl>   <dbl>   <dbl>   <dbl>
#>  1 date_of_… f      [11 × 2] <glm>     1 2014-W…    3.27    1.90    5.61       0
#>  2 date_of_… f      [11 × 2] <glm>     4 2014-W…    4.30    2.83    6.52       0
#>  3 date_of_… f      [11 × 2] <glm>     4 2014-W…    4.93    3.44    7.06       0
#>  4 date_of_… f      [11 × 2] <glm>     9 2014-W…    5.65    4.15    7.68       1
#>  5 date_of_… f      [11 × 2] <glm>     7 2014-W…    6.47    4.99    8.40       1
#>  6 date_of_… f      [11 × 2] <glm>     8 2014-W…    7.42    5.92    9.31       2
#>  7 date_of_… f      [11 × 2] <glm>     9 2014-W…    8.51    6.90   10.5        2
#>  8 date_of_… f      [11 × 2] <glm>    13 2014-W…    9.75    7.88   12.1        3
#>  9 date_of_… f      [11 × 2] <glm>     7 2014-W…   11.2     8.82   14.2        4
#> 10 date_of_… f      [11 × 2] <glm>    15 2014-W…   12.8     9.72   16.9        4
#> 11 date_of_… f      [11 × 2] <glm>    12 2014-W…   14.7    10.6    20.4        5
#> 12 date_of_… m       [9 × 2] <glm>     1 2014-W…    2.01    1.02    3.95       0
#> 13 date_of_… m       [9 × 2] <glm>     1 2014-W…    2.56    1.43    4.59       0
#> 14 date_of_… m       [9 × 2] <glm>     3 2014-W…    4.13    2.73    6.24       0
#> 15 date_of_… m       [9 × 2] <glm>    10 2014-W…    5.25    3.75    7.36       1
#> 16 date_of_… m       [9 × 2] <glm>     7 2014-W…    6.67    5.07    8.79       1
#> 17 date_of_… m       [9 × 2] <glm>    10 2014-W…    8.48    6.69   10.8        2
#> 18 date_of_… m       [9 × 2] <glm>    10 2014-W…   10.8     8.50   13.7        3
#> 19 date_of_… m       [9 × 2] <glm>    14 2014-W…   13.7    10.4    18.0        5
#> 20 date_of_… m       [9 × 2] <glm>    15 2014-W…   17.4    12.4    24.4        6
#> # … with 5 more variables: upper_pi <dbl>, fitting_warning <list>,
#> #   fitting_error <list>, prediction_warning <list>, prediction_error <list>,
#> #   and abbreviated variable names ¹​count_variable, ²​date_index, ³​estimate,
#> #   ⁴​lower_ci, ⁵​upper_ci, ⁶​lower_pi

fit_curve() also works nicely with grouped incidence2 objects. In this situation, we return a nested tibble with some additional columns that indicate whether there has been a warning or error during the fitting or prediction stages.

grouped_dat <- incidence(
    raw_dat, 
    date_index = "date_of_onset",
    interval = "week",
    groups = "hospital"
)[1:120, ]
grouped_dat
#> # incidence:  120 x 4
#> # count vars: date_of_onset
#> # groups:     hospital
#>    date_index hospital                                     count_variable count
#>  * <isowk>    <fct>                                        <chr>          <int>
#>  1 2014-W15   Military Hospital                            date_of_onset      1
#>  2 2014-W16   Connaught Hospital                           date_of_onset      1
#>  3 2014-W17   <NA>                                         date_of_onset      2
#>  4 2014-W17   other                                        date_of_onset      3
#>  5 2014-W18   <NA>                                         date_of_onset      1
#>  6 2014-W18   Connaught Hospital                           date_of_onset      1
#>  7 2014-W18   Princess Christian Maternity Hospital (PCMH) date_of_onset      1
#>  8 2014-W18   Rokupa Hospital                              date_of_onset      1
#>  9 2014-W19   <NA>                                         date_of_onset      1
#> 10 2014-W19   Connaught Hospital                           date_of_onset      3
#> # … with 110 more rows

out <- fit_curve(grouped_dat, model = "poisson", alpha = 0.05)
out
#> # A tibble: 6 × 9
#>   count_vari…¹ hospi…²     data model estimates  fitti…³ fitti…⁴ predi…⁵ predi…⁶
#>   <chr>        <fct>   <list<t> <lis> <list>     <list>  <list>  <list>  <list> 
#> 1 date_of_ons… Connau… [22 × 2] <glm> <trndng_p> <NULL>  <NULL>  <NULL>  <NULL> 
#> 2 date_of_ons… Milita… [21 × 2] <glm> <trndng_p> <NULL>  <NULL>  <NULL>  <NULL> 
#> 3 date_of_ons… other   [20 × 2] <glm> <trndng_p> <NULL>  <NULL>  <NULL>  <NULL> 
#> 4 date_of_ons… Prince… [17 × 2] <glm> <trndng_p> <NULL>  <NULL>  <NULL>  <NULL> 
#> 5 date_of_ons… Rokupa… [18 × 2] <glm> <trndng_p> <NULL>  <NULL>  <NULL>  <NULL> 
#> 6 date_of_ons… <NA>    [22 × 2] <glm> <trndng_p> <NULL>  <NULL>  <NULL>  <NULL> 
#> # … with abbreviated variable names ¹​count_variable, ²​hospital,
#> #   ³​fitting_warning, ⁴​fitting_error, ⁵​prediction_warning, ⁶​prediction_error

# plot with a prediction interval but not a confidence interval
plot(out, ci = FALSE, pi=TRUE, angle = 45, border_colour = "white")

growth_rate(out)
#> # A tibble: 6 × 10
#>   count_vari…¹ hospi…² model     r r_lower r_upper growt…³  time time_…⁴ time_…⁵
#>   <chr>        <fct>   <lis> <dbl>   <dbl>   <dbl> <chr>   <dbl>   <dbl>   <dbl>
#> 1 date_of_ons… Connau… <glm> 0.197   0.177   0.217 doubli…  3.53    3.20    3.92
#> 2 date_of_ons… Milita… <glm> 0.173   0.147   0.200 doubli…  4.00    3.46    4.71
#> 3 date_of_ons… other   <glm> 0.170   0.141   0.200 doubli…  4.09    3.46    4.92
#> 4 date_of_ons… Prince… <glm> 0.142   0.101   0.188 doubli…  4.87    3.70    6.87
#> 5 date_of_ons… Rokupa… <glm> 0.178   0.133   0.228 doubli…  3.89    3.04    5.22
#> 6 date_of_ons… <NA>    <glm> 0.184   0.164   0.205 doubli…  3.77    3.39    4.24
#> # … with abbreviated variable names ¹​count_variable, ²​hospital,
#> #   ³​growth_or_decay, ⁴​time_lower, ⁵​time_upper

We provide helper functions, is_ok(), is_warning() and is_error() to help filter the output as necessary.

out <- fit_curve(grouped_dat, model = "negbin", alpha = 0.05)
is_warning(out)
#> # A tibble: 5 × 7
#>   count_variable hospital               data model    estimates  fitti…¹ predi…²
#>   <chr>          <fct>              <list<t> <list>   <list>     <list>  <list> 
#> 1 date_of_onset  Connaught Hospital [22 × 2] <negbin> <trndng_p> <chr>   <NULL> 
#> 2 date_of_onset  other              [20 × 2] <negbin> <trndng_p> <chr>   <NULL> 
#> 3 date_of_onset  Princess Christia… [17 × 2] <negbin> <trndng_p> <chr>   <NULL> 
#> 4 date_of_onset  Rokupa Hospital    [18 × 2] <negbin> <trndng_p> <chr>   <NULL> 
#> 5 date_of_onset  <NA>               [22 × 2] <negbin> <trndng_p> <chr>   <NULL> 
#> # … with abbreviated variable names ¹​fitting_warning, ²​prediction_warning
unnest(is_warning(out), fitting_warning)
#> # A tibble: 10 × 7
#>    count_variable hospital              data model    estimates  fitti…¹ predi…²
#>    <chr>          <fct>             <list<t> <list>   <list>     <chr>   <list> 
#>  1 date_of_onset  Connaught Hospit… [22 × 2] <negbin> <trndng_p> iterat… <NULL> 
#>  2 date_of_onset  Connaught Hospit… [22 × 2] <negbin> <trndng_p> iterat… <NULL> 
#>  3 date_of_onset  other             [20 × 2] <negbin> <trndng_p> iterat… <NULL> 
#>  4 date_of_onset  other             [20 × 2] <negbin> <trndng_p> iterat… <NULL> 
#>  5 date_of_onset  Princess Christi… [17 × 2] <negbin> <trndng_p> iterat… <NULL> 
#>  6 date_of_onset  Princess Christi… [17 × 2] <negbin> <trndng_p> iterat… <NULL> 
#>  7 date_of_onset  Rokupa Hospital   [18 × 2] <negbin> <trndng_p> iterat… <NULL> 
#>  8 date_of_onset  Rokupa Hospital   [18 × 2] <negbin> <trndng_p> iterat… <NULL> 
#>  9 date_of_onset  <NA>              [22 × 2] <negbin> <trndng_p> iterat… <NULL> 
#> 10 date_of_onset  <NA>              [22 × 2] <negbin> <trndng_p> iterat… <NULL> 
#> # … with abbreviated variable names ¹​fitting_warning, ²​prediction_warning

Rolling average

We can add a rolling average, across current and previous intervals, to an incidence2 object with the add_rolling_average() function:

ra <- add_rolling_average(grouped_dat, n = 2L) # group observations with the 2 prior
plot(ra, border_colour = "white", angle = 45) +
    geom_line(aes(x = date_index, y = rolling_average))
#> Warning: Removed 1 row containing missing values (`geom_line()`).