#-----------------------------------------------------------------------
#
#  Nordic Summer School in Cancer Epidemiology, Copenhagen, 2019
#
#  Exercises on measures of frequency and effects
#  R solutions for Tuesday 13 August, 2019 -- Esa Läärä
#
#-----------------------------------------------------------------------

#  Exercise 2.1: Basic measures in a cohort
#  ----------------------------------------

#  Item 1.
#--------- 

#  The person-years from start of follow-up until cancer, death,
#  or end of 2008, whichever comes first, would be summed as follows
## Y <- 2.5 + 3.5 + 1.5 + 3.0 + 4.5 + 0.5 +
##      1.0 + 2.5 + 2.5 + 2.5 + 1.5 + 1.5
#  However, this calculation has been done for you, so
Y.todis <- 27

#  The total person-time is Y = 27 years. New cases of cancer
#  are D = 5. Thus, the incidence rate is 
#  I = 100xD/Y = 100 x 5/27 y = 18.5 per 100 y
Cases <- 5
Irate <- 100*Cases/Y.todis
Irate

#  Item 2.
#---------
#  Person-times until death or censoring at the end of 2008:
#  Add the follow-up times after diagnosis for the five
#  subjects getting cancer cancer. 
Y.todth <- Y.todis + 2 + 1.5 + 1 + 0.5 + 0.5
Y.todth

#  There was 1 death from cancer and 32.5 person-years, so the
#  mortality rate from cancer is 1/(32.5 y) = 3.1 per 100 years
Dth.C <- 1
Mrate.C <- 100*Dth.C/Y.todth
round(Mrate.C, 1)

#  Item 3.
#---------

#  Total mortality: 4 deaths per 32.5 years = 12.3 per 100 years
Dth.all <- 4
Mrate.all <- 100*Dth.all/Y.todth
round( Mrate.all, 1 )

#  Here there are no competing events. Thus, the 3-year total 
#  mortality proportion is now quite adequately estimated as 30.9 %
Mprop3.all <- 1 - exp( -(Mrate.all/100) * 3 )
100*round( Mprop3.all, 3 )

#  Item 4.
#---------

#  The person-times from diagnosis until death or censoring make 5.5 y.
Y.distodth <- Y.todth - Y.todis 
##  or Y.distodth <- 2 + 1.5 + 1 + 0.5 + 0.5
Y.distodth

#  The number of deaths among those with cancer is 2, so the
#  total mortality rate among them is 2/5.5 y = 36.4 per 100 years,
#  and the total 3-year mortality proportion = 66.4 %
D.pts <- 2
Mrate.pts <- 100*D.pts/Y.distodth
round( Mrate.pts, 1 )
Mprop3.pts <- 1 - exp( -(Mrate.pts/100) * 3 )
round( 100*Mprop3.pts, 1 )

#  Item 5.
#---------

#  The prevalences at the two time points are 1/7=14.3% and 3/5 = 60%
N1 <- 7 ; N2 <- 5
D1 <- 1 ; D2 <- 3
P1 <- 100*D1/N1; P2 <- 100*D2/N2
round( c(P1, P2), 1)

#  ... or shorter, and if we also want to add nice labels:
Prev <- c(D1, D2)/c(N1, N2)
names(Prev) <- c("30sep2006","31dec2008")
round( 100*Prev, 1 )

#  Comment: The simple formula for computing the incidence proportion or 
#  the mortality proportion for a given cause of death ignores the incidence 
#  of competing events, for instance death before getting cancer when 
#  assessing the incidence of cancer, and death from other causes
#  when considering the cause-specific mortality proportion. When, however, 
#  the total mortality is estimated, there are no competing events.

#-------------------------------------------------------------------------

#  Exercise 2.3: Incidence and mortality -- acute leukaemia
#  --------------------------------------------------------

#  Item 1.
#---------

#  Person-years are obtained by the mid-population principle:
#  about 2465000 years and 483000 years. We shall form a vector
#  containing the person-years of both periods, both expressed
#  in 100000s for later convenience
py1 <- 5*(4.95+4.91)/2
py2 <- (4.85+4.81 )/2 
py <- c(py1, py2)
names(py) <- c("1993-97","1999")
py

#  For the first period, a more sophisticated approximation
#  would yield 2477000 person-years:
#  py1 <- 4.95/2+4.96+4.97+4.96+4.95+4.91/2
#  py1

#  The incidence rates are 113/24.65 = 4.6 and 26/4.83 = 5.4, both 
#  per 100000 y. Calculations are shortest when the pertinent quantities 
#  are arranged in vectors.

cases <- c(113, 26)
incid <- cases/py
round( incid, 2 )

#  Item 2.
#---------

#  The mortality rates are 22/24.65 = 0.9 and 3/4.83 = 0.6, 
#  both per 100000 years
dths <- c(22, 3)
mort <- dths/py
round( mort, 2 )

#---------------------------------------------------------------------------

#  Exercise 2.4: ATBC trial -- prostate cancer
#  -------------------------------------------

#  Item 1.
#  -------

#  When the number of cases D and the incidence rate I = D/Y are
#  known, the person-years are obtained as Y = D/I. Recall, though,
#  that the rates are given using per 10000 years as the unit. 
#  Assign rates and cases into their own vectors as follows, and
#  give names to their elements  
rate <- c(11.6, 17.8)
D <- c(99, 151)
names(rate) <- names(D) <- c("VitE","Plcb")

#  After that you will get the person-years ~85345 y and ~84831 y
pyrs <- D / (rate/10000)
round(pyrs)

#  Item 2.
#---------

#  The "relative risk" is assessed by the incidence rate ratio,
#  and it will be 11.6/17.8 = 0.65
RR <- rate[1]/rate[2]
RR

#  The "excess risk" is assessed by the incidence rate difference,
#  which is 11.6 - 17.8 = -6.2 per 10000 person-years
RD <- rate[1] - rate[2]
RD 

#  Item 3.
#---------

#  As the incidence among those exposed to vitamin E is smaller than
#  that in the control group, it is appropriate to estimate the
#  preventive fraction or relative risk reduction PF = 1 - 0.652 = 0.348,
PF <- 1 - RR
#  PF as a percentage:
round(100*PF, 1)  

#--------------------------------------------------------------------------

#  Exercise 2.5: Comparative measures -- smokers vs. non-smokers
#  -------------------------------------------------------------

#  Item 1.
#---------

#  Again, utilization of vectorised arithmetics is efficient.
#  Assign the rates of the three diseases into vectors;
#  one vector for smokers and the other for non-smokers, and 
#  give names to the elements of the vectors
smok <- c(2.0, 3.0, 15.0)
nonsm <- c(0.2, 1.0, 9.0)
names(smok) <- names(nonsm) <- c("LungCa","OthLung","CVD")
rbind( smok, nonsm )

#  Then we compute the different measures for the three diseases
diff <- smok - nonsm   #  rate differences 1.8, 2 and 6 per 1000 y
ratio <- smok/nonsm    #  rate ratios 10, 3, and 1.67
AF <- (ratio-1)/ratio  #  excess fractions 90, 67 and 40 % 
round( rbind( diff, ratio, AF.pct=100*AF ), 1 )

#--------------------------------------------------------------------------

#  Exercise 2.6: Infant mortality
#  ------------------------------

#  Item 1.
#---------

#  Person-years are approximated as usual, and the mortality rate
#  will be 269/32850 y = 8.19 per 1000 person-years
( y <- (33200 + 32500)/2 )
d <- 269
mort.rate <- 1000*d/y
cbind( d, y, rate = round( mort.rate, 2) )

#  Item 2.
#---------

#  The infant mortality ratio is 269/32800 = 8.20 per 1000 live born
B <- 32800
IMR <- 1000*d/B
round(IMR, 2)

#  This measure is not a rate, nor a proportion; it is a ratio
#  of two counts, in which the numerator is not included in the
#  denominator.

#  Extra: An estimate of the 1-year mortality proportion 
#  for the newborn in 1978 is 8.16 per 1000; obtained as
mort.prop <- 1 - exp( - mort.rate/1000 )
round(1000*mort.prop,2)

#  The values of the three different measures are numerically close 
#  to each other, even though they are conceptually different.

#---------------------------------------------------------------------------