suppressPackageStartupMessages({ library(readxl) library(lattice) library(splines) library(robustbase) library(lubridate) library(httr) library(tidyr) library(tidyverse) library(MASS) }) # create the URL where the dataset is stored with automatic updates every day URL <- "https://data.nsw.gov.au/data/dataset/97ea2424-abaf-4f3e-a9f2-b5c883f42b6a/resource/2776dbb8-f807-4fb2-b1ed-184a6fc2c8aa/download/confirmed_cases_table4_location_likely_source.csv" tf <- tempfile(fileext = ".csv") GET(URL, authenticate(":", ":", type = "ntlm"), write_disk(tf)) origdata <- read_csv(tf) %>% filter((postcode >= 2000) & (postcode < 3000)) %>% filter(!(likely_source_of_infection %in% c("Overseas","Interstate"))) lgacases <- origdata %>% mutate(date = ymd(notification_date)) %>% mutate(days=as.integer(difftime(date,as.Date("2021-06-27"), units = "days"))) %>% filter(days>=0) %>% dplyr::select(-notification_date) %>% # tidy up lga names dplyr::mutate(lga_name19 = str_extract(lga_name19, pattern = "^[[:alpha:]|[:space:]|\\-]+")) %>% # str_locate(origdata$lga_name19, pattern = "\\(") filter(lhd_2010_name %in% c("Northern Sydney","South Eastern Sydney","South Western Sydney","Sydney","Western Sydney", "Nepean Blue Mountains","Central Coast","Illawarra Shoalhaven")) %>% group_by(lga_name19, days) %>% summarise(n = n()) %>% ungroup() # remove last day, as often not complete lastday <- max(lgacases$days) lgacases <- lgacases %>% filter(days!=lastday) stripParams <- list(cex=0.9) lgaplot <- xyplot(n ~ days | lga_name19, ylab="New Cases per Day", panel = function(x, y, subscripts, ...) { savex <- x savey <- y if (length(x)>=10) { if (lastday>length(x)) { thedata <- data.frame(x,y) alldays <- data.frame(x=(1:lastday)) thedata <- merge(thedata,alldays,all=TRUE) thedata$y <- ifelse(is.na(thedata$y),-Inf,thedata$y) x <- thedata$x y <- thedata$y } glmcurr <- glm.nb(10^y ~ ns(x, 1), data = data.frame(x, y)) ispline <- 1 repeat { ispline <- ispline+1 glmnew <- glm.nb(10^y ~ ns(x, ispline), data = data.frame(x, y)) if (AIC(glmnew)>AIC(glmcurr)) break glmcurr <- glmnew } preddata <- data.frame(x = seq(1, lastday, length.out = 101)) pred1 <- predict(glmcurr, se.fit = TRUE, newdata = preddata) # xpoly <- c(preddata$x,rev(preddata$x)) # # ypoly <- c((pred1$fit+1.96*pred1$se.fit),rev(pred1$fit-1.96*pred1$se.fit)) # # panel.polygon(xpoly,log10(exp(ypoly)), col="gray",alpha=0.25) panel.xyplot(preddata$x, log10(exp(pred1$fit)), type = "l", col.line = "red") } panel.xyplot(savex, savey, pch=16) panel.abline(h = 0, lty = 3) panel.abline(h = log10(10), lty = 3) panel.abline(h = log10(100), lty = 3) }, # function xlab="Days After 27 June", lastday=lastday, scales=list(alternating=FALSE,y=list(log=TRUE,at=c(1,2,5,10,20,50,100,200))), as.table=TRUE, par.strip.text = stripParams, lastday=lastday, data= lgacases) print(lgaplot)