Merge branch 'master' of https://gitlab.fi.muni.cz/lexa/coronavirus (18314ca2) · Commits · Matej Lexa / coronavirus

README.md

+10 −0

Original line number	Diff line number	Diff line
		@@ -49,6 +49,8 @@ rmarkdown::render_site()

		[John Hopkins University Data and Links](https://github.com/CSSEGISandData/COVID-19)

		[More than 10,000 UK sequences](https://www.cogconsortium.uk/data/)

		[GISAID](gisaid.org)

		[Italian data](https://github.com/pcm-dpc)
		@@ -65,6 +67,10 @@ rmarkdown::render_site()

		[Daily data from ECDC](https://ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)

		[UCSC Genome Browser - coronavirus page](http://genome.ucsc.edu/goldenPath/newsarch.html#040320)

		[Nextstrain annotation data](http://data.nextstrain.org/ncov.json)

		## Software

		[HMMER 3.0](http://hmmer.org/)
		@@ -104,3 +110,7 @@ rmarkdown::render_site()
		[Log-scaled comparison of countries starting from x days](https://mentalbreaks.shinyapps.io/covid19/)

		[SARS-CoV-2 protein alignments](http://slim.icr.ac.uk/proviz/)

		[Coronaviruses 101 video](https://www.youtube.com/watch?v=8_bOhZd6ieM)

		[Boni et al., 2020. Evolutionary origins of the SARS-CoV-2 sarbecovirus lineage responsible for the COVID-19 pandemic](https://www.biorxiv.org/content/10.1101/2020.03.30.015008v1)

_site.yml

+2 −0

Original line number	Diff line number	Diff line
		@@ -16,4 +16,6 @@ navbar:
		href: page5.html
		- text: "p6: Time series"
		href: page6.html
		- text: "p6: Maps"
		href: page7.html

page2.Rmd

0 → 100644

+17 −0

Original line number	Diff line number	Diff line
		---
		title: Profile-HMM of coronavirus sequence
		---

		Read fasta files with sequences:

		```{r}
		#install.packages("seqinr")
		library(seqinr)
		sequences <- seqinr::read.fasta('genes/Coronaviridae/all.fasta')
		head(sequences)
		install.packages("aphid")
		library(aphid)
		hmm.PHMMR <- derivePHMM(sequences)
		```

		aphid -> https://cran.r-project.org/web/packages/aphid/vignettes/aphid-vignette.html

page6.Rmd

+171 −8

Original line number	Diff line number	Diff line
		@@ -6,9 +6,9 @@ A quick example of using the ["coronavirus" package](https://github.com/RamiKris

		This is the latest coronavirus data at webpage compilation time, from

		```{r}
		```{r echo=FALSE}
		# We need to reinstall and load the package to get the most current data, which is embedded in the package itself
		devtools::install_github("RamiKrispin/coronavirus")
		# devtools::install_github("RamiKrispin/coronavirus") # can't reinstall in a non-interactive script without root access
		library(dplyr)
		library(coronavirus)
		show(min(coronavirus$date))
		@@ -16,21 +16,184 @@ show(min(coronavirus$date))

		until

		```{r}
		```{r echo=FALSE}
		show(max(coronavirus$date))
		```

		Table with the latest number of cases (top 32):
		Table with the latest number of cases (top 50):

		```{r}
		coronavirus %>% group_by(Country.Region, type) %>% summarise(total_cases = sum(cases)) %>% arrange(-total_cases) %>% filter(type == "confirmed") %>% as.data.frame() %>% head(32)
		coronavirus %>% group_by(country, type) %>% summarise(total_cases = sum(cases)) %>% arrange(-total_cases) %>% filter(type == "confirmed") %>% as.data.frame() %>% head(80)
		```


		Table with the latest number of deaths (top 32):
		Table with the latest number of deaths (top 50):

		```{r}
		coronavirus %>% group_by(Country.Region, type) %>% summarise(total_cases = sum(cases)) %>% arrange(-total_cases) %>% filter(type == "death") %>% as.data.frame() %>% head(32)
		coronavirus %>% group_by(country, type) %>% summarise(total_cases = sum(cases)) %>% arrange(-total_cases) %>% filter(type == "death") %>% as.data.frame() %>% head(80)
		```

		An FT-like case trajectory (credit goes to [John Burn-Murdoch](https://twitter.com/jburnmurdoch) and [Joachim Gassen](https://joachim-gassen.github.io/2020/03/tidying-the-new-johns-hopkins-covid-19-datasests/)):

		```{r warning=FALSE, echo=FALSE, fig.height = 9, fig.width = 15}
		library(ggplot2)
		library(gghighlight)

		cumulative <- function(row) {
		return(sum(coronavirus[which(coronavirus$country == row$country & coronavirus$date <= row$date & coronavirus$type == row$type),]$cases))
		}

		active <- function(row) {
		return(
		sum(coronavirus[which(coronavirus$country == row$country & coronavirus$date <= row$date & coronavirus$type == "confirmed"),]$cases) -
		sum(coronavirus[which(coronavirus$country == row$country & coronavirus$date <= row$date & coronavirus$type == "recovered"),]$cases) -
		sum(coronavirus[which(coronavirus$country == row$country & coronavirus$date <= row$date & coronavirus$type == "death"),]$cases)
		)
		}

		all_cumulative <- function() {
		res <- rep(0,length(coronavirus$type))
		for(i in 1:length(coronavirus$type)) {
		res[i] <- cumulative(coronavirus[i,])
		}
		return(res)
		}

		all_active <- function() {
		res <- rep(0,length(coronavirus$total))
		for(i in 1:length(coronavirus$t)) {
		res[i] <- active(coronavirus[i,])
		}
		return(res)
		}


		coronavirus$total <- all_cumulative()
		coronavirus$active <- all_active()

		xdate <- function(row) {
		threshold <- c(confirmed = 350, death = 15, recovered = 150)
		min <- min(coronavirus[which(coronavirus$country == row$country & coronavirus$cases >= threshold[row$type] & coronavirus$type == row$type),]$date)
		return(as.numeric(row$date - min))
		}

		all_xdate <- function() {
		res <- rep(0,length(coronavirus$type))
		for(i in 1:length(coronavirus$type)) {
		res[i] <- xdate(coronavirus[i,])
		}
		return(res)
		}

		coronavirus$xdate <- all_xdate()

		window7 <- function(row) {
		sum <- sum(coronavirus[which(coronavirus$country == row$country & coronavirus$type == row$type & row$date-coronavirus$date < 7 & row$date-coronavirus$date > -1),]$cases)
		return(sum)
		}

		all_window7 <- function() {
		res <- rep(0,length(coronavirus$type))
		for(i in 1:length(coronavirus$type)) {
		res[i] <- window7(coronavirus[i,])
		}
		return(res)
		}

		coronavirus$window7 <- all_window7()

		ggplot(coronavirus %>% filter(xdate >= 0 & xdate < 100) %>% filter(type == "confirmed"),
		aes(x = xdate, color = country, y = total)) +
		geom_line() +
		labs(
		title = "Confirmed Cases\n"
		) +
		scale_y_continuous(trans='log10', breaks = c(1,10,100,1000,10000,100000,1000000), labels = c("1","10","100","1000","10000","100000","1000000")) +
		gghighlight(TRUE, label_key = country, use_direct_label = TRUE, label_params = list(segment.color = NA, nudge_x = 1, size = 4))
		```

		Cumulative mortality trajectory:

		```{r echo=FALSE, fig.height = 9, fig.width = 15}
		ggplot(coronavirus %>% filter(xdate >= 0 & xdate < 100) %>% filter(type == "death"),
		aes(x = xdate, color = country, y = total)) +
		geom_line() +
		labs(
		title = "Deaths from COVID-19\n"
		) +
		scale_y_continuous(trans='log10', breaks = c(1,10,100,1000,10000,100000,1000000), labels = c("1","10","100","1000","10000","100000","1000000")) +
		gghighlight(TRUE, label_key = country, use_direct_label = TRUE, label_params = list(segment.color = NA, nudge_x = 1, size = 4))

		```

		Cumulative recovery trajectory

		```{r echo=FALSE, fig.height = 9, fig.width = 15}
		ggplot(coronavirus %>% filter(xdate >= 0 & xdate < 100) %>% filter(type == "recovered"),
		aes(x = xdate, color = country, y = total)) +
		geom_line() +
		labs(
		title = "Recovered\n"
		) +
		scale_y_continuous(trans='log10', breaks = c(1,10,100,1000,10000,100000,1000000), labels = c("1","10","100","1000","10000","100000","1000000")) +
		gghighlight(TRUE, label_key = country, use_direct_label = TRUE, label_params = list(segment.color = NA, nudge_x = 1, size = 4))

		```

		<!--#Number of active cases
		#
		#```{r echo=FALSE, fig.height = 9, fig.width = 15}
		#ggplot(coronavirus %>% filter(xdate >= 0 & xdate < 100) %>% filter(type == "confirmed"),
		# aes(x = xdate, color = country, y = active)) +
		# geom_line() +
		# labs(
		# title = "Active\n"
		# ) +
		# scale_y_continuous(trans='log10', breaks = c(1,10,100,1000,10000,100000,1000000), labels = #c("1","10","100","1000","10000","100000","1000000")) +
		# gghighlight(TRUE, label_key = country, use_direct_label = TRUE, label_params = list(segment.color = NA, #nudge_x = 1, size = 4))
		#
		#```-->

		Confirmed cases floating window last 7 days

		```{r echo=FALSE, fig.height = 9, fig.width = 15}
		ggplot(coronavirus %>% filter(xdate >= 0 & xdate < 100) %>% filter(type == "confirmed"),
		aes(x = xdate, color = country, y = window7)) +
		geom_line() +
		labs(
		title = "Confirmed Cases last 7 days\n"
		) +
		scale_y_continuous(trans='log10', breaks = c(1,10,100,1000,10000,100000,1000000), labels = c("1","10","100","1000","10000","100000","1000000")) +
		gghighlight(TRUE, label_key = country, use_direct_label = TRUE, label_params = list(segment.color = NA, nudge_x = 1, size = 4))

		```

		Deaths floating window last 7 days

		```{r echo=FALSE, fig.height = 9, fig.width = 15}
		ggplot(coronavirus %>% filter(xdate >= 0 & xdate < 100) %>% filter(type == "death"),
		aes(x = xdate, color = country, y = window7)) +
		geom_line() +
		labs(
		title = "Confirmed Deaths last 7 days\n"
		) +
		scale_y_continuous(trans='log10', breaks = c(1,10,100,1000,10000,100000,1000000), labels = c("1","10","100","1000","10000","100000","1000000")) +
		gghighlight(TRUE, label_key = country, use_direct_label = TRUE, label_params = list(segment.color = NA, nudge_x = 1, size = 4))

		```

		Recovery floating window last 7 days

		```{r echo=FALSE, fig.height = 9, fig.width = 15}
		ggplot(coronavirus %>% filter(xdate >= 0 & xdate < 100) %>% filter(type == "recovered"),
		aes(x = xdate, color = country, y = window7)) +
		geom_line() +
		labs(
		title = "Recovered last 7 days\n"
		) +
		scale_y_continuous(trans='log10', breaks = c(1,10,100,1000,10000,100000,1000000), labels = c("1","10","100","1000","10000","100000","1000000")) +
		gghighlight(TRUE, label_key = country, use_direct_label = TRUE, label_params = list(segment.color = NA, nudge_x = 1, size = 4))

		```

		--

page7.Rmd

0 → 100644

+125 −0

Original line number	Diff line number	Diff line
		---
		title: "page7"
		output: html_document
		---

		```{r setup, include=FALSE}
		knitr::opts_chunk$set(echo = TRUE)
		```

		## R Markdown

		This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.

		When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

		```{r}

		packages <- c("RCzechia", "leaflet", "dplyr", "rnaturalearth", "rnaturalearthdata")
		if (length(setdiff(packages, rownames(installed.packages()))) > 0) {
		install.packages(setdiff(packages, rownames(installed.packages())))
		}

		library(leaflet)
		library(dplyr)
		library(RCzechia)
		library("rnaturalearth")
		library("rnaturalearthdata")

		worldMap <- ne_countries(scale = 'large')
		worldMap <- subset(worldMap, sov_a3!="SVK"\| sov_a3!="CZE")

		date <- as.POSIXlt(Sys.Date())
		dateString <- format(date, format="%m-%d-%Y")
		filenameWorld <- paste(dateString,".csv", sep="")

		new_data <- -1

		while (new_data == -1)
		{
		request <- paste("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/", filenameWorld, sep="")
		tryCatch({
		new_data <- read.csv(request)
		}, error=function(e){})
		date <- date -1
		dateString <- format(date, format="%m-%d-%Y")
		filenameWorld <- paste(dateString,".csv", sep="")
		}


		names(worldMap)[names(worldMap) == "admin"] <- "Country_Region"
		grouped <- group_by(new_data,Country_Region)

		worldAgg <- grouped %>% summarise(
		latx = mean(Lat),
		longx = mean(Long_),
		Confirmed = sum(Confirmed)
		)
		levels(worldAgg$Country_Region) <- c(levels(worldAgg$Country_Region),"United States of America")
		worldAgg$Country_Region[which(worldAgg$Country_Region == "US")] <- "United States of America"
		request <- "https://onemocneni-aktualne.mzcr.cz/api/v1/covid-19/osoby.csv"
		czech_new_data <- read.csv(request)

		mergedWorld <- left_join(x = worldMap@data,y = worldAgg,by=c("Country_Region"), all.x=TRUE)

		slovakia <- ne_states(country="Slovakia")
		#czechia <- ne_states(country="Czech Republic")
		czechiaMap <- kraje(resolution = "high")

		names(czechiaMap)[names(czechiaMap) == "KOD_CZNUTS3"] <- "kraj"

		groupedCZ <- group_by(czech_new_data, kraj)

		czechAgg <- groupedCZ %>% summarise(
		cnt = n()
		)

		mergedCZ <- left_join(x = czechiaMap, y = czechAgg, by=c("kraj"), all.x=TRUE)

		labelsSR <- sprintf(
		"<strong>%s</strong><br/>%s NO DATA YET",
		slovakia$name, slovakia$adm1_code
		) %>% lapply(htmltools::HTML)

		labelsCZ <- sprintf(
		"<strong>%s</strong><br/>%s infected",
		mergedCZ$NAZ_CZNUTS3, mergedCZ$cnt
		) %>% lapply(htmltools::HTML)

		labelsWorld <- sprintf(
		"<strong> %s</strong><br/>%s infected",
		mergedWorld$Country_Region, mergedWorld$Confirmed
		) %>% lapply(htmltools::HTML)


		leaf <- leaflet() %>%
		addTiles() %>%
		addPolygons(data=worldMap, color= "#444444",highlightOptions = highlightOptions(color = "white", weight = 2,
		bringToFront = TRUE),
		label = labelsWorld,
		labelOptions = labelOptions(
		style = list("font-weight" = "normal", padding = "3px 8px"),
		textsize = "15px",
		direction = "auto")) %>%
		addPolygons(data=slovakia, color = "#345588", weight = 1, smoothFactor = 0.5,
		opacity = 1.0, fillOpacity = 0.5,
		highlight = highlightOptions(color = "white", weight = 2,
		bringToFront = TRUE),
		label = labelsSR,
		labelOptions = labelOptions(
		style = list("font-weight" = "normal", padding = "3px 8px"),
		textsize = "15px",
		direction = "auto")) %>%
		addPolygons(data=mergedCZ, color = "#345588", weight = 1, smoothFactor = 0.5,
		opacity = 1.0, fillOpacity = 0.5,
		highlightOptions = highlightOptions(color = "white", weight = 2,
		bringToFront = TRUE),
		label = labelsCZ,
		labelOptions = labelOptions(
		style = list("font-weight" = "normal", padding = "3px 8px"),
		textsize = "15px",
		direction = "auto")) %>%
		setView(17,50,6)

		leaf
		```