Commit 18314ca2 authored by Ondřej Lošťák's avatar Ondřej Lošťák
Browse files
parents 9ea0b45f 4027e738
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -49,6 +49,8 @@ rmarkdown::render_site()

[John Hopkins University Data and Links](https://github.com/CSSEGISandData/COVID-19)

[More than 10,000 UK sequences](https://www.cogconsortium.uk/data/)

[GISAID](gisaid.org)

[Italian data](https://github.com/pcm-dpc)
@@ -65,6 +67,10 @@ rmarkdown::render_site()

[Daily data from ECDC](https://ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)

[UCSC Genome Browser - coronavirus page](http://genome.ucsc.edu/goldenPath/newsarch.html#040320)

[Nextstrain annotation data](http://data.nextstrain.org/ncov.json)

## Software

[HMMER 3.0](http://hmmer.org/)
@@ -104,3 +110,7 @@ rmarkdown::render_site()
[Log-scaled comparison of countries starting from x days](https://mentalbreaks.shinyapps.io/covid19/)

[SARS-CoV-2 protein alignments](http://slim.icr.ac.uk/proviz/)

[Coronaviruses 101 video](https://www.youtube.com/watch?v=8_bOhZd6ieM)

[Boni et al., 2020. Evolutionary origins of the SARS-CoV-2 sarbecovirus lineage responsible for the COVID-19 pandemic](https://www.biorxiv.org/content/10.1101/2020.03.30.015008v1)
+2 −0
Original line number Diff line number Diff line
@@ -16,4 +16,6 @@ navbar:
      href: page5.html
    - text: "p6: Time series"
      href: page6.html
    - text: "p6: Maps"
      href: page7.html

page2.Rmd

0 → 100644
+17 −0
Original line number Diff line number Diff line
---
title: Profile-HMM of coronavirus sequence
---

Read fasta files with sequences:

```{r}
#install.packages("seqinr")
library(seqinr)
sequences <- seqinr::read.fasta('genes/Coronaviridae/all.fasta')
head(sequences)
install.packages("aphid")
library(aphid)
hmm.PHMMR <- derivePHMM(sequences)
```

aphid ->  https://cran.r-project.org/web/packages/aphid/vignettes/aphid-vignette.html
+171 −8
Original line number Diff line number Diff line
@@ -6,9 +6,9 @@ A quick example of using the ["coronavirus" package](https://github.com/RamiKris

This is the latest coronavirus data at webpage compilation time, from 

```{r}
```{r echo=FALSE}
# We need to reinstall and load the package to get the most current data, which is embedded in the package itself
devtools::install_github("RamiKrispin/coronavirus")
# devtools::install_github("RamiKrispin/coronavirus") # can't reinstall in a non-interactive script without root access
library(dplyr)
library(coronavirus)
show(min(coronavirus$date))
@@ -16,21 +16,184 @@ show(min(coronavirus$date))

 until 

```{r}
```{r echo=FALSE}
show(max(coronavirus$date))
```

Table with the latest number of cases (top 32):
Table with the latest number of cases (top 50):

```{r}
coronavirus %>% group_by(Country.Region, type) %>% summarise(total_cases = sum(cases)) %>% arrange(-total_cases) %>% filter(type == "confirmed") %>% as.data.frame() %>% head(32)
coronavirus %>% group_by(country, type) %>% summarise(total_cases = sum(cases)) %>% arrange(-total_cases) %>% filter(type == "confirmed") %>% as.data.frame() %>% head(80)
```


Table with the latest number of deaths (top 32):
Table with the latest number of deaths (top 50):

```{r}
coronavirus %>% group_by(Country.Region, type) %>% summarise(total_cases = sum(cases)) %>% arrange(-total_cases) %>% filter(type == "death") %>% as.data.frame() %>% head(32)
coronavirus %>% group_by(country, type) %>% summarise(total_cases = sum(cases)) %>% arrange(-total_cases) %>% filter(type == "death") %>% as.data.frame() %>% head(80)
```

An FT-like case trajectory (credit goes to [John Burn-Murdoch](https://twitter.com/jburnmurdoch) and [Joachim Gassen](https://joachim-gassen.github.io/2020/03/tidying-the-new-johns-hopkins-covid-19-datasests/)):

```{r warning=FALSE, echo=FALSE, fig.height = 9, fig.width = 15}
library(ggplot2)
library(gghighlight)

cumulative <- function(row) {
  return(sum(coronavirus[which(coronavirus$country == row$country & coronavirus$date <= row$date & coronavirus$type == row$type),]$cases))
}

active <- function(row) {
  return(
    sum(coronavirus[which(coronavirus$country == row$country & coronavirus$date <= row$date & coronavirus$type == "confirmed"),]$cases) -
    sum(coronavirus[which(coronavirus$country == row$country & coronavirus$date <= row$date & coronavirus$type == "recovered"),]$cases) -
    sum(coronavirus[which(coronavirus$country == row$country & coronavirus$date <= row$date & coronavirus$type == "death"),]$cases)
  )
}

all_cumulative <- function() {
  res <- rep(0,length(coronavirus$type))
  for(i in 1:length(coronavirus$type)) {
    res[i] <- cumulative(coronavirus[i,])
  }
  return(res)
}

all_active <- function() {
  res <- rep(0,length(coronavirus$total))
  for(i in 1:length(coronavirus$t)) {
    res[i] <- active(coronavirus[i,])
  }
  return(res)
}


coronavirus$total <- all_cumulative()
coronavirus$active <- all_active()

xdate <- function(row) {
  threshold <- c(confirmed = 350, death = 15, recovered = 150)
  min <- min(coronavirus[which(coronavirus$country == row$country & coronavirus$cases >= threshold[row$type] & coronavirus$type == row$type),]$date)
  return(as.numeric(row$date - min))
}

all_xdate <- function() {
  res <- rep(0,length(coronavirus$type))
  for(i in 1:length(coronavirus$type)) {
    res[i] <- xdate(coronavirus[i,])
  }
  return(res)
}

coronavirus$xdate <- all_xdate()

window7 <- function(row) {
  sum <- sum(coronavirus[which(coronavirus$country == row$country & coronavirus$type == row$type & row$date-coronavirus$date < 7 & row$date-coronavirus$date > -1),]$cases)
  return(sum)
}

all_window7 <- function() {
  res <- rep(0,length(coronavirus$type))
  for(i in 1:length(coronavirus$type)) {
    res[i] <- window7(coronavirus[i,])
  }
  return(res)
}

coronavirus$window7 <- all_window7()

ggplot(coronavirus %>% filter(xdate >= 0 & xdate < 100) %>% filter(type == "confirmed"), 
       aes(x = xdate, color = country, y = total)) +
  geom_line() +
  labs(
    title = "Confirmed Cases\n"
  ) + 
  scale_y_continuous(trans='log10', breaks = c(1,10,100,1000,10000,100000,1000000), labels = c("1","10","100","1000","10000","100000","1000000")) + 
  gghighlight(TRUE,  label_key = country, use_direct_label = TRUE, label_params = list(segment.color = NA, nudge_x = 1, size = 4))
```

Cumulative mortality trajectory:

```{r echo=FALSE, fig.height = 9, fig.width = 15}
ggplot(coronavirus %>% filter(xdate >= 0 & xdate < 100) %>% filter(type == "death"), 
       aes(x = xdate, color = country, y = total)) +
  geom_line() +
  labs(
    title = "Deaths from COVID-19\n"
  ) + 
  scale_y_continuous(trans='log10', breaks = c(1,10,100,1000,10000,100000,1000000), labels = c("1","10","100","1000","10000","100000","1000000")) + 
  gghighlight(TRUE,  label_key = country, use_direct_label = TRUE, label_params = list(segment.color = NA, nudge_x = 1, size = 4))

```

Cumulative recovery trajectory

```{r echo=FALSE, fig.height = 9, fig.width = 15}
ggplot(coronavirus %>% filter(xdate >= 0 & xdate < 100) %>% filter(type == "recovered"), 
       aes(x = xdate, color = country, y = total)) +
  geom_line() +
  labs(
    title = "Recovered\n"
  ) + 
  scale_y_continuous(trans='log10', breaks = c(1,10,100,1000,10000,100000,1000000), labels = c("1","10","100","1000","10000","100000","1000000")) + 
  gghighlight(TRUE,  label_key = country, use_direct_label = TRUE, label_params = list(segment.color = NA, nudge_x = 1, size = 4))

```

<!--#Number of active cases
#
#```{r echo=FALSE, fig.height = 9, fig.width = 15}
#ggplot(coronavirus %>% filter(xdate >= 0 & xdate < 100) %>% filter(type == "confirmed"), 
#       aes(x = xdate, color = country, y = active)) +
#  geom_line() +
#  labs(
#    title = "Active\n"
#  ) + 
#  scale_y_continuous(trans='log10', breaks = c(1,10,100,1000,10000,100000,1000000), labels = #c("1","10","100","1000","10000","100000","1000000")) + 
#  gghighlight(TRUE,  label_key = country, use_direct_label = TRUE, label_params = list(segment.color = NA, #nudge_x = 1, size = 4))
#
#```-->

Confirmed cases floating window last 7 days

```{r echo=FALSE, fig.height = 9, fig.width = 15}
ggplot(coronavirus %>% filter(xdate >= 0 & xdate < 100) %>% filter(type == "confirmed"), 
       aes(x = xdate, color = country, y = window7)) +
  geom_line() +
  labs(
    title = "Confirmed Cases last 7 days\n"
  ) + 
  scale_y_continuous(trans='log10', breaks = c(1,10,100,1000,10000,100000,1000000), labels = c("1","10","100","1000","10000","100000","1000000")) + 
  gghighlight(TRUE,  label_key = country, use_direct_label = TRUE, label_params = list(segment.color = NA, nudge_x = 1, size = 4))

```

Deaths floating window last 7 days

```{r echo=FALSE, fig.height = 9, fig.width = 15}
ggplot(coronavirus %>% filter(xdate >= 0 & xdate < 100) %>% filter(type == "death"), 
       aes(x = xdate, color = country, y = window7)) +
  geom_line() +
  labs(
    title = "Confirmed Deaths last 7 days\n"
  ) + 
  scale_y_continuous(trans='log10', breaks = c(1,10,100,1000,10000,100000,1000000), labels = c("1","10","100","1000","10000","100000","1000000")) + 
  gghighlight(TRUE,  label_key = country, use_direct_label = TRUE, label_params = list(segment.color = NA, nudge_x = 1, size = 4))

```

Recovery floating window last 7 days

```{r echo=FALSE, fig.height = 9, fig.width = 15}
ggplot(coronavirus %>% filter(xdate >= 0 & xdate < 100) %>% filter(type == "recovered"), 
       aes(x = xdate, color = country, y = window7)) +
  geom_line() +
  labs(
    title = "Recovered last 7 days\n"
  ) + 
  scale_y_continuous(trans='log10', breaks = c(1,10,100,1000,10000,100000,1000000), labels = c("1","10","100","1000","10000","100000","1000000")) + 
  gghighlight(TRUE,  label_key = country, use_direct_label = TRUE, label_params = list(segment.color = NA, nudge_x = 1, size = 4))

```

--

page7.Rmd

0 → 100644
+125 −0
Original line number Diff line number Diff line
---
title: "page7"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

## R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.

When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

```{r}

packages <- c("RCzechia", "leaflet", "dplyr", "rnaturalearth", "rnaturalearthdata")
if (length(setdiff(packages, rownames(installed.packages()))) > 0) {
  install.packages(setdiff(packages, rownames(installed.packages())))  
}

library(leaflet)
library(dplyr)
library(RCzechia)
library("rnaturalearth")
library("rnaturalearthdata")

worldMap <- ne_countries(scale = 'large')
worldMap <- subset(worldMap, sov_a3!="SVK"| sov_a3!="CZE")

date <- as.POSIXlt(Sys.Date())
dateString <- format(date, format="%m-%d-%Y")
filenameWorld <- paste(dateString,".csv", sep="")

new_data <- -1

while (new_data == -1)
{
  request <- paste("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/", filenameWorld, sep="")
  tryCatch({
    new_data <- read.csv(request)
  }, error=function(e){})
   date <- date -1
   dateString <- format(date, format="%m-%d-%Y")
   filenameWorld <- paste(dateString,".csv", sep="")
}


names(worldMap)[names(worldMap) == "admin"] <- "Country_Region"
grouped <-  group_by(new_data,Country_Region)

worldAgg <- grouped %>% summarise(
  latx = mean(Lat),
  longx = mean(Long_),
  Confirmed = sum(Confirmed)
)
levels(worldAgg$Country_Region) <- c(levels(worldAgg$Country_Region),"United States of America")
worldAgg$Country_Region[which(worldAgg$Country_Region == "US")] <- "United States of America"
request <- "https://onemocneni-aktualne.mzcr.cz/api/v1/covid-19/osoby.csv"
czech_new_data <- read.csv(request)

mergedWorld <- left_join(x = worldMap@data,y = worldAgg,by=c("Country_Region"), all.x=TRUE)

slovakia <- ne_states(country="Slovakia")
#czechia <- ne_states(country="Czech Republic")
czechiaMap <- kraje(resolution = "high")

names(czechiaMap)[names(czechiaMap) == "KOD_CZNUTS3"] <- "kraj"

groupedCZ <- group_by(czech_new_data, kraj)

czechAgg <- groupedCZ %>% summarise(
  cnt = n()
)

mergedCZ <- left_join(x = czechiaMap, y = czechAgg, by=c("kraj"), all.x=TRUE)

labelsSR <- sprintf(
  "<strong>%s</strong><br/>%s NO DATA YET",
  slovakia$name, slovakia$adm1_code
) %>% lapply(htmltools::HTML)

labelsCZ <- sprintf(
  "<strong>%s</strong><br/>%s infected",
  mergedCZ$NAZ_CZNUTS3, mergedCZ$cnt
) %>% lapply(htmltools::HTML)

labelsWorld <- sprintf(
  "<strong> %s</strong><br/>%s infected",
  mergedWorld$Country_Region, mergedWorld$Confirmed
) %>% lapply(htmltools::HTML)


leaf <- leaflet() %>%
  addTiles() %>%
  addPolygons(data=worldMap, color= "#444444",highlightOptions = highlightOptions(color = "white", weight = 2,
      bringToFront = TRUE),
      label = labelsWorld,
      labelOptions = labelOptions(
    style = list("font-weight" = "normal", padding = "3px 8px"),
    textsize = "15px",
    direction = "auto")) %>%
  addPolygons(data=slovakia, color = "#345588", weight = 1, smoothFactor = 0.5,
    opacity = 1.0, fillOpacity = 0.5,
    highlight = highlightOptions(color = "white", weight = 2,
      bringToFront = TRUE),
    label = labelsSR,
    labelOptions = labelOptions(
    style = list("font-weight" = "normal", padding = "3px 8px"),
    textsize = "15px",
    direction = "auto")) %>%
  addPolygons(data=mergedCZ, color = "#345588", weight = 1, smoothFactor = 0.5,
    opacity = 1.0, fillOpacity = 0.5,
    highlightOptions = highlightOptions(color = "white", weight = 2,
      bringToFront = TRUE),
    label = labelsCZ,
    labelOptions = labelOptions(
    style = list("font-weight" = "normal", padding = "3px 8px"),
    textsize = "15px",
    direction = "auto")) %>%
    setView(17,50,6)

leaf
```
Loading