--- title: 'NACHO Analysis' subtitle: "A NAnostring quality Control dasHbOard" author: "Mickaƫl Canouil, Ph.D., Gerard A. Bouland and Roderick C. Slieker, Ph.D." email: "mickael.canouil@cnrs.fr" date: '`r format(Sys.time(), "%B %d, %Y")`' bibliography: nacho.bib link-citations: true output: rmarkdown::html_vignette: number_sections: true toc: true toc_depth: 2 fig_width: 6.3 fig_height: 4.7 vignette: > %\VignetteIndexEntry{NACHO-analysis} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( eval = TRUE, collapse = TRUE, # results = "asis", include = TRUE, echo = TRUE, warning = TRUE, message = TRUE, error = TRUE, # tidy = FALSE, # crop = TRUE, # autodep = TRUE, fig.align = "center", cache = FALSE ) ``` ```{r logo, echo = FALSE, out.width = "150px"} knitr::include_graphics(path = "nacho_hex.png") ``` # Installation ```{r, eval = FALSE} # Install NACHO from CRAN: install.packages("NACHO") # Or the the development version from GitHub: # install.packages("remotes") remotes::install_github("mcanouil/NACHO") ``` # Overview ```{r, echo = FALSE, results = "asis"} cat(readLines(system.file("app", "www", "about-nacho.md", package = "NACHO"))[-c(1, 2)], sep = "\n") ``` ```{r, echo = FALSE, results = "asis"} print(citation("NACHO"), "html") ``` ```{r, echo = FALSE, comment = ""} print(citation("NACHO"), "bibtex") ``` # Analyse NanoString data ## Load packages ```{r} library(NACHO) library(GEOquery, quietly = TRUE, warn.conflicts = FALSE) ``` ## Download `GSE70970` from GEO (or use your own data) ```{r} data_directory <- file.path(tempdir(), "GSE70970", "Data") # Download data gse <- getGEO("GSE70970") getGEOSuppFiles(GEO = "GSE70970", baseDir = tempdir()) # Unzip data untar( tarfile = file.path(tempdir(), "GSE70970", "GSE70970_RAW.tar"), exdir = data_directory ) # Get phenotypes and add IDs targets <- pData(phenoData(gse[[1]])) targets$IDFILE <- list.files(data_directory) ``` ## Import RCC files ```{r} GSE70970 <- load_rcc(data_directory, targets, id_colname = "IDFILE") ``` ## Perform the analyses using `limma` ```{r} library(limma) ``` ### Get the phenotypes ```{r} selected_pheno <- GSE70970[["nacho"]][ j = lapply(unique(.SD), function(x) ifelse(x == "NA", NA, x)), .SDcols = c("IDFILE", "age:ch1", "gender:ch1", "chemo:ch1", "disease.event:ch1") ] selected_pheno <- na.exclude(selected_pheno) ``` ```{r, echo = FALSE} head(selected_pheno) ``` ### Get the normalised counts ```{r} expr_counts <- GSE70970[["nacho"]][ i = grepl("Endogenous", CodeClass), j = as.matrix( dcast(.SD, Name ~ IDFILE, value.var = "Count_Norm"), "Name" ), .SDcols = c("IDFILE", "Name", "Count_Norm") ] ``` ```{r, echo = FALSE} expr_counts[1:5, 1:5] ``` Alternatively, `"Accession"` number is also available. ```{r, eval = FALSE} GSE70970[["nacho"]][ i = grepl("Endogenous", CodeClass), j = as.matrix( dcast(.SD, Accession ~ IDFILE, value.var = "Count_Norm"), "Accession" ), .SDcols = c("IDFILE", "Accession", "Count_Norm") ] ``` ### Select phenotypes and counts 1. Make sure count matrix and phenotypes have the same samples ```{r} samples_kept <- intersect(selected_pheno[["IDFILE"]], colnames(expr_counts)) expr_counts <- expr_counts[, samples_kept] selected_pheno <- selected_pheno[IDFILE %in% c(samples_kept)] ``` 2. Build the numeric design matrix ```{r} design <- model.matrix(~ `disease.event:ch1`, selected_pheno) ``` 3. `limma` ```{r} eBayes(lmFit(expr_counts, design)) ``` ## Perform the analyses using `lm` (or any other model) ```{r} GSE70970[["nacho"]][ i = grepl("Endogenous", CodeClass), j = lapply(unique(.SD), function(x) ifelse(x == "NA", NA, x)), .SDcols = c( "IDFILE", "Name", "Accession", "Count", "Count_Norm", "age:ch1", "gender:ch1", "chemo:ch1", "disease.event:ch1" ) ][ Name %in% head(unique(Name), 10) ][ j = as.data.table( coef(summary(lm( formula = Count_Norm ~ `disease.event:ch1`, data = na.exclude(.SD) ))), "term" ), by = c("Name", "Accession") ] ```