Skip to content

Commit

Permalink
Strand specific counting wrt samples (gagneurlab#72)
Browse files Browse the repository at this point in the history
* make strandSpecificity by sample

* move strand specific error to validObject

* minor changes in strandSpecific function and properties

* correct identation

---------

Co-authored-by: Christian Mertes <mertes@in.tum.de>
  • Loading branch information
AtaJadidAhari and c-mertes committed Apr 25, 2024
1 parent 6f5c919 commit e775832
Show file tree
Hide file tree
Showing 9 changed files with 70 additions and 37 deletions.
38 changes: 28 additions & 10 deletions R/AllGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -151,25 +151,44 @@ setReplaceMethod("workingDir", "FraserDataSet", function(object, value) {
#' @export
#' @rdname fds-methods
setMethod("strandSpecific", "FraserDataSet", function(object) {
return(slot(object, "strandSpecific"))
if(!"strand" %in% colnames(colData(object))){
warning("Strand is not specified. Please set the used RNA-seq",
" protocol by using 'strandSpecific(object) <- c(...)'.",
"\n\nWe assume as default a non stranded protocol.")
return(rep(0, ncol(object)))
}
return(colData(object)$strand)
})

#' @export
#' @rdname fds-methods
setReplaceMethod("strandSpecific", "FraserDataSet", function(object, value) {
if (length(value) != ncol(object)){
if(length(value) == 1){
warning("Only one value is provided as strand for all samples.\n",
" We assume that all samples are of the same provided strand.")
strandSpecific(object) <- rep(value, ncol(object))
}
else{
stop("Number of strand values should be equal to the number of samples: (",
paste0(length(value), " != ", ncol(object), ")"))
}
}
if(is.logical(value)){
value <- as.integer(value)
}
if(is.character(value)){
value <- switch(tolower(value),
'no' = 0L,
'unstranded' = 0L,
'yes' = 1L,
'stranded' = 1L,
'reverse' = 2L,
-1L)
val_chr <- tolower(value)
value <- sapply(val_chr, switch, 'no' = 0L, 'unstranded' = 0L,
'yes' = 1L, 'stranded' = 1L, 'reverse' = 2L, -1L)
if(any(value < 0)){
stop("Please specify correct strandness of the samples.",
" It needs to be one of: 'no', 'unstranded', 'yes',",
" 'stranded' or 'reverse'.", "\n\nIt was specified: ",
paste(collapse = ", ", val_chr[value < 0]))
}
}
slot(object, "strandSpecific") <- value
colData(object)$strand <- as.integer(value)
validObject(object)
return(object)
})
Expand Down Expand Up @@ -309,7 +328,6 @@ subset.FRASER <- function(x, i, j, by=c("j", "ss"), ..., drop=FALSE){
subX,
name = name(x),
bamParam = scanBamParam(x),
strandSpecific = strandSpecific(x),
workingDir = workingDir(x),
nonSplicedReads = nsrObj
)
Expand Down
44 changes: 28 additions & 16 deletions R/FraserDataSet-class.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,12 @@ setClass("FraserDataSet",
slots = list(
name = "character",
bamParam = "ScanBamParam",
strandSpecific = "integer",
workingDir = "character",
nonSplicedReads = "RangedSummarizedExperiment"
),
prototype = list(
name = "Data Analysis",
bamParam = ScanBamParam(mapqFilter=0),
strandSpecific = 0L,
workingDir = "FRASER_output",
nonSplicedReads = SummarizedExperiment(rowRanges=GRanges())
)
Expand Down Expand Up @@ -73,9 +71,19 @@ validateBamParam <- function(object) {
}

validateStrandSpecific <- function(object) {
if(!isScalarInteger(object@strandSpecific)) {
return(paste("The 'strandSpecific' option must be 0L (unstranded),",
"1L (stranded) or 2L (reverse)."))
sampleData <- as.data.table(colData(object))
if("strand" %in% colnames(sampleData) &&
(!is.integer(sampleData[,strand])
|| any(!sampleData[,strand] %in% 0:2))){
return(paste("The 'strand' column in the sample annotation in",
"'colData(fds)' must be empty or contain only integers:",
"0L == 'no', 1L == 'yes', 2L == 'reverse'."))
}
# Check mixed strand type
ss <- strandSpecific(object)
if ((any(ss == 0) && any(ss == 1)) || (any(ss == 0) && any(ss == 2))){
stop(paste("Data contains a mix of stranded and unstranded samples.\n ",
"Please analyse them separately to ensure consistency during counting."))
}
NULL
}
Expand All @@ -85,8 +93,8 @@ validatePairedEnd <- function(object) {
if("pairedEnd" %in% colnames(sampleData) &&
any(!is.logical(sampleData[,pairedEnd]))){
return(paste("The 'pairedEnd' column in the sample annotation in",
"'colData(fds)' must only contain logical values ",
"(TRUE or FALSE)."))
"'colData(fds)' must only contain logical values ",
"(TRUE or FALSE)."))
}
NULL
}
Expand Down Expand Up @@ -114,7 +122,8 @@ validateNonSplicedReadsType <- function(object) {
return("'nonSplicedReads' must be a RangedSummarizedExperiment object")
}
if(length(object) != 0 && dim(object@nonSplicedReads)[2] != dim(object)[2]){
return("The nonSplicedReads dimensions are not correct. This is a internal error!")
return(paste("The nonSplicedReads dimensions are not correct.",
"This is a internal error!"))
}
ans <- validObject(object@nonSplicedReads)
if(!isScalarLogical(ans) || ans == FALSE){
Expand All @@ -134,21 +143,24 @@ validateAssays <- function(object){
NULL
}

# for non-empty fds objects check if non-spliced reads are overlapping with at least 1 donor/acceptor site
# for non-empty fds objects check if non-spliced reads are overlapping
# with at least 1 donor/acceptor site
validateNonSplicedReadsSanity <- function(object){
# fds object must have samples and junctions
if(all(dim(object) > c(0,0))){

# fds object must be annotated with start/end/spliceSite indexes
if("startID" %in% names(mcols(object)) && "endID" %in% names(mcols(object)) &&
"spliceSiteID" %in% names(mcols(object, type="theta"))){
if(all(c("startID", "endID") %in% names(mcols(object))) &&
"spliceSiteID" %in% names(mcols(object, type="theta"))){

# check that every spliceSiteID matches either a start or end index
if(length( intersect( mcols(object, type="theta")$spliceSiteID, unlist(mcols(object)[, c("startID", "endID")] ) ) )
!= nrow(nonSplicedReads(object))){
return("The nonSplicedReads do not have corresponding splitReads. This is probably the result of merging")
}
# check that every spliceSiteID matches either a start or end index
if(nrow(nonSplicedReads(object)) != length(intersect(
mcols(object, type="theta")$spliceSiteID,
unlist(mcols(object)[, c("startID", "endID")])))){
return(paste("The nonSplicedReads do not have corresponding",
"splitReads. This is probably the result of merging"))
}
}
}
NULL
}
Expand Down
3 changes: 2 additions & 1 deletion R/annotationOfRanges.R
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,8 @@ findAnnotatedJunction <- function(fds, annotation, annotateNames=TRUE,
# check if strandspecific data is used
gr <- rowRanges(fds, type="psi5")

if(isFALSE(as.logical(stranded))){
#
if(isFALSE(as.logical(unique(stranded)))){
strand(gr) <- "*"
}

Expand Down
8 changes: 4 additions & 4 deletions R/countRNAseqData.R
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,6 @@ addCountsToFraserDataSet <- function(fds, splitCounts, nonSplitCounts){
splitCounts,
name = name(fds),
bamParam = scanBamParam(fds),
strandSpecific = strandSpecific(fds),
workingDir = workingDir(fds),
nonSplicedReads = nonSplitCounts,
metadata = metadata(fds)
Expand Down Expand Up @@ -461,14 +460,14 @@ countSplitReads <- function(sampleID, fds, NcpuPerSample=1, genome=NULL,
recount=FALSE, keepNonStandardChromosomes=TRUE,
bamfile=bamFile(fds[,sampleID]),
pairedend=pairedEnd(fds[,sampleID]),
strandmode=strandSpecific(fds),
strandmode=strandSpecific(fds[,sampleID]),
cacheFile=getSplitCountCacheFile(sampleID, fds),
scanbamparam=scanBamParam(fds),
coldata=colData(fds)){

# check for valid fds
validObject(fds)

# check cache if available
if(isFALSE(recount) && !is.null(cacheFile) && file.exists(cacheFile)){
cache <- readRDS(cacheFile)
Expand Down Expand Up @@ -865,6 +864,7 @@ countNonSplicedReads <- function(sampleID, splitCountRanges, fds,
# unstranded case: for counting only non spliced reads we
# skip this information
isPairedEnd <- pairedEnd(fds[,samples(fds) == sampleID])[[1]]
strand <- strandSpecific(fds[,samples(fds) == sampleID])[[1]]
doAutosort <- isPairedEnd

# check cache if available
Expand Down Expand Up @@ -906,7 +906,7 @@ countNonSplicedReads <- function(sampleID, splitCountRanges, fds,
allowMultiOverlap=TRUE,
checkFragLength=FALSE,
minMQS=bamMapqFilter(scanBamParam(fds)),
strandSpecific=as.integer(strandSpecific(fds)),
strandSpecific=strand,

# activating long read mode
isLongRead=longRead,
Expand Down
7 changes: 6 additions & 1 deletion R/helper-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,12 @@ getBPParam <- function(worker, tasks=0, ...){
}

getStrandString <- function(fds){
strand <- switch(strandSpecific(fds)+1L, "no", "yes", "reverse")
strand <- sapply(strandSpecific(fds)+1L, switch, "no", "yes (forward)", "yes (reverse)")
if (length(unique(strand)) == 2){
strand <- "yes (forward and reverse)"
} else {
strand <- unique(strand)
}
return(strand)
}

Expand Down
2 changes: 0 additions & 2 deletions R/makeSimulatedDataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ makeSimulatedFraserDataSet_BetaBinomial <- function(m=200, j=10000, q=10,
junctionData,
name = name(fds),
bamParam = scanBamParam(fds),
strandSpecific = strandSpecific(fds),
workingDir = workingDir(fds),
nonSplicedReads = nonSplitData)

Expand Down Expand Up @@ -364,7 +363,6 @@ makeSimulatedFraserDataSet_Multinomial <- function(m=200, j=1000, q=10,
junctionData,
name = name(fds),
bamParam = scanBamParam(fds),
strandSpecific = strandSpecific(fds),
workingDir = workingDir(fds),
nonSplicedReads = nonSplitData)

Expand Down
1 change: 0 additions & 1 deletion R/mergeExternalData.R
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,6 @@ mergeExternalData <- function(fds, countFiles, sampleIDs, annotation=NULL){
ans <- new("FraserDataSet",
name = name(fds),
bamParam = scanBamParam(fds),
strandSpecific = strandSpecific(fds),
workingDir = workingDir(fds),
colData = newColData,
assays = Assays(SimpleList(
Expand Down
2 changes: 1 addition & 1 deletion man/countRNA.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test_counting.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ test_that("Strand spcific counting", {
fds <- createTestFraserSettings()
strandSpecific(fds) <- TRUE
ans <- countSplitReadsPerChromosome("chrUn_gl000218", bamFile(fds)[1],
pairedEnd=TRUE, strandMode=strandSpecific(fds), genome=NULL,
pairedEnd=TRUE, strandMode=strandSpecific(fds)[1], genome=NULL,
scanBamParam=scanBamParam(fds))
expect_equivalent(ans, GRanges())

Expand Down

0 comments on commit e775832

Please sign in to comment.