-
Notifications
You must be signed in to change notification settings - Fork 0
/
ENCODE_vs_hybrids.R
61 lines (50 loc) · 2.24 KB
/
ENCODE_vs_hybrids.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# Overlap ENCODE binary with hybrids generated by me
library(dplyr)
library(GenomicRanges)
library(plyranges)
library(regioneR)
library(readr)
library(magrittr)
library(tibble)
library(ggplot2)
library(stringr)
library(ggthemes)
library(ggthemr)
library(ggsignif)
ggthemr('dust')
library("VennDiagram")
library(pheatmap)
Hybrids_non_binary <- read.delim("~/Data/GM12878/Hybrids/Hybrids_June23.tsv")
Hybrids_non_binary <- Hybrids_non_binary %>% mutate(ID = 1:nrow(Hybrids_non_binary))
Hybrids_non_binary <- makeGRangesFromDataFrame(Hybrids_non_binary, keep.extra.columns = T)
GM12878_ENCODE_features <- read.delim("~/Data/GM12878/ENCODE/GM12878_ENCODE_features.tsv")
GM12878_ENCODE_features <- makeGRangesFromDataFrame(GM12878_ENCODE_features, keep.extra.columns = T)
Hybrids_non_binary <- join_overlap_inner(Hybrids_non_binary, GM12878_ENCODE_features)
Hybrids_non_binary <- data.frame(Hybrids_non_binary)
l <- list()
i = 1
while (i <= nrow(Hybrids_non_binary)) {
if (i %% 5000 == 0) print(paste0("We are at row ", i))
iFirst = i
while (i < nrow(Hybrids_non_binary) - 1 &&
(Hybrids_non_binary[i, "end"] == Hybrids_non_binary[i + 1, "end"] &&
Hybrids_non_binary[i, "start"] == Hybrids_non_binary[i + 1, "start"] &&
Hybrids_non_binary[i, "seqnames"] == Hybrids_non_binary[i + 1, "seqnames"]))
i = i + 1
iLast = i
# now we copy the last row we have checked
newRow = Hybrids_non_binary[i,]
# we change the start to match the start in the beginning of the chain
newRow[1, "start"] = Hybrids_non_binary[iFirst,"start"]
# we take the maximum for each of the rows that contain 0 or 1
for (j in 6:64) newRow[1, j] = sum(Hybrids_non_binary[iFirst:iLast, j]) # now 6:64 because GRanges has added width and strand
# adds this row to the end of our resulting list
l[[length(l) + 1]] <- newRow
# increase the idex i to be ready for the next chain
i = i + 1
}
Hybrids_non_binary <- data.frame(data.table::rbindlist(l))
Normalised_Hybrids_non_binary <- Hybrids_non_binary %>%
mutate(bin_number = round(width / 200)) %>%
mutate(across(7:65, ~ ./bin_number))
pheatmap(Normalised_Hybrids_non_binary[c(7:65)], fontsize_row = 7, fontsize_col = 9, cutree_cols = 2, angle_col = 90, cluster_rows = F, labels_row="")