-
Notifications
You must be signed in to change notification settings - Fork 2
/
script_RBP_detection.sh
executable file
·86 lines (53 loc) · 2.24 KB
/
script_RBP_detection.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/bin/bash
## Script detecting RBP binding sites in input sequences
##
## input:
#   circRNA sequences file: /data/functional_predictions/backsplice_sequence_1.fa
#   circRNA lengths file:   /data/functional_predictions/circRNA_length.txt
#                           (tab-separated: circRNA id, length)
#   beRBP parameters: $PWM, $RBP
#       PWM: position weight matrix
#       RBP: RBP name
#   prefix for genome file: "$PREFIX_UCSC"
## output (all under /data/functional_predictions/RBP_detection):
#   backsplice_sequence_per_RBP.fa        sequences extended over the junction
#   beRBP/analysis_RBP.fasta, .log        beRBP input copy and run log
#   beRBP/analysis_RBP/                   beRBP result directory, plus
#       resultMatrix_b.txt                filtered rows, header seqID -> circ_id
#       resultMatrix_b.tsv                same, first 7 columns only, with the
#                                         first '.' of column 7 turned into ','
## command: /scripts/script_RBP_detection.sh $PWM $RBP $PREFIX_UCSC
##
## Errors are handled explicitly (functions return instead of exiting), so the
## exit status of the script is the return status of main on the last line.

readonly DATA_DIR=/data/functional_predictions
readonly OUT_DIR="$DATA_DIR/RBP_detection"
readonly BERBP_OUT_DIR="$OUT_DIR/beRBP"
readonly BERBP_WORK_DIR=/tools/beRBP/work
readonly JOB=analysis_RBP
# Number of leading bases repeated at the end of every sequence.
readonly JUNCTION_OVERLAP=20

# Print a diagnostic prefixed with the script name on stderr.
err() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
}

#######################################
# Write FASTA file $1 to stdout with the first JUNCTION_OVERLAP characters of
# every sequence appended to its end, one line per sequence.
# Sequences wrapped over several lines are joined before being extended.
# Lines that precede the first ">" header are ignored.
# Arguments: $1 - input FASTA file
#######################################
extend_over_junction() {
  local fasta=$1
  awk -v overlap="$JUNCTION_OVERLAP" '
    function emit() {
      if (header != "") {
        print header
        print seq substr(seq, 1, overlap)
      }
    }
    /^>/ { emit(); header = $0; seq = ""; next }
    { seq = seq $0 }
    END { emit() }
  ' "$fasta"
}

#######################################
# Print the header of result matrix $2 followed by the rows whose 5th column
# is <= the length listed for their circRNA in $1. Rows are grouped in the
# order the ids appear in $1; rows of ids missing from $1 are dropped.
#
# Ids are compared for exact equality (a substring match would let "circ_1"
# select the rows of "circ_10"). The id is read from the column whose header
# is "seqID", falling back to column 1.
# NOTE(review): assumes beRBP reports the id exactly as written in $1 -
# confirm against a real resultMatrix.tsv.
# Arguments: $1 - tab-separated file: id, length
#            $2 - beRBP result matrix with a header line
#######################################
filter_sites_by_length() {
  local lengths_file=$1 matrix_file=$2
  awk '
    # First file: remember each length and the order in which ids appear.
    FILENAME == ARGV[1] {
      split($0, col, "\t")
      if (col[1] == "" || col[2] == "") next
      if (!(col[1] in max_pos)) order[++n_ids] = col[1]
      max_pos[col[1]] = col[2]
      next
    }
    # Second file, header line: locate the id column and pass it through.
    FNR == 1 {
      id_col = 1
      for (i = 1; i <= NF; i++) if ($i == "seqID") { id_col = i; break }
      print
      next
    }
    {
      id = $id_col
      # "+ 0" forces the stored length to be treated as a number.
      if ((id in max_pos) && $5 <= max_pos[id] + 0) kept[id] = kept[id] $0 "\n"
    }
    END { for (i = 1; i <= n_ids; i++) printf "%s", kept[order[i]] }
  ' "$lengths_file" "$matrix_file"
}

#######################################
# Derive the two final tables from the filtered matrix $1 into directory $2:
#   resultMatrix_b.txt  every column, first "seqID" of each line -> "circ_id"
#   resultMatrix_b.tsv  columns 1-7 only; the first "." of column 7 becomes
#                       ",", spaces become tabs, "seqID" -> "circ_id"
# Arguments: $1 - filtered result matrix, $2 - output directory
#######################################
write_result_tables() {
  local filtered=$1 out_dir=$2
  sed -e 's/seqID/circ_id/' "$filtered" > "$out_dir/resultMatrix_b.txt" \
    || return 1
  paste <(cut -f1-6 "$filtered") <(cut -f7 "$filtered" | sed -e 's/\./,/') \
    | sed -e 's/ /\t/g' -e 's/seqID/circ_id/' > "$out_dir/resultMatrix_b.tsv"
  #select suggested binding threshold from beRBP ("voteFrac" > 0.35)
  #head -n1 resultMatrix_b.tsv > resultMatrix_bind.tsv
  #cat resultMatrix_b.tsv | awk '$4==1 {print}' | sort -k7nr >> resultMatrix_bind.tsv
}

#######################################
# Run beRBP's general_sPWM.sh for job $JOB from its work directory, sending
# stdout and stderr to temp/$JOB.log. Runs in a subshell so the caller's
# working directory is left untouched.
# Arguments: $1 - PWM, $2 - RBP name, $3 - genome file prefix
# Returns:   status of general_sPWM.sh, or 1 if the work directory is missing
#######################################
run_berbp() {
  (
    cd -- "$BERBP_WORK_DIR" || exit 1
    ../code/general_sPWM.sh "$JOB" "$1" "$2" "$3" &> "temp/$JOB.log"
  )
}

main() {
  if (( $# < 3 )); then
    err "usage: ${0##*/} PWM RBP PREFIX_UCSC"
    return 2
  fi
  local pwm=$1 rbp=$2 prefix_ucsc=$3
  local fasta_in="$DATA_DIR/backsplice_sequence_1.fa"
  local lengths="$DATA_DIR/circRNA_length.txt"
  local fasta_ext="$OUT_DIR/backsplice_sequence_per_RBP.fa"
  local results="$BERBP_OUT_DIR/$JOB"
  local filtered status

  [[ -r "$fasta_in" ]] || { err "cannot read $fasta_in"; return 1; }
  [[ -r "$lengths" ]] || { err "cannot read $lengths"; return 1; }
  mkdir -p -- "$BERBP_OUT_DIR" || return 1

  # Repeat the start of each sequence at its end before handing it to beRBP.
  extend_over_junction "$fasta_in" > "$fasta_ext" \
    || { err "cannot write $fasta_ext"; return 1; }
  cp -- "$fasta_ext" "$BERBP_WORK_DIR/temp/$JOB.fasta" \
    || { err "cannot stage input in $BERBP_WORK_DIR/temp"; return 1; }

  # The exit status of beRBP is only reported: success is judged below by the
  # presence of its result directory and matrix.
  run_berbp "$pwm" "$rbp" "$prefix_ucsc"
  status=$?
  (( status == 0 )) || err "warning: beRBP run ended with status $status"

  # Log and input copy are kept on a best-effort basis.
  mv -- "$BERBP_WORK_DIR/temp/$JOB.log" "$BERBP_OUT_DIR/" \
    || err "warning: could not collect $JOB.log"
  mv -- "$BERBP_WORK_DIR/temp/$JOB.fasta" "$BERBP_OUT_DIR/" \
    || err "warning: could not collect $JOB.fasta"
  if ! mv -- "$BERBP_WORK_DIR/$JOB" "$BERBP_OUT_DIR/"; then
    err "cannot move $BERBP_WORK_DIR/$JOB to $BERBP_OUT_DIR" \
      "(no beRBP output, or results of a previous run still present)"
    return 1
  fi
  [[ -r "$results/resultMatrix.tsv" ]] \
    || { err "missing $results/resultMatrix.tsv"; return 1; }

  filtered=$(mktemp "$results/resultMatrix_a.XXXXXX") || return 1
  if filter_sites_by_length "$lengths" "$results/resultMatrix.tsv" > "$filtered" \
    && write_result_tables "$filtered" "$results"; then
    status=0
  else
    err "failed to build result tables in $results"
    status=1
  fi
  rm -f -- "$filtered"
  return "$status"
}

main "$@"