-
Notifications
You must be signed in to change notification settings - Fork 2
/
script_ORF_prediction.sh
executable file
·131 lines (78 loc) · 5.53 KB
/
script_ORF_prediction.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/bin/bash
## Script detecting ORFs in input sequences
## input:
# circRNA sequences file: "/data/functional_predictions/backsplice_sequence_1.fa"
# ORFfinder parameters: $GEN_CODE $START_CODON $MIN_LENGTH $NESTED_ORFS $STRAND
# GEN_CODE: genetic code to use (1-31), see https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for details; default: 1
# START_CODON: start codon to use (0 = "ATG" only, 1 = "ATG" and alternative initiation codons, 2 = any sense codon); default: 0
# MIN_LENGTH: minimal length of the ORF (nt); allowed values are: 30, 75, 150. Default: 30
# NESTED_ORFS: ignore nested ORFs (ORFs completely placed within another); allowed values: "TRUE", "FALSE". Default: "FALSE"
# STRAND: output ORFs on specified strand only; allowed values: "both", "plus", "minus". Default: "plus"
## command: /scripts/script_ORF_prediction.sh $GEN_CODE $START_CODON $MIN_LENGTH $NESTED_ORFS $STRAND
# --- ORFfinder parameters -------------------------------------------------
# Positional arguments fall back to the defaults documented in the header
# when they are omitted or empty, so the ORFfinder command line and the
# generated file names are never built from an empty value.
GEN_CODE=${1:-1}
START_CODON=${2:-0}
MIN_LENGTH=${3:-30}
NESTED_ORFS=${4:-FALSE}
STRAND=${5:-plus}

# --- Working directory ----------------------------------------------------
# -p makes the script re-runnable (no error when the directory already exists).
mkdir -p /data/functional_predictions/ORF_detection
# Abort if the directory cannot be entered: every path below is relative, so
# continuing would read/write files in whatever directory we happen to be in.
cd /data/functional_predictions/ORF_detection || {
  echo "ERROR: cannot enter /data/functional_predictions/ORF_detection" >&2
  exit 1
}

# --- Input preparation ----------------------------------------------------
# On every line that starts with a run of A/T/C/G, write that run twice in a
# row (sequence concatenated with itself), and replace the first ':' on each
# line with '_'.
sed -r 's/(^[ATCG]+)/\1\1/; s/:/_/' ../backsplice_sequence_1.fa \
  > "backsplice_sequence_per_ORF_${MIN_LENGTH}.fa"
cp ../backsplice_sequence_1.txt "backsplice_sequence_${MIN_LENGTH}.txt"

# create file with backsplice position
# Drop the header line(s) containing "circ_id"; remaining rows are expected
# to hold an identifier in column 1 and a length in column 2.
grep -v "circ_id" ../backsplice_circRNA_length_1.txt > circ_id_length.txt
# Build a 3-column, tab-separated BED-like file: id, length-1, length+1.
# NOTE(review): the first sort key is numeric (-k1,1n) although column 1 is
# an identifier — kept as in the original; confirm this ordering is intended.
awk -v OFS='\t' '{print $1, $2-1, $2+1}' circ_id_length.txt \
  | sort -k1,1n -k2,2n > backsplice_position.bed

# --- ORFfinder --------------------------------------------------------------
mkdir -p ORFfinder
cd ORFfinder || {
  echo "ERROR: cannot enter /data/functional_predictions/ORF_detection/ORFfinder" >&2
  exit 1
}
# Number of records to process; falls back to 0 if the file cannot be read so
# that the numeric test that follows always receives a number.
N=$(wc -l < ../circ_id_length.txt) || N=0
if [ $N -gt 0 ]
then
/tools/ORFfinder -in ../backsplice_sequence_per_ORF_$MIN_LENGTH.fa -g $GEN_CODE -s $START_CODON -ml $MIN_LENGTH -n $NESTED_ORFS -strand $STRAND -out result_list_ORF_$MIN_LENGTH.txt -outfmt 0 -logfile ORF0_$MIN_LENGTH.log
/tools/ORFfinder -in ../backsplice_sequence_per_ORF_$MIN_LENGTH.fa -g $GEN_CODE -s $START_CODON -ml $MIN_LENGTH -n $NESTED_ORFS -strand $STRAND -out result_list_CDS_$MIN_LENGTH.txt -outfmt 1 -logfile ORF1_$MIN_LENGTH.log
/tools/ORFfinder -in ../backsplice_sequence_per_ORF_$MIN_LENGTH.fa -g $GEN_CODE -s $START_CODON -ml $MIN_LENGTH -n $NESTED_ORFS -strand $STRAND -out result_text_ORF_$MIN_LENGTH.txt -outfmt 2 -logfile ORF2_$MIN_LENGTH.log