-
Notifications
You must be signed in to change notification settings - Fork 0
/
realign_hg38.sh
52 lines (38 loc) · 1.78 KB
/
realign_hg38.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/bin/bash
#SBATCH -c 12 # Request 12 cores (matches the default thread count used below)
#SBATCH -N 1 # Request one node (if you request more than one core with -c, also using
# -N 1 means all cores will be on the same node)
#SBATCH -t 0-23:59 # Runtime in D-HH:MM format
#SBATCH -p medium # Partition to run in
#SBATCH --mem=24G # Total memory for all cores (the G suffix means gigabytes)
#SBATCH -o hostname_%j.out # File to which STDOUT will be written, including job ID
#SBATCH -e hostname_%j.err # File to which STDERR will be written, including job ID
#SBATCH --mail-type=ALL # Type of email notification- BEGIN,END,FAIL,ALL
#
# Re-align every *.bam in the current directory against hg38.
#
# For each <library>.bam the script extracts paired FASTQ files, aligns them
# with bowtie2, fixes mate information, coordinate-sorts, removes duplicates
# and indexes the result, producing hg38/<library>.bam (+ .bai).
# Libraries whose hg38/<library>.bam already exists are skipped, so the script
# can simply be re-submitted after a partial run.
#
# Optional environment:
#   BOWTIE_INDEX         bowtie2 index prefix (defaults to the GRCh38 no-alt set)
#   SLURM_CPUS_PER_TASK  thread count; falls back to 12 outside of Slurm
#
# Exit status: 0 if every library was processed or skipped, 1 otherwise.

module load gcc/6.2.0
module load bowtie2/2.3.4.3

# Enabled only after the module loads, as a precaution so that -u cannot
# interfere with whatever the site's `module` shell function does internally.
set -uo pipefail

readonly THREADS="${SLURM_CPUS_PER_TASK:-12}"
readonly BOWTIE_INDEX="${BOWTIE_INDEX:-/n/data2/dfci/medonc/cwu/livius/GRCh38/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.bowtie_index}"
readonly OUT_DIR="hg38"

# Print a message to stderr and abort the whole job.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Fail early instead of half-way through the first sample.
for tool in bowtie2 samtools gatk; do
  command -v "$tool" >/dev/null 2>&1 || die "required tool not found: $tool"
done

# -p: the directory legitimately exists when the job is re-submitted.
mkdir -p "$OUT_DIR" || die "cannot create output directory: $OUT_DIR"

# Private scratch directory, created next to the data (the intermediates are
# large, and the original script also kept them in the working directory).
# A unique name keeps concurrent jobs in the same directory from clobbering
# each other's intermediates.
work_dir=$(mktemp -d "./realign_hg38_tmp.XXXXXX") \
  || die "cannot create scratch directory"
cleanup() {
  rm -rf -- "${work_dir:?}"
}
trap cleanup EXIT

#######################################
# Re-align one BAM file and publish the result into OUT_DIR.
# Globals:   work_dir, OUT_DIR, BOWTIE_INDEX, THREADS (all read)
# Arguments: $1 - input BAM file
#            $2 - library name (used for the output file name)
# Returns:   0 on success, 1 as soon as any pipeline step fails
#######################################
realign_one() {
  local bam_file=$1
  local library=$2
  local fastq_1="${work_dir}/reads_1.fastq"
  local fastq_2="${work_dir}/reads_2.fastq"
  local aligned_sam="${work_dir}/aligned.sam"
  local sorted_bam="${work_dir}/sorted.bam"
  local dedup_bam="${work_dir}/dedup.bam"

  # Drop intermediates of the previous sample so that a failing step can never
  # silently pick up another library's data.
  rm -f -- "${work_dir:?}"/*

  # extract fastq files
  gatk SamToFastq --INPUT "$bam_file" \
    --FASTQ "$fastq_1" --SECOND_END_FASTQ "$fastq_2" || return 1

  # alignment against hg38
  bowtie2 -x "$BOWTIE_INDEX" --very-sensitive -p "$THREADS" \
    -1 "$fastq_1" -2 "$fastq_2" -S "$aligned_sam" || return 1

  # fix mates (no output given: the input file is rewritten in place)
  gatk FixMateInformation --INPUT "$aligned_sam" --ADD_MATE_CIGAR true \
    || return 1

  # sort aligned reads (samtools sort emits BAM, hence the .bam name)
  samtools sort -@ "$THREADS" -T "${work_dir}/sort" \
    -o "$sorted_bam" "$aligned_sam" || return 1

  # remove duplicates
  # NOTE: the metrics file name is shared, so it is overwritten by every
  # library and only the last processed library's metrics remain.
  gatk MarkDuplicates -I "$sorted_bam" -O "$dedup_bam" \
    --REMOVE_DUPLICATES true --REMOVE_SEQUENCING_DUPLICATES true \
    -M MarkDuplicates.metrics.txt || return 1

  # index file
  samtools index "$dedup_bam" || return 1

  # Publish last, and the BAM after its index: the "already processed" check
  # keys on the BAM, so it must only appear once everything is complete.
  mv -- "${dedup_bam}.bai" "${OUT_DIR}/${library}.bam.bai" || return 1
  mv -- "$dedup_bam" "${OUT_DIR}/${library}.bam" || return 1
}

# Without nullglob an unmatched pattern would be processed as a file literally
# named "*.bam".
shopt -s nullglob
bam_files=(*.bam)
(( ${#bam_files[@]} > 0 )) || die "no .bam files found in ${PWD}"

failed=0
for bam_file in "${bam_files[@]}"; do
  library=${bam_file%.bam}
  printf '%s\n' "$library"

  if [[ -f "${OUT_DIR}/${library}.bam" ]]; then
    printf '%s already processed.\n' "$library"
    continue
  fi

  # A failed library is reported and skipped so the remaining ones still run.
  if ! realign_one "$bam_file" "$library"; then
    printf 'error: realignment failed for %s; skipping\n' "$library" >&2
    failed=$((failed + 1))
  fi
done

if (( failed > 0 )); then
  die "${failed} of ${#bam_files[@]} libraries failed"
fi