Skip to content

Commit

Permalink
Add non-coding regions to SARS-CoV-2, for #589.
Browse files Browse the repository at this point in the history
Also stop reporting rows without data in nuc.csv and amino.csv. Report query position when a row comes from a single contig.
  • Loading branch information
donkirkby committed May 11, 2021
1 parent 5ae98a7 commit 0a938ea
Show file tree
Hide file tree
Showing 8 changed files with 326 additions and 99 deletions.
37 changes: 34 additions & 3 deletions micall/core/aln2counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -653,7 +653,11 @@ def combine_reports(self):
old_report[i].seed_amino.add(report_amino.seed_amino)
self.reports.clear()

def write_amino_report(self, amino_writer, reports, seed, coverage_summary=None):
def write_amino_report(self,
amino_writer: DictWriter,
reports: typing.Dict[str, typing.List['ReportAmino']],
seed: str,
coverage_summary: dict = None):
if not reports:
return
regions = sorted(reports.keys())
Expand Down Expand Up @@ -687,8 +691,20 @@ def write_amino_report(self, amino_writer, reports, seed, coverage_summary=None)
row[letter] = letter_count
coverage_sum += letter_count
row['coverage'] += letter_count
amino_writer.writerow(row)
pos_count += 1
for field_name in ('coverage',
'clip',
'v3_overlap',
'ins',
'del',
'partial',
'X'):
if row[field_name]:
break
else:
# Nothing useful, don't write this row.
continue
amino_writer.writerow(row)
if coverage_summary is not None and pos_count > 0:
region_coverage = coverage_sum / pos_count
old_coverage = coverage_summary.get('avg_coverage', -1)
Expand Down Expand Up @@ -930,6 +946,17 @@ def write_counts(self,
for base in 'ACTGN':
nuc_count = seed_nuc.counts[base]
row[base] = nuc_count
for field_name in ('coverage',
'clip',
'N',
'ins',
'del',
'v3_overlap'):
if row[field_name]:
break
else:
# No useful data, don't write this row.
continue
nuc_writer.writerow(row)

def merge_extra_counts(self):
Expand Down Expand Up @@ -1356,6 +1383,10 @@ def count_aminos(self, codon_seq, count):
seed_nucleotide.count_nucleotides(nuc, count)

def add(self, other: 'SeedAmino'):
if self.read_count and other.read_count:
self.consensus_nuc_index = None
elif other.read_count:
self.consensus_nuc_index = other.consensus_nuc_index
self.counts += other.counts
self.partial += other.partial
self.deletions += other.deletions
Expand Down Expand Up @@ -1498,7 +1529,7 @@ def count_overlap(self, other):


class ReportAmino(object):
def __init__(self, seed_amino, position):
def __init__(self, seed_amino: SeedAmino, position: int):
""" Create a new instance.
@param seed_amino: Counts for the
Expand Down
8 changes: 4 additions & 4 deletions micall/core/plot_contigs.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,18 +576,18 @@ def main():
parser = ArgumentParser(
description='Plot assembled contigs against a reference.',
formatter_class=ArgumentDefaultsHelpFormatter)
parser.add_argument('--blast',
help='CSV file with BLAST results for each contig',
type=FileType())
parser.add_argument('genome_coverage_csv',
help='CSV file with coverage counts for each contig',
type=FileType())
parser.add_argument('blast_csv',
help='CSV file with BLAST results for each contig',
type=FileType())
parser.add_argument('genome_coverage_svg',
help='SVG file to plot coverage counts for each contig')
args = parser.parse_args()

plot_genome_coverage(args.genome_coverage_csv,
args.blast_csv,
args.blast,
args.genome_coverage_svg)
print('Wrote', args.genome_coverage_svg)

Expand Down