Skip to content

Commit

Permalink
Add project scoring for noncoding regions in SARS-CoV-2, part of #589.
Browse files: browse the repository at this point in the history
Stop highlighting partial-match contigs as unaligned.
  • Loading branch information
donkirkby committed May 14, 2021
1 parent 2da6390 commit 1d0bd1a
Show file tree
Hide file tree
Showing 6 changed files with 155 additions and 23 deletions.
3 changes: 2 additions & 1 deletion micall/core/aln2counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,7 +840,8 @@ def write_sequence_coverage_counts(self,
if bead.alignment is not None:
cigar = bead.alignment.cigar
else:
cigar = [(bead.end - bead.start, None)]
action = CigarActions.MATCH if coordinate_name is None else None
cigar = [(bead.end - bead.start, action)]
for length, action in cigar:
if action == CigarActions.DELETE:
ref_pos += length
Expand Down
136 changes: 134 additions & 2 deletions micall/project_scoring.json
Original file line number Diff line number Diff line change
Expand Up @@ -7200,6 +7200,28 @@
"SARSCOV2": {
"max_variants": 0,
"regions": [
{
"coordinate_region": "SARS-CoV-2-3'UTR",
"coordinate_region_length": 226,
"key_positions": [],
"min_coverage1": 10,
"min_coverage2": 50,
"min_coverage3": 100,
"seed_region_names": [
"SARS-CoV-2-seed"
]
},
{
"coordinate_region": "SARS-CoV-2-5'UTR",
"coordinate_region_length": 262,
"key_positions": [],
"min_coverage1": 10,
"min_coverage2": 50,
"min_coverage3": 100,
"seed_region_names": [
"SARS-CoV-2-seed"
]
},
{
"coordinate_region": "SARS-CoV-2-E",
"coordinate_region_length": 75,
Expand Down Expand Up @@ -7245,8 +7267,19 @@
]
},
{
"coordinate_region": "SARS-CoV-2-ORF1ab",
"coordinate_region_length": 7096,
"coordinate_region": "SARS-CoV-2-ORF1a",
"coordinate_region_length": 4405,
"key_positions": [],
"min_coverage1": 10,
"min_coverage2": 50,
"min_coverage3": 100,
"seed_region_names": [
"SARS-CoV-2-seed"
]
},
{
"coordinate_region": "SARS-CoV-2-ORF1b",
"coordinate_region_length": 2695,
"key_positions": [],
"min_coverage1": 10,
"min_coverage2": 50,
Expand Down Expand Up @@ -7321,6 +7354,105 @@
"SARS-CoV-2-seed"
]
},
{
"coordinate_region": "SARS-CoV-2-TRS-B-1",
"coordinate_region_length": 4,
"key_positions": [],
"min_coverage1": 10,
"min_coverage2": 50,
"min_coverage3": 100,
"seed_region_names": [
"SARS-CoV-2-seed"
]
},
{
"coordinate_region": "SARS-CoV-2-TRS-B-2",
"coordinate_region_length": 5,
"key_positions": [],
"min_coverage1": 10,
"min_coverage2": 50,
"min_coverage3": 100,
"seed_region_names": [
"SARS-CoV-2-seed"
]
},
{
"coordinate_region": "SARS-CoV-2-TRS-B-3",
"coordinate_region_length": 21,
"key_positions": [],
"min_coverage1": 10,
"min_coverage2": 50,
"min_coverage3": 100,
"seed_region_names": [
"SARS-CoV-2-seed"
]
},
{
"coordinate_region": "SARS-CoV-2-TRS-B-4",
"coordinate_region_length": 47,
"key_positions": [],
"min_coverage1": 10,
"min_coverage2": 50,
"min_coverage3": 100,
"seed_region_names": [
"SARS-CoV-2-seed"
]
},
{
"coordinate_region": "SARS-CoV-2-TRS-B-5",
"coordinate_region_length": 7,
"key_positions": [],
"min_coverage1": 10,
"min_coverage2": 50,
"min_coverage3": 100,
"seed_region_names": [
"SARS-CoV-2-seed"
]
},
{
"coordinate_region": "SARS-CoV-2-TRS-B-6",
"coordinate_region_length": 3,
"key_positions": [],
"min_coverage1": 10,
"min_coverage2": 50,
"min_coverage3": 100,
"seed_region_names": [
"SARS-CoV-2-seed"
]
},
{
"coordinate_region": "SARS-CoV-2-TRS-B-7",
"coordinate_region_length": 3,
"key_positions": [],
"min_coverage1": 10,
"min_coverage2": 50,
"min_coverage3": 100,
"seed_region_names": [
"SARS-CoV-2-seed"
]
},
{
"coordinate_region": "SARS-CoV-2-TRS-B-8",
"coordinate_region_length": 11,
"key_positions": [],
"min_coverage1": 10,
"min_coverage2": 50,
"min_coverage3": 100,
"seed_region_names": [
"SARS-CoV-2-seed"
]
},
{
"coordinate_region": "SARS-CoV-2-TRS-B-9",
"coordinate_region_length": 21,
"key_positions": [],
"min_coverage1": 10,
"min_coverage2": 50,
"min_coverage3": 100,
"seed_region_names": [
"SARS-CoV-2-seed"
]
},
{
"coordinate_region": "SARS-CoV-2-nsp1",
"coordinate_region_length": 179,
Expand Down
24 changes: 12 additions & 12 deletions micall/tests/test_aln2counts_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -801,12 +801,12 @@ def test_contig_coverage_report_for_partial_contig(sequence_report):

expected_text = """\
contig,coordinates,query_nuc_pos,refseq_nuc_pos,dels,coverage,link
1-R1-seed-partial,,1,1,0,5,U
1-R1-seed-partial,,2,2,0,5,U
1-R1-seed-partial,,3,3,0,5,U
1-R1-seed-partial,,4,4,0,5,U
1-R1-seed-partial,,5,5,0,5,U
1-R1-seed-partial,,6,6,0,5,U
1-R1-seed-partial,,1,1,0,5,M
1-R1-seed-partial,,2,2,0,5,M
1-R1-seed-partial,,3,3,0,5,M
1-R1-seed-partial,,4,4,0,5,M
1-R1-seed-partial,,5,5,0,5,M
1-R1-seed-partial,,6,6,0,5,M
"""

report_file = StringIO()
Expand Down Expand Up @@ -835,12 +835,12 @@ def test_contig_coverage_report_for_reversed_contig(sequence_report):

expected_text = """\
contig,coordinates,query_nuc_pos,refseq_nuc_pos,dels,coverage,link
1-R1-seed-reversed,,1,1,0,5,U
1-R1-seed-reversed,,2,2,0,5,U
1-R1-seed-reversed,,3,3,0,5,U
1-R1-seed-reversed,,4,4,0,5,U
1-R1-seed-reversed,,5,5,0,5,U
1-R1-seed-reversed,,6,6,0,5,U
1-R1-seed-reversed,,1,1,0,5,M
1-R1-seed-reversed,,2,2,0,5,M
1-R1-seed-reversed,,3,3,0,5,M
1-R1-seed-reversed,,4,4,0,5,M
1-R1-seed-reversed,,5,5,0,5,M
1-R1-seed-reversed,,6,6,0,5,M
"""

report_file = StringIO()
Expand Down
7 changes: 4 additions & 3 deletions micall/tests/test_denovo.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from io import StringIO
from pathlib import Path

from pytest import fixture
from pytest import fixture, mark

from micall.core.denovo import write_contig_refs, denovo, DEFAULT_DATABASE, genotype
from micall.blast_db.make_blast_db import make_blast_db, DEFAULT_PROJECTS
Expand Down Expand Up @@ -207,6 +207,7 @@ def test_merged_contig(tmpdir, hcv_db):
assert expected_contigs_csv == contigs_csv.getvalue()


@mark.iva()
def test_denovo_iva(tmpdir, hcv_db):
microtest_path = Path(__file__).parent / 'microtest'
contigs_csv = StringIO()
Expand All @@ -221,8 +222,8 @@ def test_denovo_iva(tmpdir, hcv_db):
AGGCGGTGATGGGGGCTTCTTATGGATTCCAGTACTCCC
"""

denovo(microtest_path / '2160A-HCV_S19_L001_R1_001.fastq',
microtest_path / '2160A-HCV_S19_L001_R2_001.fastq',
denovo(str(microtest_path / '2160A-HCV_S19_L001_R1_001.fastq'),
str(microtest_path / '2160A-HCV_S19_L001_R2_001.fastq'),
contigs_csv,
tmpdir)

Expand Down
2 changes: 1 addition & 1 deletion micall/tests/test_plot_contigs.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def drawing_to_image(drawing: Drawing) -> Image:
return image


def encode_image(image: Image) -> bytes:
def encode_image(image: Image) -> str:
writer = BytesIO()
image.save(writer, format='PNG')
encoded = standard_b64encode(writer.getvalue())
Expand Down
6 changes: 2 additions & 4 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
[pytest]
norecursedirs = sandbox

# Can remove this filter when BioPython upgrades to 1.78.
filterwarnings = ignore::PendingDeprecationWarning:Bio.Alphabet
markers =
iva: test requires IVA (deselect with '-m "not iva"')

0 comments on commit 1d0bd1a

Please sign in to comment.