Skip to content

Commit

Permalink
Add missing regions to SARS-CoV-2, for #589.
Browse files Browse the repository at this point in the history
  • Loading branch information
donkirkby committed Jul 30, 2020
1 parent 5a9dee8 commit 2115dc8
Show file tree
Hide file tree
Showing 5 changed files with 163 additions and 52 deletions.
11 changes: 0 additions & 11 deletions micall/project_scoring.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,6 @@
"max_variants": 0,
"description": "Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2)",
"regions": [
{
"coordinate_region": "SARS-CoV-2-orf1ab",
"coordinate_region_length": 7096,
"key_positions": [],
"min_coverage1": 10,
"min_coverage2": 50,
"min_coverage3": 100,
"seed_region_names": [
"SARS-CoV-2-seed"
]
},
{
"coordinate_region": "SARS-CoV-2-S",
"coordinate_region_length": 1273,
Expand Down
153 changes: 119 additions & 34 deletions micall/projects.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,6 @@
"SARS-CoV-2": {
"max_variants": 0,
"regions": [
{
"coordinate_region": "SARS-CoV-2-orf1ab",
"seed_region_names": [
"SARS-CoV-2-seed"
]
},
{
"coordinate_region": "SARS-CoV-2-S",
"seed_region_names": [
Expand Down Expand Up @@ -4933,7 +4927,91 @@
}
},
"regions": {
"SARS-CoV-2-orf1ab": {
"SARS-CoV-2-3'UTR": {
"is_nucleotide": true,
"reference": [
"CAATCTTTAATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTCACCGAGGCC",
"ACGCGGAGTACGATCGAGTGTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCT",
"AATGTGTAAAATTAATTTTAGTAGTGCTATCCCCATGTGATTTTAATAGCTTCTTAGGAGAATGA",
"CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
],
"seed_group": null
},
"SARS-CoV-2-5'UTR": {
"is_nucleotide": true,
"reference": [
"ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCT",
"CTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTAT",
"AATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTT",
"ACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAG",
"GTAAG"
],
"seed_group": null
},
"SARS-CoV-2-TRS-B-1": {
"is_nucleotide": true,
"reference": [
"ACGAACA"
],
"seed_group": null
},
"SARS-CoV-2-TRS-B-2": {
"is_nucleotide": true,
"reference": [
"ACGAACTT"
],
"seed_group": null
},
"SARS-CoV-2-TRS-B-3": {
"is_nucleotide": true,
"reference": [
"GCACAAGCTGATGAGTACGAACTT"
],
"seed_group": null
},
"SARS-CoV-2-TRS-B-4": {
"is_nucleotide": true,
"reference": [
"ACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAGCC"
],
"seed_group": null
},
"SARS-CoV-2-TRS-B-5": {
"is_nucleotide": true,
"reference": [
"GTGACAACAG"
],
"seed_group": null
},
"SARS-CoV-2-TRS-B-6": {
"is_nucleotide": true,
"reference": [
"ACGAAC"
],
"seed_group": null
},
"SARS-CoV-2-TRS-B-7": {
"is_nucleotide": true,
"reference": [
"ACGAAC"
],
"seed_group": null
},
"SARS-CoV-2-TRS-B-8": {
"is_nucleotide": true,
"reference": [
"ACGAACAAACTAAA"
],
"seed_group": null
},
"SARS-CoV-2-TRS-B-9": {
"is_nucleotide": true,
"reference": [
"ACTCATGCAGACCACACAAGGCAG"
],
"seed_group": null
},
"SARS-CoV-2-ORF1a": {
"is_nucleotide": false,
"reference": [
"MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGTCGLVEVEKGVLPQLE",
Expand Down Expand Up @@ -5003,7 +5081,14 @@
"TACTDDNALAYYNTTKGGRFVLALLSDLQDLKWARFPKSDGTGTIYTELEPPCRFVTDTPKGPKV",
"KYLYFIKGLNNLNRGMVLGSLAATVRLQAGNATEVPANSTVLSFCAFAVDAAKAYKDYLASGGQP",
"ITNCVKMLCTHTGTGQAITVTPEANMDQESFGGASCCLYCRCHIDHPNPKGFCDLKGKYVQIPTT",
"CANDPVGFTLKNTVCTVCGMWKGYGCSCDQLREPMLQSADAQSFLNRVCGVSAARLTPCGTGTST",
"CANDPVGFTLKNTVCTVCGMWKGYGCSCDQLREPMLQSADAQSFLNRVCGV"
],
"seed_group": null
},
"SARS-CoV-2-ORF1b": {
"is_nucleotide": false,
"reference": [
"RVCGVSAARLTPCGTGTST",
"DVVYRAFDIYNDKVAGFAKFLKTNCCRFQEKDEDDNLIDSYFVVKRHTFSNYQHEETIYNLLKDC",
"PAVAKHDFFKFRIDGDMVPHISRQRLTKYTMADLVYALRHFDEGNCDTLKEILVTYNCCDDDYFN",
"KKDWYDFVENPDILRVYANLGERVRQALLKTVQFCDAMRNAGIVGVLTLDNQDLNGNWYDFGDFI",
Expand Down Expand Up @@ -5045,7 +5130,7 @@
"TLLVDSDLNDFVSDADSTLIGDCATVHTANKWDLIISDMYDPKTKNVTKENDSKEGFFTYICGFI",
"QQKLALGGSVAIKITEHSWNADLYKLMGHFAWWTAFVTNVNASSSEAFLIGCNYLGKPREQIDGY",
"VMHANYIFWRNTNPIQLSSYSLFDMSKFPLKLRGTAVMSLKEGQINDMILSLLSKGRLIIRENNR",
"VVISSDVLVNN"
"VVISSDVLVNN*"
],
"seed_group": null
},
Expand All @@ -5071,7 +5156,7 @@
"DFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVT",
"QRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDIS",
"GINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLC",
"CMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT"
"CMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT*"
],
"seed_group": null
},
Expand All @@ -5082,15 +5167,15 @@
"KKRWQLALSKGVHFVCNLLLLFVTVYSHLLLVAAGLEAPFLYLYALVYFLQSINFVRIIMRLWLC",
"WKCRSKNPLLYDANYFLCWHTNCYDYCIPYNSVTSSIVITSGDGTTSPISEHDYQIGGYTEKWES",
"GVKDCVVLHSYFTSDYYQLYSTQLSTDTGVEHVTFFIYNKIVDEPEEHVQIHTIDGSSGVVNPVM",
"EPIYDEPTTTTSVPL"
"EPIYDEPTTTTSVPL*"
],
"seed_group": null
},
"SARS-CoV-2-E": {
"is_nucleotide": false,
"reference": [
"MYSFVSEETGTLIVNSVLLFLAFVVFLLVTLAILTALRLCAYCCNIVNVSLVKPSFYVYSRVKNL",
"NSSRVPDLLV"
"NSSRVPDLLV*"
],
"seed_group": null
},
Expand All @@ -5100,37 +5185,37 @@
"MADSNGTITVEELKKLLEQWNLVIGFLFLTWICLLQFAYANRNRFLYIIKLIFLWLLWPVTLACF",
"VLAAVYRINWITGGIAIAMACLVGLMWLSYFIASFRLFARTRSMWSFNPETNILLNVPLHGTILT",
"RPLLESELVIGAVILRGHLRIAGHHLGRCDIKDLPKEITVATSRTLSYYKLGASQRVAGDSGFAA",
"YSRYRIGNYKLNTDHSSSSDNIALLVQ"
"YSRYRIGNYKLNTDHSSSSDNIALLVQ*"
],
"seed_group": null
},
"SARS-CoV-2-ORF6": {
"is_nucleotide": false,
"reference": [
"MFHLVDFQVTIAEILLIIMRTFKVSIWNLDYIINLIIKNLSKSLTENKYSQLDEEQPMEID"
"MFHLVDFQVTIAEILLIIMRTFKVSIWNLDYIINLIIKNLSKSLTENKYSQLDEEQPMEID*"
],
"seed_group": null
},
"SARS-CoV-2-ORF7a": {
"is_nucleotide": false,
"reference": [
"MKIILFLALITLATCELYHYQECVRGTTVLLKEPCSSGTYEGNSPFHPLADNKFALTCFST",
"QFAFACPDGVKHVYQLRARSVSPKLFIRQEEVQELYSPIFLIVAAIVFITLCFTLKRKTE"
"QFAFACPDGVKHVYQLRARSVSPKLFIRQEEVQELYSPIFLIVAAIVFITLCFTLKRKTE*"
],
"seed_group": null
},
"SARS-CoV-2-ORF7b": {
"is_nucleotide": false,
"reference": [
"MIELSLIDFYLCFLAFLLFLVLIMLIIFWFSLELQDHNETCHA"
"MIELSLIDFYLCFLAFLLFLVLIMLIIFWFSLELQDHNETCHA*"
],
"seed_group": null
},
"SARS-CoV-2-ORF8": {
"is_nucleotide": false,
"reference": [
"MKFLVFLGIITTVAAFHQECSLQSCTQHQPYVVDDPCPIHFYSKWYIRVGARKSAPLIELC",
"VDEAGSKSPIQYIDIGNYTVSCLPFTINCQEPKLGSLVVRCSFYEDFLEYHDVRVVLDFI"
"VDEAGSKSPIQYIDIGNYTVSCLPFTINCQEPKLGSLVVRCSFYEDFLEYHDVRVVLDFI*"
],
"seed_group": null
},
Expand All @@ -5143,14 +5228,14 @@
"NSTPGSSRGTSPARMAGNGGDAALALLLLDRLNQLESKMSGKGQQQQGQTVTKKSAAEASKKPRQ",
"KRTATKAYNVTQAFGRRGPEQTQGNFGDQELIRQGTDYKHWPQIAQFAPSASAFFGMSRIGMEVT",
"PSGTWLTYTGAIKLDDKDPNFKDQVILLNKHIDAYKTFPPTEPKKDKKKKADETQALPQRQKKQQ",
"TVTLLPAADLDDFSKQLQQSMSSADSTQA"
"TVTLLPAADLDDFSKQLQQSMSSADSTQA*"
],
"seed_group": null
},
"SARS-CoV-2-ORF10": {
"is_nucleotide": false,
"reference": [
"MGYINVFAFPFTIYSLLLCRMNSRNYIAQVDVVNFNLT"
"MGYINVFAFPFTIYSLLLCRMNSRNYIAQVDVVNFNLT*"
],
"seed_group": null
},
Expand All @@ -5159,7 +5244,7 @@
"reference": [
"MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGTCGLVEVEKGVLPQLE",
"QPYVFIKRSDARTAPHGHVMVELVAELEGIQYGRSGETLGVLVPHVGEIPVAYRKVLLRKNGNKG",
"AGGHSYGADLKSFDLGDELGTDPYEDFQENWNTKHSSGVTRELMRELNG"
"AGGHSYGADLKSFDLGDELGTDPYEDFQENWNTKHSSGVTRELMRELNGG"
],
"seed_group": null
},
Expand All @@ -5175,7 +5260,7 @@
"DGISQYSLRLIDAMMFTSDLATNNLVVMAYITGGVVQLTSQWLTNIFGTVYEKLKPVLDWLEEKF",
"KEGVEFLRDGWEIVKFISTCACEIVGGQIVTCAKEIKESVQTFFKLVNKFLALCADSIIIGGAKL",
"KALNLGETFVTHSKGLYRKCVKSREETGLLMPLKAPKEIIFLEGETLPTEVLTEEVVLKTGDLQP",
"LEQPTSEAVEAPLVGTPVCINGLMLLEIKDTEKYCALAPNMMVTNNTFTLKG"
"LEQPTSEAVEAPLVGTPVCINGLMLLEIKDTEKYCALAPNMMVTNNTFTLKGG"
],
"seed_group": null
},
Expand Down Expand Up @@ -5211,7 +5296,7 @@
"GQKTYERHSLSHFVNLDNLRANNTKGSLPINVIVFDGKSKCEESSAKSASVYYSQLMCQPILLLD",
"QALVSDVGDSAEVAVKMFDAYVNTFSSTFNVPMEKLKTLVATAEAELAKNVSLDNVLSTFISAAR",
"QGFVDSDVETKDVVECLKLSHQSDIEVTGDSCNNYMLTYNKVENMTPRDLGACIDCSARHINAQV",
"AKSHNIALIWNVKDFMSLSEQLRKQIRSAAKKNNLPFKLTCATTRQVVNVVTTKIALKG"
"AKSHNIALIWNVKDFMSLSEQLRKQIRSAAKKNNLPFKLTCATTRQVVNVVTTKIALKGG"
],
"seed_group": null
},
Expand All @@ -5225,7 +5310,7 @@
"VNLLTNMFTPLIQPIGALDISASIVAGGIVAIVVTCLAYYFMRFRRAFGEYSHVVAFNTLLFLMS",
"FTVLCLTPVYSFLPGVYSVIYLYLTFYLTNDVSFLAHIQWMVMFTPLVPFWITIAYIICISTKHF",
"YWFFSNYLKRRVVFNGVSFSTFEEAALCTFLLNKEMYLKLRSDVLLPLTQYNRYLALYNKYKYFS",
"GAMDTTSYREAACCHLAKALNDFSNSGSDVLYQPPQTSITSAVL"
"GAMDTTSYREAACCHLAKALNDFSNSGSDVLYQPPQTSITSAVLQ"
],
"seed_group": null
},
Expand All @@ -5236,7 +5321,7 @@
"FLVQAGNVQLRVIGHSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAM",
"RPNFTIKGSFLNGSCGSVGFNIDYDCVSFCYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAG",
"TDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYEPLTQDHVDILGPLSAQTGIA",
"VLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTF"
"VLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQ"
],
"seed_group": null
},
Expand All @@ -5247,15 +5332,15 @@
"FLCLFLLPSLATVAYFNMVYMPASWVMRIMTWLDMVDTSLSGFKLKDCVMYASAVVLLILMTART",
"VYDDGARRVWTLMNVLTLVYKVYYGNALDQAISMWALIISVTSNYSGVVTTVMFLARGIVFMCVE",
"YCPIFFITGNTLQCIMLVYCFLGYFCTCYFGLFCLLNRYFRLTLGVYDYLVSTQEFRYMNSQGLL",
"PPKNSIDAFKLNIKLLGVGGKPCIKVATV"
"PPKNSIDAFKLNIKLLGVGGKPCIKVATVQ"
],
"seed_group": null
},
"SARS-CoV-2-nsp7": {
"is_nucleotide": false,
"reference": [
"SKMSDVKCTSVVLLSVLQQLRVESSSKLWAQCVQLHNDILLAKDTTEAFEKMVSLLSVLLSMQGA",
"VDINKLCEEMLDNRATL"
"VDINKLCEEMLDNRATLQ"
],
"seed_group": null
},
Expand All @@ -5265,15 +5350,15 @@
"AIASEFSSLPSYAAFATAQEAYEQAVANGDSEVVLKKLKKSLNVAKSEFDRDAAMQRKLEKMADQ",
"AMTQMYKQARSEDKRAKVTSAMQTMLFTMLRKLDNDALNNIINNARDGCVPLNIIPLTTAAKLMV",
"VIPDYNTYKNTCDGTTFTYASALWEIQQVVDADSKIVQLSEISMDNSPNLAWPLIVTALRANSAV",
"KL"
"KLQ"
],
"seed_group": null
},
"SARS-CoV-2-nsp9": {
"is_nucleotide": false,
"reference": [
"NNELSPVALRQMSCAAGTTQTACTDDNALAYYNTTKGGRFVLALLSDLQDLKWARFPKSDGTGTI",
"YTELEPPCRFVTDTPKGPKVKYLYFIKGLNNLNRGMVLGSLAATVRL"
"YTELEPPCRFVTDTPKGPKVKYLYFIKGLNNLNRGMVLGSLAATVRLQ"
],
"seed_group": null
},
Expand All @@ -5282,7 +5367,7 @@
"reference": [
"AGNATEVPANSTVLSFCAFAVDAAKAYKDYLASGGQPITNCVKMLCTHTGTGQAITVTPEANMDQ",
"ESFGGASCCLYCRCHIDHPNPKGFCDLKGKYVQIPTTCANDPVGFTLKNTVCTVCGMWKGYGCSC",
"DQLREPML"
"DQLREPMLQ"
],
"seed_group": null
},
Expand All @@ -5303,7 +5388,7 @@
"ADKYVRNLQHRLYECLYRNRDVDTDFVNEFYAYLRKHFSMMILSDDAVVCFNSTYASQGLVASIK",
"NFKSVLYYQNNVFMSEAKCWTETDLTKGPHEFCSQHTMLVKQGDDYVYLPYPDPSRILGAGCFVD",
"DIVKTDGTLMIERFVSLAIDAYPLTKHPNQEYADVFHLYLQYIRKLHDELTGHMLDMYSVMLTND",
"NTSRYWEPEFYEAMYTPHTVL"
"NTSRYWEPEFYEAMYTPHTVLQ"
],
"seed_group": null
},
Expand All @@ -5319,7 +5404,7 @@
"LRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSAL",
"VYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNA",
"VASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKL",
"QFTSLEIPRRNVATL"
"QFTSLEIPRRNVATLQ"
],
"seed_group": null
},
Expand All @@ -5334,7 +5419,7 @@
"FPVLHDIGNPKAIKCVPQADVEWKFYDAQPCSDKAYKIEELFYSYATHSDKFTDGVCLFWNCNVD",
"RYPANSIVCRFDTRVLSNLNLPGCDGGSLYVNKHAFHTPAFDKSAFVNLKQLPFFYYSDSPCESH",
"GKQVVSDIDYVPLKSATCITRCNLGGAVCRHHANEYRLYLDAYNMMISAGFSLWVYKQFDTYNLW",
"NTFTRL"
"NTFTRLQ"
],
"seed_group": null
},
Expand All @@ -5346,7 +5431,7 @@
"VDLFRNARNGVLITEGSVKGLQPSVGPKQASLNGVTLIGEAVKTQFNYYKKVDGVVQQLPETYFT",
"QSRNLQEFKPRSQMEIDFLELAMDEFIERYKLEGYAFEHIVYGDFSHSQLGGLHLLIGLAKRFKE",
"SPFELEDFIPMDSTVKNYFITDAQTGSSKCVCSVIDLLLDDFVEIIKSQDLSVVSKVVKVTIDYT",
"EISFMLWCKDGHVETFYPKL"
"EISFMLWCKDGHVETFYPKLQ"
],
"seed_group": null
},
Expand All @@ -5357,7 +5442,7 @@
"RVIHFGAGSDKGVAPGTAVLRQWLPTGTLLVDSDLNDFVSDADSTLIGDCATVHTANKWDLIISD",
"MYDPKTKNVTKENDSKEGFFTYICGFIQQKLALGGSVAIKITEHSWNADLYKLMGHFAWWTAFVT",
"NVNASSSEAFLIGCNYLGKPREQIDGYVMHANYIFWRNTNPIQLSSYSLFDMSKFPLKLRGTAVM",
"SLKEGQINDMILSLLSKGRLIIRENNRVVISSDVLVN"
"SLKEGQINDMILSLLSKGRLIIRENNRVVISSDVLVNN"
],
"seed_group": null
},
Expand Down
3 changes: 3 additions & 0 deletions micall/tests/microtest/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,6 @@ The scenarios that each file tests are:
details.
* 2170A-HCV - a mixed infection of HCV-1a and HCV-2a, each about 3000 bases
long, with enough coverage to assemble two contigs.
* 2180A-HIV - random reads from GP120 that can be assembled. See the
`make_sample.py` script for details.
* 2190A-SARS - amplicon from the start of nsp12.
Loading

0 comments on commit 2115dc8

Please sign in to comment.