diff --git a/micall/core/remap.py b/micall/core/remap.py index 450f55d1a..4f8e45092 100644 --- a/micall/core/remap.py +++ b/micall/core/remap.py @@ -592,7 +592,7 @@ def remap(fastq1, # regenerate consensus sequences distance_report = {} conseqs = build_conseqs(samfile, - seeds=seeds, + seeds=conseqs, is_filtered=True, worker_pool=worker_pool, filter_coverage=count_threshold//2, # pairs diff --git a/micall/monitor/kive_loader.py b/micall/monitor/kive_loader.py index b9d101b21..a1875869b 100644 --- a/micall/monitor/kive_loader.py +++ b/micall/monitor/kive_loader.py @@ -189,7 +189,8 @@ def check_folders(self): if self.folders is None or now >= self.folder_scan_time: self.folder_scan_time = now + timedelta(seconds=self.folder_delay) new_folders = self.find_folders() - if self.folders is None or set(new_folders).difference(self.folders): + old_folders = self.folders or [] + if self.folders is None or set(new_folders).difference(old_folders): # First time or we found a new folder self.folders = new_folders self.reset_folders() @@ -341,8 +342,7 @@ def is_marked_as_disabled(folder): def is_quality_control_uploaded(folder): return os.path.exists(os.path.join(folder, settings.QC_UPLOADED)) - @staticmethod - def find_files(folder): + def find_files(self, folder): """ Find FASTQ files within a folder. @return: a list of paths to the files within the folder. @@ -364,7 +364,7 @@ def find_files(folder): failed_demultiplexing, filepath) - return sorted(gz_files) + return sorted(gz_files, key=self.get_sample_number, reverse=True) def prepare_kive_dataset(self, filename, description, cdt): """ Upload a dataset to Kive, if it's not already. @@ -545,6 +545,11 @@ def get_sample_name(fastq1): sample_name = short_name + '_' + sample_num return sample_name + @staticmethod + def get_sample_number(fastq1): + match = re.match(r'.*_S(\d+)_', fastq1) + return int(match.group(1)) + def get_run_name(self, pipeline_id, sample_name): pipeline = self.pipelines[pipeline_id] name = pipeline['name_format'].format(sample=sample_name, diff --git a/micall/monitor/update_qai.py b/micall/monitor/update_qai.py index 466f04766..98bba011e 100644 --- a/micall/monitor/update_qai.py +++ b/micall/monitor/update_qai.py @@ -118,41 +118,6 @@ def build_conseqs(conseqs_file, return result -def build_hla_b_seqs(sample_file): - """ - Build JSON hashes for HLA-B sequence records - - @param sample_file: open file that holds the variant info - """ - - result = [] - expected_exon_prefix = 'HLA-B-exon' - # sample,seed,qcut,region,index,count,seq - rows = csv.DictReader(sample_file) - for row in rows: - ind = int(row['index']) - - sample_name = row['sample'] - exon = row['region'] - if exon.startswith(expected_exon_prefix): - exon_number = int(exon[len(expected_exon_prefix):]) - else: - raise ValueError('Unexpected exon {!r}', exon) - qcutoff = row['qcut'] - cnt = row['count'] - curr_seq = row['seq'] - - result.append({'samplename': sample_name, - 'testcode': None, - 'exon': exon_number, - 'qcutoff': qcutoff, - 'ind': ind, - 'cnt': cnt, - 'string': curr_seq}) - - return result - - def build_review_decisions(coverage_file, collated_counts_file, sample_sheet, @@ -265,7 +230,6 @@ def upload_review_to_qai(coverage_file, run, sample_sheet, conseqs, - hla_b_seqs, session): """ Create a review. @@ -277,8 +241,6 @@ def upload_review_to_qai(coverage_file, which tags @param conseqs: an array of JSON hashes to pass to QAI for the conseq child records - @param hla_b_seqs: an array of JSON hashes to pass to QAI for the hla_b_seq - child records @param session: the QAI session """ @@ -303,8 +265,7 @@ def upload_review_to_qai(coverage_file, {'runid': runid, 'pipeline_id': find_pipeline_id(session), 'lab_miseq_review_decisions': decisions, - 'lab_miseq_conseqs': conseqs, - 'lab_miseq_hla_b_seqs': hla_b_seqs}) + 'lab_miseq_conseqs': conseqs}) def clean_runname(runname): @@ -391,15 +352,12 @@ def process_folder(result_folder): run, sample_sheet, ok_sample_regions) - with open(nuc_variants, "rU") as f: - hla_b_seqs = build_hla_b_seqs(f) with open(coverage_scores, "rU") as f, open(collated_counts, "rU") as f2: upload_review_to_qai(f, f2, run, sample_sheet, conseqs, - hla_b_seqs, session) diff --git a/micall/tests/kive_loader_test.py b/micall/tests/kive_loader_test.py index 1e1532b3b..67b7060ed 100644 --- a/micall/tests/kive_loader_test.py +++ b/micall/tests/kive_loader_test.py @@ -461,7 +461,7 @@ def test_unable_to_check_status(self): # noinspection PyUnusedLocal def fetch_run_status(run): if not is_kive_running: - raise StandardError('Kive connection failed.') + raise RuntimeError('Kive connection failed.') return RUN_COMPLETED self.loader.fetch_run_status = fetch_run_status @@ -484,7 +484,7 @@ def fetch_run_status(run): def test_failed_quality_download(self): # noinspection PyUnusedLocal def download_quality(folder): - raise StandardError('Mock quality failure.') + raise RuntimeError('Mock quality failure.') self.loader.download_quality = download_quality @@ -540,3 +540,11 @@ def test_removing_failure_will_retry_folder(self): self.assertEqual(expected_launched1, launched1) self.assertEqual(expected_launched2, launched2) + + def test_get_sample_number(self): + fastq_name = '1234A-DEL-DRT-PR-RT_S71_L001_R1_001.fastq.gz' + expected_sample_number = 71 + + sample_number = self.loader.get_sample_number(fastq_name) + + self.assertEqual(expected_sample_number, sample_number)