Skip to content

Commit

Permalink
Use previous consensus when building next one in remap.
Browse files Browse the repository at this point in the history
Part of #393.

Fix some problems with QAI upload, such as removing HLA variants files.
Start runs sorted by sample number, reversed.
  • Loading branch information
donkirkby committed Jul 24, 2017
1 parent 76b3d17 commit d9ccf04
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 50 deletions.
2 changes: 1 addition & 1 deletion micall/core/remap.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ def remap(fastq1,
# regenerate consensus sequences
distance_report = {}
conseqs = build_conseqs(samfile,
seeds=seeds,
seeds=conseqs,
is_filtered=True,
worker_pool=worker_pool,
filter_coverage=count_threshold//2, # pairs
Expand Down
13 changes: 9 additions & 4 deletions micall/monitor/kive_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,8 @@ def check_folders(self):
if self.folders is None or now >= self.folder_scan_time:
self.folder_scan_time = now + timedelta(seconds=self.folder_delay)
new_folders = self.find_folders()
if self.folders is None or set(new_folders).difference(self.folders):
old_folders = self.folders or []
if self.folders is None or set(new_folders).difference(old_folders):
# First time or we found a new folder
self.folders = new_folders
self.reset_folders()
Expand Down Expand Up @@ -341,8 +342,7 @@ def is_marked_as_disabled(folder):
def is_quality_control_uploaded(folder):
return os.path.exists(os.path.join(folder, settings.QC_UPLOADED))

@staticmethod
def find_files(folder):
def find_files(self, folder):
""" Find FASTQ files within a folder.
@return: a list of paths to the files within the folder.
Expand All @@ -364,7 +364,7 @@ def find_files(folder):
failed_demultiplexing,
filepath)

return sorted(gz_files)
return sorted(gz_files, key=self.get_sample_number, reverse=True)

def prepare_kive_dataset(self, filename, description, cdt):
""" Upload a dataset to Kive, if it's not already.
Expand Down Expand Up @@ -545,6 +545,11 @@ def get_sample_name(fastq1):
sample_name = short_name + '_' + sample_num
return sample_name

@staticmethod
def get_sample_number(fastq1):
    """ Extract the numeric sample number from a FASTQ file name.

    The number is the run of digits in the last '_S<digits>_' section of
    the name, e.g. 71 for '1234A-DEL-DRT-PR-RT_S71_L001_R1_001.fastq.gz'.

    @param fastq1: name or path of the FASTQ file
    @return: the sample number as an int
    @raise ValueError: if the name has no '_S<digits>_' section
    """
    match = re.match(r'.*_S(\d+)_', fastq1)
    if match is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError(
            'No sample number found in file name {!r}.'.format(fastq1))
    return int(match.group(1))

def get_run_name(self, pipeline_id, sample_name):
pipeline = self.pipelines[pipeline_id]
name = pipeline['name_format'].format(sample=sample_name,
Expand Down
44 changes: 1 addition & 43 deletions micall/monitor/update_qai.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,41 +118,6 @@ def build_conseqs(conseqs_file,
return result


def build_hla_b_seqs(sample_file):
    """ Build JSON hashes for HLA-B sequence records.

    @param sample_file: open file that holds the variant info as CSV with a
        header row; the columns read are sample, region, index, qcut, count
        and seq
    @return: a list with one dict per row, holding the keys samplename,
        testcode (always None), exon, qcutoff, ind, cnt and string
    @raise ValueError: if a row's region does not start with 'HLA-B-exon',
        or its index or exon number is not an integer
    """
    expected_exon_prefix = 'HLA-B-exon'
    result = []
    # sample,seed,qcut,region,index,count,seq
    for row in csv.DictReader(sample_file):
        ind = int(row['index'])
        sample_name = row['sample']
        exon = row['region']
        if not exon.startswith(expected_exon_prefix):
            # Format the message; passing exon as a second argument would
            # leave the '{!r}' placeholder unfilled.
            raise ValueError('Unexpected exon {!r}'.format(exon))
        # The exon number is whatever follows the fixed prefix.
        exon_number = int(exon[len(expected_exon_prefix):])

        result.append({'samplename': sample_name,
                       'testcode': None,
                       'exon': exon_number,
                       'qcutoff': row['qcut'],
                       'ind': ind,
                       'cnt': row['count'],
                       'string': row['seq']})

    return result


def build_review_decisions(coverage_file,
collated_counts_file,
sample_sheet,
Expand Down Expand Up @@ -265,7 +230,6 @@ def upload_review_to_qai(coverage_file,
run,
sample_sheet,
conseqs,
hla_b_seqs,
session):
""" Create a review.
Expand All @@ -277,8 +241,6 @@ def upload_review_to_qai(coverage_file,
which tags
@param conseqs: an array of JSON hashes to pass to QAI for the conseq
child records
@param hla_b_seqs: an array of JSON hashes to pass to QAI for the hla_b_seq
child records
@param session: the QAI session
"""

Expand All @@ -303,8 +265,7 @@ def upload_review_to_qai(coverage_file,
{'runid': runid,
'pipeline_id': find_pipeline_id(session),
'lab_miseq_review_decisions': decisions,
'lab_miseq_conseqs': conseqs,
'lab_miseq_hla_b_seqs': hla_b_seqs})
'lab_miseq_conseqs': conseqs})


def clean_runname(runname):
Expand Down Expand Up @@ -391,15 +352,12 @@ def process_folder(result_folder):
run,
sample_sheet,
ok_sample_regions)
with open(nuc_variants, "rU") as f:
hla_b_seqs = build_hla_b_seqs(f)
with open(coverage_scores, "rU") as f, open(collated_counts, "rU") as f2:
upload_review_to_qai(f,
f2,
run,
sample_sheet,
conseqs,
hla_b_seqs,
session)


Expand Down
12 changes: 10 additions & 2 deletions micall/tests/kive_loader_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ def test_unable_to_check_status(self):
# noinspection PyUnusedLocal
def fetch_run_status(run):
if not is_kive_running:
raise StandardError('Kive connection failed.')
raise RuntimeError('Kive connection failed.')
return RUN_COMPLETED

self.loader.fetch_run_status = fetch_run_status
Expand All @@ -484,7 +484,7 @@ def fetch_run_status(run):
def test_failed_quality_download(self):
# noinspection PyUnusedLocal
def download_quality(folder):
raise StandardError('Mock quality failure.')
raise RuntimeError('Mock quality failure.')

self.loader.download_quality = download_quality

Expand Down Expand Up @@ -540,3 +540,11 @@ def test_removing_failure_will_retry_folder(self):

self.assertEqual(expected_launched1, launched1)
self.assertEqual(expected_launched2, launched2)

def test_get_sample_number(self):
    """ The sample number comes from the _S<n>_ section of a FASTQ name. """
    sample_number = self.loader.get_sample_number(
        '1234A-DEL-DRT-PR-RT_S71_L001_R1_001.fastq.gz')

    self.assertEqual(71, sample_number)

0 comments on commit d9ccf04

Please sign in to comment.