Skip to content

Commit

Permalink
fixed link to publication pdf
Browse files Browse the repository at this point in the history
  • Loading branch information
JLSteenwyk committed Dec 3, 2023
1 parent f4cb92a commit 835677d
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 29 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,12 @@
</p>


<center><h1>Run <a href="https://clipkit.genomelybio.com/">ClipKIT in the browser</a> and leave the computing up to <a href="https://www.genomelybio.com/">us</a>!</h1></center>
<center><h2>Run <a href="https://clipkit.genomelybio.com/">ClipKIT in the browser</a> and leave the computing up to <a href="https://www.genomelybio.com/">us</a>!</h2></center>
<br />

ClipKIT is a fast and flexible alignment trimming tool that keeps phylogenetically informative sites and removes others.<br /><br />
If you found clipkit useful, please cite *ClipKIT: a multiple sequence alignment trimming software for accurate phylogenomic inference*. Steenwyk et al. 2020, PLoS Biology. doi: [10.1371/journal.pbio.3001007](https://journals.plos.org/plosbiology/article?id=10.1371/journal.pbio.3001007).
If you found ClipKIT useful, please cite *ClipKIT: a multiple sequence alignment trimming software for accurate phylogenomic inference*. Steenwyk et al. 2020, PLOS Biology. doi: [10.1371/journal.pbio.3001007](https://jlsteenwyk.com/publication_pdfs/2020_Steenwyk_etal_PLOS_Biology.pdf).

<br /><br />

---
Expand Down
10 changes: 5 additions & 5 deletions tests/integration/samples/simple.fa.clipkit
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
>1
A-GTAT
A-GAT
>2
A-G-AT
A-GAT
>3
A-G-TA
A-GTA
>4
AGA-TA
AGATA
>5
ACa-T-
ACaT-
59 changes: 37 additions & 22 deletions tests/unit/test_msa.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from Bio import AlignIO
from clipkit.msa import MSA


def get_biopython_msa(file_path, file_format="fasta"):
    """Read the single alignment stored in ``file_path`` using Biopython.

    Args:
        file_path: Path of the alignment file to open.
        file_format: Format name handed to ``AlignIO.read``; defaults to
            ``"fasta"``.

    Returns:
        Whatever ``AlignIO.read`` returns for the opened file.
    """
    # Use a context manager so the handle is closed once parsing finishes
    # instead of being left for the garbage collector.
    with open(file_path) as handle:
        return AlignIO.read(handle, file_format)

Expand All @@ -12,40 +13,54 @@ class TestMSA(object):
def test_clipkit_msa_from_bio_msa(self):
    """Check the headers and sequence matrix produced by ``MSA.from_bio_msa``."""
    bio_msa = get_biopython_msa("tests/unit/examples/simple.fa")
    msa = MSA.from_bio_msa(bio_msa)

    # One header dict per record, in file order.
    assert msa.header_info == [
        {"id": "1", "name": "1", "description": "1"},
        {"id": "2", "name": "2", "description": "2"},
        {"id": "3", "name": "3", "description": "3"},
        {"id": "4", "name": "4", "description": "4"},
        {"id": "5", "name": "5", "description": "5"},
    ]

    # Rows are sequences, columns are sites; note the lowercase "a" in the
    # last row is expected to be kept as-is.
    expected_seq_records = np.array(
        [
            ["A", "-", "G", "T", "A", "T"],
            ["A", "-", "G", "-", "A", "T"],
            ["A", "-", "G", "-", "T", "A"],
            ["A", "G", "A", "-", "T", "A"],
            ["A", "C", "a", "-", "T", "-"],
        ]
    )
    np.testing.assert_equal(msa.seq_records, expected_seq_records)

def test_trim_by_provided_site_positions_np_array(self):
    """Trim sites given as a NumPy array and check the sites that remain."""
    bio_msa = get_biopython_msa("tests/unit/examples/simple.fa")
    msa = MSA.from_bio_msa(bio_msa)

    sites_to_trim = np.array([1, 4])
    msa.trim(site_positions_to_trim=sites_to_trim)

    # Columns 1 and 4 (0-based) of the six-column fixture are removed,
    # leaving columns 0, 2, 3 and 5.
    expected_sites_kept = np.array(
        [
            ["A", "G", "T", "T"],
            ["A", "G", "-", "T"],
            ["A", "G", "-", "A"],
            ["A", "A", "-", "A"],
            ["A", "a", "-", "-"],
        ]
    )
    np.testing.assert_equal(msa.sites_kept, expected_sites_kept)

def test_trim_by_provided_site_positions_list(self):
    """Trim sites given as a plain list and check the sites that remain."""
    bio_msa = get_biopython_msa("tests/unit/examples/simple.fa")
    msa = MSA.from_bio_msa(bio_msa)

    sites_to_trim = [1, 4]
    msa.trim(site_positions_to_trim=sites_to_trim)

    # Same positions as the NumPy-array variant of this test, so the same
    # columns (0, 2, 3 and 5) must survive; the last row of the fixture
    # ends in a gap, hence the trailing "-".
    expected_sites_kept = np.array(
        [
            ["A", "G", "T", "T"],
            ["A", "G", "-", "T"],
            ["A", "G", "-", "A"],
            ["A", "A", "-", "A"],
            ["A", "a", "-", "-"],
        ]
    )
    np.testing.assert_equal(msa.sites_kept, expected_sites_kept)

0 comments on commit 835677d

Please sign in to comment.