Skip to content
okbp edited this page Sep 14, 2017 · 35 revisions

Obtain test data

  • Original data
    Methylation variation dataset for monocytes ... download from here
    Methylation variation dataset for CD4+ T cells ... download from here
  • Small data

Install Virtuoso on macOS (Sierra 10.12.6)

See http://wiki.lifesciencedb.jp/mw/BH12.12/SPARQLthon8/TogoGenome-Virtuoso7

Virtuoso起動 & データインポート

Virtuoso起動

$ cd (/path/to/install/directory/)var/lib/virtuoso/db
$ (/path/to/install/directory/)bin/virtuoso-t -f &

isql(コマンドラインツール)起動

$ (/path/to/install/directory/)bin/isql 1111 dba dba

データインポート

-- Import the Turtle file into the named graph <http://iwate-megabank.org/imethyl>.
-- NOTE(review): log_enable(2,1) presumably switches the session to row-by-row
-- autocommit with transaction logging off for the bulk load - confirm in the Virtuoso docs.
SQL > log_enable(2,1);  
-- NOTE(review): the relative file name is presumably resolved against the server's
-- working directory and must be readable by the server - TODO confirm.
SQL > DB.DBA.TTLP_MT (file_to_string_output('Mono.WGBS.stats.ttl'), '', 'http://iwate-megabank.org/imethyl');  
-- NOTE(review): checkpoint presumably makes the loaded data durable - confirm.
SQL > checkpoint;  

SPARQL入力画面の表示

http://localhost:8890/sparql

Query 1

全体の件数をカウントする

# Count every triple stored in the imethyl named graph.
SELECT (COUNT(*) AS ?count)
FROM <http://iwate-megabank.org/imethyl>
WHERE { ?subject ?predicate ?object . }

Query 2

Methylationのリストを取得する

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX imt: <http://iwate-megabank.org/imethyl/>

# List up to 100 resources typed as imt:CpGMethylation.
SELECT ?met
FROM <http://iwate-megabank.org/imethyl>
WHERE {
  ?met rdf:type imt:CpGMethylation .
}
LIMIT 100

Query 3

各MethylationのRIとMedianの値を取得する

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX imt: <http://iwate-megabank.org/imethyl/>

# For up to 100 CpG methylation resources, return the values of
# imt:hasRI and imt:hasMedian.
SELECT ?met ?riVal ?medVal
FROM <http://iwate-megabank.org/imethyl>
WHERE {
  ?met rdf:type      imt:CpGMethylation .
  ?met imt:hasRI     ?riVal .
  ?met imt:hasMedian ?medVal .
}
LIMIT 100

Query 4

RIの値が30以上のMethylationのRIとMedianの値を取得する

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX imt: <http://iwate-megabank.org/imethyl/>

# Same columns as Query 3, restricted to methylations whose RI value is 30 or more.
SELECT ?met ?riVal ?medVal
FROM <http://iwate-megabank.org/imethyl>
WHERE {
  ?met rdf:type      imt:CpGMethylation .
  ?met imt:hasRI     ?riVal .
  ?met imt:hasMedian ?medVal .

  # Keep only rows whose RI reaches the threshold.
  FILTER (?riVal >= 30)
}
LIMIT 100

Query 5

RIの値が30以上のMethylationについて、Medianの平均値を取得する

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX imt: <http://iwate-megabank.org/imethyl/>

# Average the imt:hasMedian values over all methylations whose RI is 30 or more.
# The result is a single row with one column, ?medMean.
SELECT (AVG(?medVal) AS ?medMean)
FROM <http://iwate-megabank.org/imethyl>
WHERE {
  ?met rdf:type      imt:CpGMethylation .
  ?met imt:hasRI     ?riVal .
  ?met imt:hasMedian ?medVal .

  FILTER (?riVal >= 30)
}

Query 6

RIの値が30以上であり、染色体1番のポジションが100000〜200000の間にあるMethylationについて、ポジションとRI, Medianの値を取得する

PREFIX rdf:    <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX faldo:  <http://biohackathon.org/resource/faldo#>
PREFIX imt:    <http://iwate-megabank.org/imethyl/>
PREFIX ensmbl: <http://identifiers.org/ensembl/>

# Return reference, position, RI and median for methylations with RI >= 30
# whose FALDO begin position lies in 100000..200000 (inclusive) on the
# reference ensmbl:hg19.1.
# The subject ?met is repeated on every triple on purpose; the next query
# shows the abbreviated notation.
SELECT ?chr ?pos ?riVal ?medVal
FROM <http://iwate-megabank.org/imethyl>
WHERE {
  ?met rdf:type      imt:CpGMethylation .
  ?met imt:hasRI     ?riVal .
  ?met imt:hasMedian ?medVal .

  # Anonymous nodes: the location is a faldo:Region whose begin is an
  # exact position carrying the coordinate and its reference sequence.
  ?met faldo:location [
    a faldo:Region ;
    faldo:begin [
      a faldo:Position, faldo:ExactPosition ;
      faldo:position  ?pos ;
      faldo:reference ?chr
    ]
  ] .

  # All conditions must hold, so they are combined into one conjunction.
  FILTER (
    ?riVal >= 30
    && ?chr = ensmbl:hg19.1
    && ?pos >= 100000
    && ?pos <= 200000
  )
}
LIMIT 100

Query 7 (主語の省略)

Query 6の簡易表記版

PREFIX rdf:    <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX faldo:  <http://biohackathon.org/resource/faldo#>
PREFIX imt:    <http://iwate-megabank.org/imethyl/>
PREFIX ensmbl: <http://identifiers.org/ensembl/>

# Abbreviated form of Query 6: the subject ?met is written once and the
# remaining predicate/object pairs are chained with ';'.
SELECT ?chr ?pos ?riVal ?medVal
FROM <http://iwate-megabank.org/imethyl>
WHERE {
  ?met rdf:type imt:CpGMethylation ;
       imt:hasRI ?riVal ;
       imt:hasMedian ?medVal ;
       faldo:location [
         a faldo:Region ;
         faldo:begin [
           a faldo:Position, faldo:ExactPosition ;
           faldo:position ?pos ;
           faldo:reference ?chr
         ]
       ] .

  # RI threshold.
  FILTER (?riVal >= 30)
  # Reference sequence.
  FILTER (?chr = ensmbl:hg19.1)
  # Inclusive position window.
  FILTER (?pos >= 100000 && ?pos <= 200000)
}
LIMIT 100

Read and try SPARQL in UniProt

http://sparql.uniprot.org

Try to use ICGC-RDF data at NBDC RDF Portal 

ICGC No.1

https://integbio.jp/rdf/sparql

PREFIX rdfs:       <http://www.w3.org/2000/01/rdf-schema#>
PREFIX icgc-class: <http://icgc.link/>
PREFIX icgc:       <http://icgc.link/vocab/>

# For donors in projects whose primary site equals "Prostate", list every
# detected mutation with its coordinates and sequencing strategy. Gene label,
# consequence type and amino-acid change are added when an effect record
# exists for the mutation (they stay unbound otherwise).
SELECT DISTINCT
  ?donor_id ?vital_status ?project_code ?primary_site ?seq_strategy
  ?chr ?start ?end ?strand
  ?gene_label ?consequence_type ?aa_mutation
WHERE {
  # Donor and the project it belongs to.
  ?donor a icgc-class:Donor ;
         icgc:donor_id     ?donor_id ;
         icgc:vital_status ?vital_status ;
         icgc:project      ?project .

  ?project icgc:project_code ?project_code ;
           icgc:primary_site ?primary_site .
  FILTER (?primary_site = "Prostate")

  # Detection record linking the donor to a mutation.
  ?detection icgc:donor               ?donor ;
             icgc:sequencing_strategy ?seq_strategy ;
             icgc:mutation            ?mutation .

  # Genomic coordinates of the mutation.
  ?mutation icgc:chromosome        ?chr ;
            icgc:chromosome_start  ?start ;
            icgc:chromosome_end    ?end ;
            icgc:chromosome_strand ?strand .

  # Effect annotation is optional; all four triples must match together.
  OPTIONAL {
    ?effect icgc:mutation         ?mutation ;
            icgc:gene_affected    ?gene ;
            icgc:consequence_type ?consequence_type ;
            icgc:aa_mutation      ?aa_mutation .
    ?gene rdfs:label ?gene_label .
  }
}

ICGC No.2 : 型のキャスト

PREFIX xsd:        <http://www.w3.org/2001/XMLSchema#>
PREFIX icgc-class: <http://icgc.link/>
PREFIX icgc:       <http://icgc.link/vocab/>

# Count the donors whose donor_id contains "DO2" and whose survival_time,
# cast to an integer, is below 2000.
#
# xsd: is declared explicitly so the query does not depend on the endpoint
# predefining that prefix.
# DISTINCT sits inside COUNT so each donor is counted once even if the
# pattern matches it more than once; the result is a single row with one
# column, ?num_donors.
SELECT (COUNT(DISTINCT ?donor) AS ?num_donors)
WHERE {
  ?donor a icgc-class:Donor ;
         icgc:survival_time ?surT ;
         icgc:donor_id      ?donorID .

  # Substring match on the donor identifier (case-sensitive).
  FILTER (CONTAINS(?donorID, "DO2"))

  # Type cast: compare survival_time numerically rather than as stored.
  # NOTE(review): presumably survival_time is not stored as a numeric
  # literal, hence the cast - confirm against the data.
  FILTER (xsd:integer(?surT) < 2000)
}

ICGC No.3 : 正規表現でフィルタリング

PREFIX xsd:        <http://www.w3.org/2001/XMLSchema#>
PREFIX icgc-class: <http://icgc.link/>
PREFIX icgc:       <http://icgc.link/vocab/>

# Count the donors whose donor_id starts with "do2" (case-insensitive) and
# whose survival_time, cast to an integer, is below 2000.
#
# xsd: is declared explicitly so the query does not depend on the endpoint
# predefining that prefix.
# DISTINCT sits inside COUNT so each donor is counted once; the result is a
# single row with one column, ?num_donors.
SELECT (COUNT(DISTINCT ?donor) AS ?num_donors)
WHERE {
  ?donor a icgc-class:Donor ;
         icgc:survival_time ?surT ;
         icgc:donor_id      ?donorID .

  FILTER (xsd:integer(?surT) < 2000)

  # Regular-expression filter used here instead of CONTAINS(?donorID, "DO2"):
  # "^" anchors the match to the start of the identifier and the "i" flag
  # makes it case-insensitive.
  FILTER (REGEX(?donorID, "^do2", "i"))
}

ICGC No.4 : 複数の条件を組み合わせて表示する

PREFIX icgc-class: <http://icgc.link/>
PREFIX icgc:       <http://icgc.link/vocab/>

# List donor id, survival time and mutation coordinates, plus the amino-acid
# change when an effect record exists for the mutation.
# An aggregate such as (COUNT(?mutation) AS ?num_mut) can be tried in the
# SELECT list instead of the per-mutation columns.
SELECT DISTINCT ?donorID ?surT ?chr ?start ?end ?aamut
WHERE {
  ?donor a icgc-class:Donor ;
         icgc:donor_id      ?donorID ;
         icgc:survival_time ?surT .

  # Detection record linking the donor to a mutation.
  ?detection icgc:donor    ?donor ;
             icgc:mutation ?mutation .

  ?mutation icgc:chromosome       ?chr ;
            icgc:chromosome_start ?start ;
            icgc:chromosome_end   ?end .

  # ?aamut stays unbound for mutations without an effect record.
  OPTIONAL {
    ?effect icgc:mutation    ?mutation ;
            icgc:aa_mutation ?aamut .
  }

  # Optional narrowing conditions; uncomment any combination to apply them.
  # The xsd:integer cast needs an xsd: prefix declaration unless the
  # endpoint predefines it.
  #   FILTER CONTAINS(?donorID, "DO2")
  #   FILTER (xsd:integer(?surT) < 2000)
  #   FILTER REGEX(?donorID, "^do2", "i")
}
# NOTE(review): there is no ORDER BY, so which 1000 rows this slice returns
# is not guaranteed to be stable between runs - confirm whether that matters.
OFFSET 1000000
LIMIT 1000
Clone this wiki locally