diff --git a/README.md b/README.md deleted file mode 100644 index 493bb1a..0000000 --- a/README.md +++ /dev/null @@ -1,26 +0,0 @@ -Bio2RDF-scripts -=============== - This Git repository holds all of the RDF converter scripts used to generate Bio2RDF linked data. - -Requirements -------------- -The majority of these scripts were created so as to run on Linux servers. Depending on the script you wish to run you will need: - -* PHP5 -* Perl 6 -* Ruby(1.9.3) -* Java 1.5 -* Bio2RDF API (available from: https://github.com/micheldumontier/php-lib) - -The Bio2RDF API should be installed in the root directory of the project tree (i.e bio2rdf-scripts). - -Usage ------ -Each script is run independently. See README files inside each script directory. - -Contact Us ---------- -For more information visit http://bio2rdf.org or http://dumontierlab.com - ---- -Licensed under [MIT License](http://en.wikipedia.org/wiki/MIT_License), see [license page](https://github.com/bio2rdf/bio2rdf-scripts/wiki/MIT-License) for details. diff --git a/bioportal/bioportal.php b/bioportal/bioportal.php index cece375..a3028ad 100644 --- a/bioportal/bioportal.php +++ b/bioportal/bioportal.php @@ -89,8 +89,9 @@ function Run() foreach($ontologies AS $i => $o) { $label = (string) $o->name; $abbv = (string) $o->acronym; - - if(array_search($abbv,$exclude_list) !== FALSE) continue; + if(array_search($abbv,$exclude_list) !== FALSE) { + continue; + } if($include_list[0] != 'all') { // ignore if we don't find it in the include list OR we do find it in the exclude list if( (array_search($abbv,$include_list) === FALSE) @@ -106,9 +107,7 @@ function Run() if(!isset($ls['hasOntologyLanguage'])) {echo 'insufficient metadata'.PHP_EOL;continue;} $format = strtolower($ls['hasOntologyLanguage']); -/***********/ if($format != 'owl' and $format != 'obo') continue; - echo "Processing ($i/$total) $abbv ... "; $version = $ls['version']; @@ -117,7 +116,6 @@ function Run() $rfile = $ls['ontology']['links']['download']; - $lfile = $abbv.".".$format.".gz"; if(parent::getParameterValue('download') == 'true') { echo "downloading ... "; @@ -138,6 +136,7 @@ function Run() if(isset($m[1])) { $filename = $m[1]; if(strstr($filename,".zip")) continue; + } else {echo "error: no filename".PHP_EOL;continue;} $body = substr($ret, $header_size); @@ -148,6 +147,7 @@ function Run() $lz = "compress.zlib://".$idir.$lfile; file_put_contents($lz,$body); + echo "done".PHP_EOL; } @@ -160,6 +160,11 @@ function Run() // process echo "converting ... "; set_time_limit(0); + // let's double check the format + $fp = gzopen($idir.$lfile,"r"); + $l = gzgets($fp); + if(strstr($l,"xml")) $format= "owl"; + gzclose($fp); if($format == 'obo') { $this->OBO2RDF($abbv); } else if($format == 'owl') { @@ -366,6 +371,7 @@ function OBO2RDF($abbv) if($abbv == "doid") $abbv = "do"; $minimal = (parent::getParameterValue('detail') == 'min')?true:false; $minimalp = (parent::getParameterValue('detail') == 'min+')?true:false; + $version = parent::getParameterValue("bio2rdf_release"); $tid = ''; $first = true; @@ -374,7 +380,7 @@ function OBO2RDF($abbv) $min = $buf = ''; $ouri = "http://bio2rdf.org/lsr:".$abbv; - $dataset_uri = $abbv."_resource:bio2rdf.dataset.$abbv.R3"; + $dataset_uri = $abbv."_resource:bio2rdf.dataset.$abbv.R".$version; parent::setGraphURI($dataset_uri); $buf = parent::triplify($ouri,"rdf:type","owl:Ontology"); $graph_uri = '<'.parent::getRegistry()->getFQURI(parent::getGraphURI()).'>'; diff --git a/clinicaltrials/clinicaltrials.php b/clinicaltrials/clinicaltrials.php index d1fc868..78c74e1 100644 --- a/clinicaltrials/clinicaltrials.php +++ b/clinicaltrials/clinicaltrials.php @@ -233,12 +233,11 @@ function parse_dir(){ **/ function process_file($infile) { $indir = parent::getParameterValue('indir'); - $xml = new CXML($indir,basename($infile)); + $xml = new CXML($infile); $this->setCheckPoint('file'); while($xml->Parse("clinical_study") == TRUE) { $this->setCheckPoint('record'); $this->root = $root = $xml->GetXMLRoot(); - $this->nct_id = $nct_id = $this->getString("//id_info/nct_id"); $this->study_id = $study_id = parent::getNamespace()."$nct_id"; @@ -759,7 +758,7 @@ function process_file($infile) { parent::triplifyString($location_uri,parent::getVoc()."status", $this->getString('//status',$location)). parent::triplify($study_id,parent::getVoc()."location",$location_uri). parent::triplify($location_uri, parent::getVoc()."address", $this->makeAddress($address)). - parent::triplify($location_uri, parent::getVoc()."contact", $this->makeContact($contact)) + ($contact != null?parent::triplify($location_uri, parent::getVoc()."contact", $this->makeContact($contact)):"") ); if($backups) { foreach($backups AS $backup) { @@ -831,11 +830,13 @@ function process_file($infile) { try{ $links = $root->xpath('//link'); foreach($links AS $i => $link) { - $lid = parent::getRes().md5($this->getString('./url',$link)); + $url = $this->getString('./url',$link); + $url = preg_replace("/>.*$/","",$url); + $lid = parent::getRes().md5($url); parent::addRDF( parent::describeIndividual($lid, $this->getString('./description',$link), parent::getVoc()."Link"). parent::describeClass(parent::getVoc()."Link","Link"). - parent::triplify($lid,parent::getVoc()."url",preg_replace("/>$/","",$this->getString('./url',$link))). + parent::triplify($lid,parent::getVoc()."url",$url). parent::triplify($study_id,parent::getVoc()."link",$lid) ); } @@ -1166,7 +1167,7 @@ function process_file($infile) { if(!$et) continue; $ev_uri = parent::getVoc().str_replace(" ","-",$ev_label); - $categories = array_shift($et->xpath('./category_list')); + $categories = @array_shift($et->xpath('./category_list')); foreach($categories AS $category) { $major_title = $this->getString('./title', $category); $major_title_uri = parent::getRes().md5($major_title); @@ -1272,8 +1273,8 @@ public function getDatetimeFromDate($date) public function makeContact($contact) { - if($contact == null) return null; - $contact_uri = parent::getRes().md5($contact->asXML()); + if($contact == null) return ''; + $contact_uri = parent::getRes().md5($contact->asXML()); $contact_type_uri = parent::getVoc()."Contact"; $contact_label = trim($this->getString('//first_name',$contact)." ".$this->getString('//last_name', $contact)); parent::addRDF( diff --git a/drugbank/drugbank.php b/drugbank/drugbank.php index ac7757b..d19c76d 100755 --- a/drugbank/drugbank.php +++ b/drugbank/drugbank.php @@ -34,17 +34,17 @@ class DrugBankParser extends Bio2RDFizer { function __construct($argv) { - parent::__construct($argv,"drugbank"); - parent::addParameter('files', true, 'all|drugbank','all','Files to convert'); - parent::addParameter('download_url',false,null,'http://www.drugbank.ca/system/downloads/current/'); - parent::initialize(); + parent::__construct($argv,"drugbank"); + parent::addParameter('files', true, 'all|drugbank','all','Files to convert'); + parent::addParameter('download_url',false,null,'http://www.drugbank.ca/system/downloads/current/'); + parent::initialize(); } function Run() { - $indir = parent::getParameterValue('indir'); - $outdir = parent::getParameterValue('outdir'); - $download_url = parent::getParameterValue('download_url'); + $indir = parent::getParameterValue('indir'); + $outdir = parent::getParameterValue('outdir'); + $download_url = parent::getParameterValue('download_url'); if(parent::getParameterValue('files') == 'all') { $files = explode("|",parent::getParameterList('files')); @@ -52,11 +52,11 @@ function Run() } else { $files = explode("|",parent::getParameterValue('files')); } - + if(parent::getParameterValue("id_list")) { $this->id_list = array_flip(explode(",",parent::getParameterValue('id_list'))); } - + $dataset_description = ''; foreach($files AS $f) { if($f == 'drugbank') { @@ -64,7 +64,7 @@ function Run() $lname = 'drugbank'; } $fnx = 'parse_'.$f; - + $rfile = parent::getParameterValue('download_url').$file; $lfile = parent::getParameterValue('indir').$file; $cfile = $lname.".".parent::getParameterValue('output_format'); @@ -73,10 +73,11 @@ function Run() if(!file_exists($lfile) || parent::getParameterValue('download') == true) { utils::downloadSingle($rfile,$lfile); } - + // setup the write $gz = (strstr(parent::getParameterValue('output_format'),".gz") === FALSE)?false:true; parent::setWriteFile($outdir.$cfile, $gz); + echo $outdir.$cfile; if(file_exists($indir.$file)) { // call the parser echo "processing $file ...".PHP_EOL; @@ -85,65 +86,65 @@ function Run() parent::clear(); } parent::getWriteFile()->close(); - + // dataset description $ouri = parent::getGraphURI(); parent::setGraphURI(parent::getDatasetURI()); - + $source_version = parent::getDatasetVersion(); $bVersion = parent::getParameterValue('bio2rdf_release'); $prefix = parent::getPrefix(); $date = date ("Y-m-d\TH:i:sP"); // dataset description $source_file = (new DataResource($this)) - ->setURI($rfile) - ->setTitle("DrugBank ($file)") - ->setRetrievedDate( date ("Y-m-d\TH:i:sP", filemtime($indir.$file))) - ->setFormat("application/xml") - ->setFormat("application/zip") - ->setPublisher("http://drugbank.ca") - ->setHomepage("http://drugbank.ca") - ->setRights("use") - ->setRights("by-attribution") - ->setRights("no-commercial") - ->setLicense("http://www.drugbank.ca/about") - ->setDataset("http://identifiers.org/drugbank/"); - + ->setURI($rfile) + ->setTitle("DrugBank ($file)") + ->setRetrievedDate( date ("Y-m-d\TH:i:sP", filemtime($indir.$file))) + ->setFormat("application/xml") + ->setFormat("application/zip") + ->setPublisher("http://drugbank.ca") + ->setHomepage("http://drugbank.ca") + ->setRights("use") + ->setRights("by-attribution") + ->setRights("no-commercial") + ->setLicense("http://www.drugbank.ca/about") + ->setDataset("http://identifiers.org/drugbank/"); + $output_file = (new DataResource($this)) - ->setURI("http://download.bio2rdf.org/release/$bVersion/$prefix/$cfile") - ->setTitle("Bio2RDF v$bVersion RDF version of $prefix v$source_version") - ->setSource($source_file->getURI()) - ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/drugbank/drugbank.php") - ->setCreateDate($date) - ->setHomepage("http://download.bio2rdf.org/release/$bVersion/$prefix/$prefix.html") - ->setPublisher("http://bio2rdf.org") - ->setRights("use-share-modify") - ->setRights("by-attribution") - ->setRights("restricted-by-source-license") - ->setLicense("http://creativecommons.org/licenses/by/3.0/") - ->setDataset(parent::getDatasetURI()); + ->setURI("http://download.bio2rdf.org/release/$bVersion/$prefix/$cfile") + ->setTitle("Bio2RDF v$bVersion RDF version of $prefix v$source_version") + ->setSource($source_file->getURI()) + ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/drugbank/drugbank.php") + ->setCreateDate($date) + ->setHomepage("http://download.bio2rdf.org/release/$bVersion/$prefix/$prefix.html") + ->setPublisher("http://bio2rdf.org") + ->setRights("use-share-modify") + ->setRights("by-attribution") + ->setRights("restricted-by-source-license") + ->setLicense("http://creativecommons.org/licenses/by/3.0/") + ->setDataset(parent::getDatasetURI()); $gz = (strstr(parent::getParameterValue('output_format'),".gz") === FALSE)?false:true; if($gz) $output_file->setFormat("application/gzip"); if(strstr(parent::getParameterValue('output_format'),"nt")) $output_file->setFormat("application/n-triples"); else $output_file->setFormat("application/n-quads"); - + parent::writeToReleaseFile($source_file->toRDF().$output_file->toRDF()); parent::setGraphURI($ouri); } - parent::closeReleaseFile(); + parent::closeReleaseFile(); } - function parse_drugbank($ldir,$infile) - { - $xml = new CXML($ldir,$infile); + function parse_drugbank($ldir,$infile) + { + $xml = new CXML($ldir.$infile); while($xml->parse("drug") == TRUE) { if(isset($this->id_list) and count($this->id_list) == 0) break; $this->parseDrugEntry($xml); } unset($xml); - } + } function NSMap($source) @@ -179,122 +180,126 @@ function NSMap($source) return strtolower($source); } } - - function parsePartnerEntry($did, &$x,$type) - { - $id = (string)$x->id; - $pid = "drugbank:".$id; - $lid = parent::getRes().substr($did,strpos($did,":")+1)."_".$id; // local pivot to keep the action between the drug and target - $name = (string) $x->name; - $parent = parent::getVoc().ucfirst(str_replace(" ","-",$type)); - parent::addRDF( - parent::describeIndividual($pid,$name,$parent). - parent::describeClass($parent,ucfirst($type)). - parent::triplify($did,parent::getVoc().$type,$pid). - parent::describeIndividual($lid,"$did to $pid relation",parent::getVoc().ucfirst("$type-Relation")). - parent::describeClass(parent::getVoc().ucfirst("$type-Relation"), ucfirst("$type Relation")). - parent::triplify($lid,parent::getVoc()."drug",$did). - parent::triplify($lid,parent::getVoc().$type,$pid) - ); - - // iterate over all the child nodes - foreach($x->children() AS $k => $v) { - // get the direct values - if(!$v->children()) { - // special cases - if($k == "references") { // for local pivot - $a = preg_match_all("/pubmed\/([0-9]+)/",$v,$m); - if(isset($m[1])) { - foreach($m[1] AS $pmid) { - parent::addRDF( - parent::triplify($lid,parent::getVoc()."reference","pubmed:".$pmid) - ); - } - } - } else if($v != '') { - if($k == 'known-action') { // for local pivot - parent::addRDF( - parent::triplifyString($lid,parent::getVoc().$k,(string)$v) - ); - } else { - parent::addRDF( - parent::triplifyString($pid,parent::getVoc().$k,(string)$v) - ); - } - } - - } else { - // work with nested elements - $found = false; - $list_name = $k; - $item_name = substr($k,0,-1); - foreach($v->children() AS $k2 => $v2) { - if($k2 == "organism") { - // extract the ncbi id - $taxid = $v2->attributes()->{'ncbi-taxonomy-id'}; - $label = $v2; + + function parsePartnerEntry($did, $pid, $x) + { + $debug = false; + if(!isset($x->polypeptide)) { + return; + } + $partner = $x->polypeptide; + + foreach($partner->children() AS $k1 => $v1) { + // get the direct values + $tv1 = trim($v1); + $nc = count($v1->children()); + + if($debug) echo "tv1 $nc $k1 $tv1".PHP_EOL; + if($nc == 0) { + if($debug) echo "k1 $k1 $v1".PHP_EOL; parent::addRDF( - parent::triplify($pid, parent::getVoc()."x-taxonomy", "taxonomy:$taxid"). - parent::triplifyString("taxonomy:$taxid","rdfs:label",$label) + parent::triplifyString($pid,parent::getVoc().$k1,(string)$v1) ); - continue; - } - - if(!$v2->children()) { - // echo "not nested $k2".PHP_EOL; - if($k2 == "action") { - $aid = str_replace(array(" ","/"),"-",$v2); + + if($k1 == "organism") { + $taxid = $v1->attributes()->{'ncbi-taxonomy-id'}; parent::addRDF( - parent::describeIndividual($lid,$v2,parent::getVoc()."Action"). - parent::describeClass(parent::getVoc()."Action","Action"). - parent::triplify($lid,parent::getVoc()."action",parent::getVoc().$aid) + parent::triplify($pid, parent::getVoc()."x-taxonomy", "taxonomy:$taxid") ); - } else { - // default handler + } + continue; + } + + foreach($v1->children() AS $k2 => $v2) { + $tv2 = trim($v2); + $nc = count($v2->children()); + + if($debug) echo "tv2 $nc $k2 $tv2".PHP_EOL; + if($nc == 0) { + if($debug) echo "k2 $k2 $v2".PHP_EOL; parent::addRDF( - parent::triplifyString($pid, parent::getVoc().$k2, "".$v2) + parent::triplifyString($pid,parent::getVoc().$k2,(string)$v2) ); + continue; } - } else { // nested elements - // echo "nested $k2".PHP_EOL; - foreach($v2->children() AS $k3 => $v3) { - // echo " ".$k3.PHP_EOL; - if(!$v3->children()) { + + if($k2 == 'external-identifier') { + $ns = $this->NSMap($v2->resource); + $id = (string) $v2->identifier; + $id = str_replace(array("HGNC:","GNC:"),"",$id); + parent::addRDF( + parent::triplify($pid, parent::getVoc()."x-$ns","$ns:$id") + ); + if($ns == "uniprot") { parent::addRDF( - parent::triplifyString($pid,parent::getVoc().$k3, "".$v3) + parent::triplify("$ns:$id","skos:exactMatch","http://purl.uniprot.org/uniprot/$id") ); - } else { - foreach($v3 AS $k4 => $v4) { - if($k3 == 'external-identifier') { - $ns = $this->NSMap($v3->resource); - $id = (string) $v3->identifier; - $id = str_replace(array("HGNC:","GNC:"),"",$id); - parent::addRDF( - parent::triplify($pid, parent::getVoc()."x-$ns","$ns:$id") - ); - } else if($k3 == 'pfam') { - parent::addRDF( - parent::triplify($pid, parent::getVoc()."x-pfam","pfam:"."".$v3->identifier) - ); - } else if($k3 == "go-classifier") { - parent::addRDF( - parent::triplifyString($pid, parent::getVoc()."go-".$v3->category, $v3->description) - ); - } else { - trigger_error("no handler for $k3",E_USER_WARNING); -/* parent::addRDF( - parent::triplifyString($pid, parent::getVoc().$k3, $v4) - ); -*/ - } - } } + } else if($k2 == 'pfam') { + parent::addRDF( + parent::triplify($pid, parent::getVoc()."x-pfam","pfam:"."".$v2->identifier) + ); + } else if($k2 == "go-classifier") { + parent::addRDF( + parent::triplifyString($pid, parent::getVoc()."go-".$v2->category, $v2->description) + ); + } else { + trigger_error("no handler for $k2",E_USER_WARNING); + /* parent::addRDF( + parent::triplifyString($pid, parent::getVoc().$k3, $v4) + ); + */ } } - } // foreach - } - } - } + } + } + + + function parsePartnerRelation($did, &$x,$type) + { + $id = (string)$x->id; + $pid = "drugbank:".$id; + $lid = parent::getRes().substr($did,strpos($did,":")+1)."_".$id; // local pivot to keep the action between the drug and target + $parent = parent::getVoc().ucfirst(str_replace(" ","-",$type)); + $name = (string) $x->name; + $knownaction = (string) $x->{'known-action'}; + $knownaction = $knownaction=="unknown"?'':$knownaction; + + parent::addRDF( + parent::describeIndividual($pid,$name,$parent). + parent::describeClass($parent,ucfirst($type)). + parent::triplify($did,parent::getVoc().$type,$pid). + parent::describeIndividual($lid,"$did to $pid relation",parent::getVoc().ucfirst("$type-Relation")). + parent::describeClass(parent::getVoc().ucfirst("$type-Relation"), ucfirst("$type Relation")). + parent::triplify($lid,parent::getVoc()."drug",$did). + parent::triplify($lid,parent::getVoc().$type,$pid). + parent::triplifyString($lid, parent::getVoc()."known-action", $knownaction) + ); + + // the main elements for the relation are actions, known-action, references, + foreach($x->actions AS $actions => $action) { + if(!trim($action)) continue; + $aid = str_replace(array(" ","/"),"-",$action->action); + $auri = parent::getVoc().$aid; + parent::addRDF( + parent::describeIndividual($auri,$action->action,parent::getVoc()."Action"). + parent::describeClass(parent::getVoc()."Action","Action"). + parent::triplify($lid,parent::getVoc()."action",$auri) + ); + } + if(isset($x->references)) { + foreach( explode("\n",$x->references) AS $ref) { + preg_match_all("/pubmed\/([0-9]+)/",$ref,$m); + foreach($m[1] AS $pmid) { + parent::addRDF( + parent::triplify($lid,parent::getVoc()."reference","pubmed:".$pmid) + ); + } + } + } + + $this->parsePartnerEntry($did, $pid, $x); + } /** * @description check if a type has already been defined and add appropriate RDF @@ -315,393 +320,399 @@ function typify($id,$tid,$subclass,$label) function parseDrugEntry(&$xml) { - $declared = null; // a list of all the entities declared - $counter = 1; - $x = $xml->GetXMLRoot(); - $dbid = (string) $x->{"drugbank-id"}; - $did = "drugbank:".$dbid; - $name = (string)$x->name; - $type = ucfirst((string)str_replace(" ","-",$x->attributes()->type)); - $type_label = ucfirst($x->attributes()->type); - $description = null; - - if(isset($this->id_list)) { - if(!isset($this->id_list[$dbid])) return; - unset($this->id_list[$dbid]); - } - - echo $dbid.PHP_EOL; - if(isset($x->description) && $x->description != '') { - $description = trim((string)$x->description); - } + $declared = null; // a list of all the entities declared + $counter = 1; + $x = $xml->GetXMLRoot(); + $dbid = (string) $x->{"drugbank-id"}; + $did = "drugbank:".$dbid; + $name = (string)$x->name; + $type = ucfirst((string)str_replace(" ","-",$x->attributes()->type)); + $type_label = ucfirst($x->attributes()->type); + $description = null; + + if(isset($this->id_list)) { + if(!isset($this->id_list[$dbid])) return; + unset($this->id_list[$dbid]); + } - parent::addRDF( - parent::describeIndividual($did, $name, parent::getVoc()."Drug",$name, $description). - parent::describeClass(parent::getVoc()."Drug","Drug"). - parent::triplify($did,"owl:sameAs","http://identifiers.org/drugbank/".$dbid). - parent::triplify($did,"rdfs:seeAlso","http://www.drugbank.ca/drugs/".$dbid). - parent::triplify($did,"rdf:type", parent::getVoc().$type). - parent::describeClass(parent::getVoc().$type, $type_label) - ); + echo "Processing $dbid".PHP_EOL; + if(isset($x->description) && $x->description != '') { + $description = trim((string)$x->description); + } - foreach($x->{'drugbank-id'} AS $id) { - parent::addRDF( - parent::triplifyString($did, parent::getVoc()."drugbank-id", $id) - ); - } - if(isset($x->{'cas-number'})) { parent::addRDF( - parent::triplify($did, parent::getVoc()."x-cas", "cas:".$x->{'cas-number'}) + parent::describeIndividual($did, $name, parent::getVoc()."Drug",$name, $description). + parent::describeClass(parent::getVoc()."Drug","Drug"). + parent::triplify($did,"owl:sameAs","http://identifiers.org/drugbank/".$dbid). + parent::triplify($did,"rdfs:seeAlso","http://www.drugbank.ca/drugs/".$dbid). + parent::triplify($did,"rdf:type", parent::getVoc().$type). + parent::describeClass(parent::getVoc().$type, $type_label) ); - } - - $literals = array( - "indication", - "pharmacology", - "mechanism-of-action", - "toxicity", - "biotransformation", - "absorption", - "half-life", - "protein-binding", - "route-of-elimination", - "volume-of-distribution", - "clearance" - ); - - foreach($literals AS $l) { - if(isset($x->$l) and $x->$l != '') { - $lid = parent::getRes().md5($l.$x->$l); + + foreach($x->{'drugbank-id'} AS $id) { parent::addRDF( - parent::describeIndividual($lid,"$l for $did",parent::getVoc().ucfirst($l), "$l for $did",$x->$l). - parent::describeClass(parent::getVoc().ucfirst($l),ucfirst(str_replace("-"," ",$l))). - parent::triplify($did,parent::getVoc().$l,$lid) + parent::triplifyString($did, parent::getVoc()."drugbank-id", $id) + ); + } + if(isset($x->{'cas-number'})) { + parent::addRDF( + parent::triplify($did, parent::getVoc()."x-cas", "cas:".$x->{'cas-number'}) ); } - } + + $literals = array( + "indication", + "pharmacodynamics", + "mechanism-of-action", + "toxicity", + "biotransformation", + "absorption", + "half-life", + "protein-binding", + "route-of-elimination", + "volume-of-distribution", + "clearance" + ); + + foreach($literals AS $l) { + if(isset($x->$l) and $x->$l != '') { + $lid = parent::getRes().md5($l.$x->$l); + parent::addRDF( + parent::describeIndividual($lid,"$l for $did",parent::getVoc().ucfirst($l), "$l for $did",$x->$l). + parent::describeClass(parent::getVoc().ucfirst($l),ucfirst(str_replace("-"," ",$l))). + parent::triplify($did,parent::getVoc().$l,$lid) + ); + } + } // TODO:: Replace the next two lines - $this->AddList($x,$did,"groups","group",parent::getVoc()."group"); - $this->AddList($x,$did,"categories","category",parent::getVoc()."category"); + $this->AddList($x,$did,"groups","group",parent::getVoc()."group"); + $this->AddList($x,$did,"categories","category",parent::getVoc()."category"); - if(isset($x->classification)) { - foreach($x->classification->children() AS $k => $v) { - $cid = parent::getRes().md5($v); - parent::addRDF( - parent::describeIndividual($cid, $v, parent::getVoc()."Drug-Classification-Category"). - parent::describeClass(parent::getVoc()."Drug-Classification-Category","Drug Classification Category"). - parent::triplify($did, parent::getVoc()."drug-classification-category", $cid) - ); + if(isset($x->classification)) { + foreach($x->classification->children() AS $k => $v) { + $cid = parent::getRes().md5($v); + parent::addRDF( + parent::describeIndividual($cid, $v, parent::getVoc()."Drug-Classification-Category"). + parent::describeClass(parent::getVoc()."Drug-Classification-Category","Drug Classification Category"). + parent::triplify($did, parent::getVoc()."drug-classification-category", $cid) + ); + } } - } - $this->addLinkedResource($x, $did, 'atc-codes','atc-code','atc'); - $this->addLinkedResource($x, $did, 'ahfs-codes','ahfs-code','ahfs'); + $this->addLinkedResource($x, $did, 'atc-codes','atc-code','atc'); + $this->addLinkedResource($x, $did, 'ahfs-codes','ahfs-code','ahfs'); - // taxonomy - $this->AddText($x,$did,"taxonomy","kingdom",parent::getVoc()."kingdom"); + // taxonomy + $this->AddText($x,$did,"taxonomy","kingdom",parent::getVoc()."kingdom"); - // substructures - $this->AddText($x,$did,"taxonomy","substructures",parent::getVoc()."substructure", "substructure"); + // substructures + $this->AddText($x,$did,"taxonomy","substructures",parent::getVoc()."substructure", "substructure"); - // synonyms - $this->AddCategory($x,$did,"synonyms","synonym",parent::getVoc()."synonym"); + // synonyms + $this->AddCategory($x,$did,"synonyms","synonym",parent::getVoc()."synonym"); - // brand names - $this->AddCategory($x,$did,"brands","brand",parent::getVoc()."brand"); + // brand names + $this->AddCategory($x,$did,"international-brands","international-brand",parent::getVoc()."brand"); - // salt - if(isset($x->salts->salt)) { - foreach($x->salts->salt AS $s) { - $sid = parent::getPrefix().':'.$s->{'drugbank-id'}; - parent::addRDF( - parent::describeIndividual($sid, $s->name, parent::getVoc()."Salt"). - parent::describeClass(parent::getVoc()."Salt", "Salt"). - parent::triplify($did, parent::getVoc()."salt", $sid). - parent::triplify($sid, parent::getVoc()."x-cas", "cas:".$s->{'cas-number'}). - parent::triplify($sid, parent::getVoc()."x-inchikey", "inchikey:".$s->{'inchikey'}) - ); + // salt + if(isset($x->salts->salt)) { + foreach($x->salts->salt AS $s) { + $sid = parent::getPrefix().':'.$s->{'drugbank-id'}; + parent::addRDF( + parent::describeIndividual($sid, $s->name, parent::getVoc()."Salt"). + parent::describeClass(parent::getVoc()."Salt", "Salt"). + parent::triplify($did, parent::getVoc()."salt", $sid). + parent::triplify($sid, parent::getVoc()."x-cas", "cas:".$s->{'cas-number'}). + parent::triplify($sid, parent::getVoc()."x-inchikey", "inchikey:".$s->{'inchikey'}) + ); + } } - } // mixtures // Cauterexdornase alfa + fibrinolysin + gentamicin sulfate - if(isset($x->mixtures)) { - $id = 0; - foreach($x->mixtures->mixture AS $item) { - if(isset($item)) { - $o = $item; - $mid = parent::getRes().str_replace(" ","-",$o->name[0]); - - parent::addRDF( - parent::triplify($did,parent::getVoc()."mixture",$mid). - parent::describeIndividual($mid,$o->name[0],parent::getVoc()."Mixture"). - parent::describeClass(parent::getVoc()."Mixture","mixture"). - parent::triplifyString($mid,$this->getVoc()."ingredients","".$o->ingredients[0]) - ); - - $a = explode(" + ",$o->ingredients[0]); - foreach($a AS $b) { - $b = trim($b); - $iid = parent::getRes().str_replace(" ","-",$b); - parent::addRDF( - parent::describeClass($iid,$b, parent::getVoc()."Ingredient"). - parent::describeClass(parent::getVoc()."Ingredient","Ingredient"). - parent::triplify($mid,parent::getVoc()."ingredient",$iid) - ); - } - } - } - } - - // packagers - // Cardinal Healthhttp://www.cardinal.com - if(isset($x->packagers)) { - foreach($x->packagers AS $items) { - if(isset($items->packager)) { - foreach($items->packager AS $item) { - $pid = parent::getRes().md5($item->name); + if(isset($x->mixtures)) { + $id = 0; + foreach($x->mixtures->mixture AS $item) { + if(isset($item)) { + $o = $item; + $mid = parent::getRes().str_replace(" ","-",$o->name[0]); - parent::addRDF( - parent::triplify($did,parent::getVoc()."packager",$pid) - ); - if(!isset($defined[$pid])) { - $defined[$pid] = ''; - parent::addRDF( - parent::describe($pid,"".$item->name[0]) - ); - - if(strstr($item->url,"http://") && $item->url != "http://BASF Corp."){ parent::addRDF( - $this->triplify($pid,"rdfs:seeAlso","".$item->url[0]) + parent::triplify($did,parent::getVoc()."mixture",$mid). + parent::describeIndividual($mid,$o->name[0],parent::getVoc()."Mixture"). + parent::describeClass(parent::getVoc()."Mixture","mixture"). + parent::triplifyString($mid,$this->getVoc()."ingredients","".$o->ingredients[0]) ); - } - } - } - } - } - } - -// // manufacturers - $this->AddText($x,$did,"manufacturers","manufacturer",parent::getVoc()."manufacturer"); // @TODO RESOURCE - - // prices - if(isset($x->prices->price)) { - foreach($x->prices->price AS $product) { - $pid = parent::getRes().md5($product->description); - parent::addRDF( - parent::describeIndividual($pid,$product->description,parent::getVoc()."Pharmaceutical",$product->description). - parent::describeClass(parent::getVoc()."Pharmaceutical","pharmaceutical"). - parent::triplifyString($pid,parent::getVoc()."price","".$product->cost,"xsd:float"). - parent::triplify($did, parent::getVoc()."product", $pid) - ); - $uid = parent::getVoc().md5($product->unit); - parent::addRDF( - parent::describeIndividual($uid,$product->unit,parent::getVoc()."Unit",$product->unit). - parent::describeClass(parent::getVoc()."Unit","unit"). - parent::triplify($pid,parent::getVoc()."form",$uid) - ); + $a = explode(" + ",$o->ingredients[0]); + foreach($a AS $b) { + $b = trim($b); + $iid = parent::getRes().str_replace(" ","-",$b); + parent::addRDF( + parent::describeClass($iid,$b, parent::getVoc()."Ingredient"). + parent::describeClass(parent::getVoc()."Ingredient","Ingredient"). + parent::triplify($mid,parent::getVoc()."ingredient",$iid) + ); + } + } + } } - } - // dosages
Powder, for solution
Intravenous
- if(isset($x->dosages->dosage)) { - foreach($x->dosages->dosage AS $dosage) { - $id = parent::getRes().md5($dosage->strength.$dosage->form.$dosage->route); - $label = (($dosage->strength != '')?$dosage->strength." ":"").$dosage->form." form with ".$dosage->route. " route"; + // packagers + // Cardinal Healthhttp://www.cardinal.com + if(isset($x->packagers)) { + foreach($x->packagers AS $items) { + if(isset($items->packager)) { + foreach($items->packager AS $item) { + $pid = parent::getRes().md5($item->name); - parent::addRDF( - parent::describeIndividual($id,$label,parent::getVoc()."Dosage"). - parent::describeClass(parent::getVoc()."Dosage","Dosage"). - parent::triplify($did, parent::getVoc()."dosage", $id) - ); + parent::addRDF( + parent::triplify($did,parent::getVoc()."packager",$pid) + ); + if(!isset($defined[$pid])) { + $defined[$pid] = ''; + parent::addRDF( + parent::describe($pid,"".$item->name[0]) + ); - $rid = parent::getVoc().md5($dosage->route); - $this->typify($id,$rid,"Route","".$dosage->route); + if(strstr($item->url,"http://") && $item->url != "http://BASF Corp."){ + parent::addRDF( + $this->triplify($pid,"rdfs:seeAlso","".$item->url[0]) + ); + } + } + } + } + } + } - $fid = parent::getVoc().md5($dosage->form); - $this->typify($id,$fid,"Form","".$dosage->form); + // manufacturers + $this->AddText($x,$did,"manufacturers","manufacturer",parent::getVoc()."manufacturer"); // @TODO RESOURCE - if($dosage->strength != '') { - parent::addRDF( - parent::triplifyString($id, parent::getVoc()."strength", $dosage->strength) - ); + // prices + if(isset($x->prices->price)) { + foreach($x->prices->price AS $product) { + $pid = parent::getRes().md5($product->description); + parent::addRDF( + parent::describeIndividual($pid,$product->description,parent::getVoc()."Pharmaceutical",$product->description). + parent::describeClass(parent::getVoc()."Pharmaceutical","pharmaceutical"). + parent::triplifyString($pid,parent::getVoc()."price","".$product->cost,"xsd:float"). + parent::triplify($did, parent::getVoc()."product", $pid) + ); + + $uid = parent::getVoc().md5($product->unit); + parent::addRDF( + parent::describeIndividual($uid,$product->unit,parent::getVoc()."Unit",$product->unit). + parent::describeClass(parent::getVoc()."Unit","unit"). + parent::triplify($pid,parent::getVoc()."form",$uid) + ); + } } - } - } - - // experimental-properties - $props = array("experimental-properties","calculated-properties"); - foreach($props AS $prop) { - $subtype = substr($prop,0,strpos("-",$prop)); - if(isset($x->{$prop})) { - foreach($x->{$prop} AS $properties) { - foreach($properties AS $property) { - $type = (string) $property->kind; - $value = (string) $property->value; - $type_uri = parent::getVoc().ucfirst(str_replace(" ","-",$type)); - - $id = parent::getRes().$prop."-".$dbid."-".($counter++); - $label = $property->kind.": $value".($property->source == ''?'':" from ".$property->source); + + // dosages
Powder, for solution
Intravenous
+ if(isset($x->dosages->dosage)) { + foreach($x->dosages->dosage AS $dosage) { + $id = parent::getRes().md5($dosage->strength.$dosage->form.$dosage->route); + $label = (($dosage->strength != '')?$dosage->strength." ":"").$dosage->form." form with ".$dosage->route. " route"; + + parent::addRDF( + parent::describeIndividual($id,$label,parent::getVoc()."Dosage"). + parent::describeClass(parent::getVoc()."Dosage","Dosage"). + parent::triplify($did, parent::getVoc()."dosage", $id) + ); + + $rid = parent::getVoc().md5($dosage->route); + $this->typify($id,$rid,"Route","".$dosage->route); + + $fid = parent::getVoc().md5($dosage->form); + $this->typify($id,$fid,"Form","".$dosage->form); + + if($dosage->strength != '') { parent::addRDF( - parent::describeIndividual($id,$label,$type_uri). - parent::describeClass($type_uri,$type,parent::getVoc().ucfirst($prop)). - parent::describeClass(parent::getVoc().ucfirst($prop),str_replace("-"," ",$prop)). - parent::triplifyString($id,$this->getVoc()."value",$value). - parent::triplify($did,$this->getVoc().$prop,$id) + parent::triplifyString($id, parent::getVoc()."strength", $dosage->strength) ); - - // Source - if(isset($property->source)) { - foreach($property->source AS $source) { - $s = (string) $source; - if($s == '') continue; - $sid = parent::getRes().md5($s); - parent::addRDF( - parent::describeIndividual($sid,$s,parent::getVoc()."Source"). - parent::describeClass(parent::getVoc()."Source","Source"). - parent::triplify($id,parent::getVoc()."source",$sid) - ); - } - } - } - } + } + } } - } + + // experimental-properties + $props = array("experimental-properties","calculated-properties"); + foreach($props AS $prop) { + $subtype = substr($prop,0,strpos("-",$prop)); + if(isset($x->{$prop})) { + foreach($x->{$prop} AS $properties) { + foreach($properties AS $property) { + $type = (string) $property->kind; + $value = (string) $property->value; + $type_uri = parent::getVoc().ucfirst(str_replace(" ","-",$type)); + + $id = parent::getRes().$prop."-".$dbid."-".($counter++); + $label = $property->kind.": $value".($property->source == ''?'':" from ".$property->source); + parent::addRDF( + parent::describeIndividual($id,$label,$type_uri). + parent::describeClass($type_uri,$type,parent::getVoc().ucfirst($prop)). + parent::describeClass(parent::getVoc().ucfirst($prop),str_replace("-"," ",$prop)). + parent::triplifyString($id,$this->getVoc()."value",$value). + parent::triplify($did,$this->getVoc().$prop,$id) + ); + + // Source + if(isset($property->source)) { + foreach($property->source AS $source) { + $s = (string) $source; + if($s == '') continue; + $sid = parent::getRes().md5($s); + parent::addRDF( + parent::describeIndividual($sid,$s,parent::getVoc()."Source"). + parent::describeClass(parent::getVoc()."Source","Source"). + parent::triplify($id,parent::getVoc()."source",$sid) + ); + } + } + } + } + } + } - // identifiers - // RE40183United States1996-04-09 2016-04-09 - if(isset($x->patents->patent)) { - foreach($x->patents->patent AS $patent) { - $id = "uspto:".$patent->number; + // identifiers + // RE40183United States1996-04-09 2016-04-09 + if(isset($x->patents->patent)) { + foreach($x->patents->patent AS $patent) { + $id = "uspto:".$patent->number; - parent::addRDF( - parent::triplify($did,$this->getVoc()."patent",$id). - parent::describeIndividual($id,$patent->country." patent ".$patent->number,$this->getVoc()."Patent"). - parent::describeClass(parent::getVoc()."Patent","patent"). - parent::triplifyString($id,$this->getVoc()."approved","".$patent->approved). - parent::triplifyString($id,$this->getVoc()."expires","".$patent->expires) - ); + parent::addRDF( + parent::triplify($did,$this->getVoc()."patent",$id). + parent::describeIndividual($id,$patent->country." patent ".$patent->number,$this->getVoc()."Patent"). + parent::describeClass(parent::getVoc()."Patent","patent"). + parent::triplifyString($id,$this->getVoc()."approved","".$patent->approved). + parent::triplifyString($id,$this->getVoc()."expires","".$patent->expires) + ); - $cid = parent::getRes().md5($patent->country); - $this->typify($id,$cid,"Country","".$patent->country); - } - } + $cid = parent::getRes().md5($patent->country); + $this->typify($id,$cid,"Country","".$patent->country); + } + } - // partners - $partners = array('target','enzyme','transporter','carrier'); - foreach($partners AS $partner) { - $plural = $partner.'s'; - if(isset($x->$plural)) { - foreach($x->$plural AS $list) { - foreach($list->$partner AS $item) { - $this->parsePartnerEntry($did,$item,$partner); - parent::writeRDFBufferToWriteFile(); + // partners + $partners = array('target','enzyme','transporter','carrier'); + foreach($partners AS $partner) { + $plural = $partner.'s'; + if(isset($x->$plural)) { + foreach($x->$plural AS $list) { + foreach($list->$partner AS $item) { + $this->parsePartnerRelation($did,$item,$partner); + parent::writeRDFBufferToWriteFile(); + } } } } - } - // drug-interactions - $y = (int) substr($dbid,2); - if(isset($x->{"drug-interactions"})) { - foreach($x->{"drug-interactions"} AS $ddis) { - foreach($ddis->{"drug-interaction"} AS $ddi) { - $dbid2 = $ddi->{'drugbank-id'}; - if($dbid < $dbid2) { // don't repeat - $ddi_id = parent::getRes().$dbid."_".$dbid2; - parent::addRDF( - parent::triplify("drugbank:".$dbid,parent::getVoc()."ddi-interactor-in","".$ddi_id). - parent::triplify("drugbank:".$dbid2,parent::getVoc()."ddi-interactor-in","".$ddi_id). - parent::describeIndividual($ddi_id,"DDI between $name and ".$ddi->name." - ".$ddi->description,parent::getVoc()."Drug-Drug-Interaction"). - parent::describeClass(parent::getVoc()."Drug-Drug-Interaction","drug-drug interaction") - ); - } - } - } - } - - // food-interactions - $this->AddText($x,$did,"food-interactions","food-interaction",parent::getVoc()."food-interaction"); - - // affected-organisms - $this->AddCategory($x,$did,"affected-organisms","affected-organism",parent::getVoc()."affected-organism"); - - // - if(isset($x->{"external-identifiers"})) { - foreach($x->{"external-identifiers"} AS $objs) { - foreach($objs AS $obj) { - $ns = $this->NSMap($obj->resource); - $id = $obj->identifier; - if($ns == "genecards") $id = str_replace(array(" "),array("_"),$id); - - parent::addRDF( - parent::triplify($did,parent::getVoc()."x-$ns","$ns:$id") - ); - } - } - } - // - if(isset($x->{"external-links"})) { - foreach($x->{"external-links"}->{'external-link'} AS $el) { - if(strpos($el->url,'http') !== false) { - parent::addRDF( - parent::triplify($did,"rdfs:seeAlso","".$el->url) - ); - } - } - } - parent::writeRDFBufferToWriteFile(); - } + // drug-interactions + $y = (int) substr($dbid,2); + if(isset($x->{"drug-interactions"})) { + foreach($x->{"drug-interactions"} AS $ddis) { + foreach($ddis->{"drug-interaction"} AS $ddi) { + $dbid2 = $ddi->{'drugbank-id'}; + if($dbid < $dbid2) { // don't repeat + $ddi_id = parent::getRes().$dbid."_".$dbid2; + parent::addRDF( + parent::triplify("drugbank:".$dbid,parent::getVoc()."ddi-interactor-in","".$ddi_id). + parent::triplify("drugbank:".$dbid2,parent::getVoc()."ddi-interactor-in","".$ddi_id). + parent::describeIndividual($ddi_id,"DDI between $name and ".$ddi->name." - ".$ddi->description,parent::getVoc()."Drug-Drug-Interaction"). + parent::describeClass(parent::getVoc()."Drug-Drug-Interaction","drug-drug interaction") + ); + } + } + } + } - function AddLinkedResource(&$x, $id, $list_name,$item_name,$ns) - { - if(isset($x->$list_name)) { - foreach($x->$list_name AS $item) { - if(isset($item->$item_name) && ($item->$item_name != '')) { - if($item_name == "atc-code") { - $l = $ns.":".$item->$item_name->attributes()->code; - } else { - $l = $ns.":".$item->$item_name; + // food-interactions + $this->AddText($x,$did,"food-interactions","food-interaction",parent::getVoc()."food-interaction"); + + // affected-organisms + $this->AddCategory($x,$did,"affected-organisms","affected-organism",parent::getVoc()."affected-organism"); + + // + if(isset($x->{"external-identifiers"})) { + foreach($x->{"external-identifiers"} AS $objs) { + foreach($objs AS $obj) { + $ns = $this->NSMap($obj->resource); + $id = $obj->identifier; + if($ns == "genecards") $id = str_replace(array(" "),array("_"),$id); + + parent::addRDF( + parent::triplify($did,parent::getVoc()."x-$ns","$ns:$id") + ); + if($ns == "pubchemcompound") { + parent::addRDF( + parent::triplify("$ns:$id","skos:exactMatch","http://rdf.ncbi.nlm.nih.gov/pubchem/compound/$id") + ); + } + + } } - $this->addRDF($this->triplify($id,parent::getVoc()."x-$ns",trim($l))); - } - } - } - } + } + // + if(isset($x->{"external-links"})) { + foreach($x->{"external-links"}->{'external-link'} AS $el) { + if(strpos($el->url,'http') !== false) { + parent::addRDF( + parent::triplify($did,"rdfs:seeAlso","".$el->url) + ); + } + } + } + parent::writeRDFBufferToWriteFile(); + } - function AddText(&$x, $id, $list_name,$item_name,$predicate, $list_item_name = null) - { - if(isset($x->$list_name)) { - foreach($x->$list_name AS $item) { - if(isset($item->$item_name) && ($item->$item_name != '')) { - $l = $item->$item_name; - if(isset($l->$list_item_name)) { - foreach($l->$list_item_name AS $k) { - $kid = parent::getRes().md5($k); - $this->addRDF( - $this->describeIndividual($kid,"$item_name for $id",parent::getVoc().ucfirst($item_name)). - $this->describeClass(parent::getVoc().ucfirst($item_name),$item_name). - $this->triplifyString($kid,"rdf:value",$k). - $this->triplify($id,$predicate,$kid) - ); - } - } else { + function AddLinkedResource(&$x, $id, $list_name,$item_name,$ns) + { + if(isset($x->$list_name)) { + foreach($x->$list_name AS $item) { + if(isset($item->$item_name) && ($item->$item_name != '')) { + if($item_name == "atc-code") { + $l = $ns.":".$item->$item_name->attributes()->code; + } else { + $l = $ns.":".$item->$item_name; + } + $this->addRDF($this->triplify($id,parent::getVoc()."x-$ns",trim($l))); + } + } + } + } + + function AddText(&$x, $id, $list_name,$item_name,$predicate, $list_item_name = null) + { + if(isset($x->$list_name)) { + foreach($x->$list_name AS $item) { + if(isset($item->$item_name) && ($item->$item_name != '')) { + $l = $item->$item_name; + if(isset($l->$list_item_name)) { + foreach($l->$list_item_name AS $k) { + $kid = parent::getRes().md5($k); + $this->addRDF( + $this->describeIndividual($kid,"$item_name for $id",parent::getVoc().ucfirst($item_name)). + $this->describeClass(parent::getVoc().ucfirst($item_name),$item_name). + $this->triplifyString($kid,"rdf:value",$k). + $this->triplify($id,$predicate,$kid) + ); + } + } else { $kid = parent::getRes().md5($l); - $this->addRDF( + $this->addRDF( $this->describeIndividual($kid,"$item_name for $id",parent::getVoc().ucfirst($item_name)). $this->describeClass(parent::getVoc().ucfirst($item_name),$item_name). $this->triplifyString($kid,"rdf:value",$l). $this->triplify($id,$predicate,$kid) ); - } - } - } - } - } + } + } + } + } + } function AddCategory(&$x, $id, $list_name, $item_name, $predicate, $list_item_name = null) { @@ -709,60 +720,57 @@ function AddCategory(&$x, $id, $list_name, $item_name, $predicate, $list_item_na foreach($x->$list_name AS $item) { if(isset($item->$item_name) && ($item->$item_name != '')) { $l = $item->$item_name; - if(isset($l->$list_item_name)) { - foreach($l->$list_item_name AS $k) { - $kid = parent::getVoc().ucfirst(str_replace(" ","-",$k)); - $this->addRDF( - $this->describeIndividual($kid,ucfirst($k),parent::getVoc().ucfirst($item_name)). - $this->describeClass(parent::getVoc().ucfirst($item_name),ucfirst($item_name)). - $this->triplify($id,$predicate,$kid) - ); - } - } else { - if($item_name == "synonym") { - $kid = parent::getvoc().md5($l); - } else { - $kid = parent::getVoc().ucfirst(str_replace(" ","-",$l)); - } + $att = ($l->attributes()); + foreach($l AS $key => $item_value) { + $kid = parent::getvoc().md5($item_value); $this->addRDF( - $this->describeIndividual($kid,ucfirst($l),parent::getVoc().ucfirst($item_name)). - $this->describeClass(parent::getVoc().ucfirst($item_name),ucfirst($item_name)). + $this->describeIndividual($kid,"".$item_value,parent::getVoc().ucfirst($item_name)). + $this->describeClass(parent::getVoc().ucfirst($item_name),ucfirst("".$item_name)). $this->triplify($id,$predicate,$kid) ); - foreach($l->attributes() AS $ka => $va) { + foreach($att AS $ka => $va) { parent::addRDF( - $this->triplifyString($kid, parent::getVoc().$ka, $va) + $this->triplifyString($kid, parent::getVoc().$ka, "".$va) ); } } + $kid = parent::getvoc().md5($l->asXML()); + foreach($l->children() AS $k2 => $v2) { + $this->addRDF( + $this->describeIndividual($kid,($k2=="name"?$v2:$predicate),parent::getVoc().ucfirst($k2)). + $this->describeClass(parent::getVoc().ucfirst($k2),ucfirst("".$v2)). + $this->triplifyString($kid, parent::getVoc().$k2, $v2). + $this->triplify($id, $predicate, $kid) + ); + } } } } } - + function AddList(&$x, $id, $list_name, $item_name, $predicate, $list_item_name = null) -{ - if(isset($x->$list_name)) { - foreach($x->$list_name->$item_name AS $k => $item) { - if(isset($item->$item_name)) { - foreach($item->$item_name AS $k => $v) { - $mylist[] = ''.$v; - } - } else $mylist[] = ''.$item; + { + if(isset($x->$list_name)) { + foreach($x->$list_name->$item_name AS $k => $item) { + if(isset($item->$item_name)) { + foreach($item->$item_name AS $k => $v) { + $mylist[] = ''.$v; + } + } else $mylist[] = ''.$item; + } } - } - if(isset($mylist)) { - foreach($mylist AS $item) { - $label = ''.$item; - $kid = parent::getVoc().ucfirst(str_replace(" ","-",$label)); // generate a new identifier for the list item - $this->addRDF( - $this->describeIndividual($kid,$label,parent::getVoc().ucfirst($item_name)). - $this->describeClass(parent::getVoc().ucfirst($item_name),ucfirst($item_name)). - $this->triplify($id,$predicate,$kid) - ); + if(isset($mylist)) { + foreach($mylist AS $item) { + $label = ''.$item; + $kid = parent::getVoc().ucfirst(str_replace(" ","-",$label)); // generate a new identifier for the list item + $this->addRDF( + $this->describeIndividual($kid,$label,parent::getVoc().ucfirst($item_name)). + $this->describeClass(parent::getVoc().ucfirst($item_name),ucfirst($item_name)). + $this->triplify($id,$predicate,$kid) + ); + } } } -} } // end class diff --git a/goa/goa.php b/goa/goa.php index 1c5a067..9d3f6d6 100644 --- a/goa/goa.php +++ b/goa/goa.php @@ -36,7 +36,8 @@ class GOAParser extends Bio2RDFizer { function __construct($argv) { parent::__construct($argv,"goa"); - parent::addParameter('files',true,'all|arabidopsis|chicken|cow|dicty|dog|fly|human|mouse|pdb|pig|rat|uniprot|worm|yeast|zebrafish','all','all or comma-separated list of files to process'); +// parent::addParameter('files',true,'all|arabidopsis|chicken|cow|dicty|dog|fly|human|mouse|pdb|pig|rat|uniprot|worm|yeast|zebrafish','all','all or comma-separated list of files to process'); + parent::addParameter('files',true,'all|arabidopsis|chicken|cow|dicty|dog|fly|human|mouse|pig|rat|worm|yeast|zebrafish','all','all or comma-separated list of files to process'); parent::addParameter('download_url',false,null,'ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/'); parent::initialize(); } @@ -161,9 +162,13 @@ function process($file){ //entity id $eid = $this->getdbURI($db,$id); +if(!$eid) { +print_r($fields); +continue; +} parent::addRDF( parent::describeIndividual($eid,$label,parent::getVoc()."GO-Annotation"). - parent::describeClass(parent::getVoc()."GO-Annotation","GO Annotation"). + parent::describeClass(parent::getVoc()."GO-Annotation","GO Annotation"). parent::triplifyString($eid,parent::getVoc()."symbol",$symbol) ); parent::addRDF( @@ -261,6 +266,8 @@ function getdbURI($db_id, $db_object_id){ } else if ($db_id == "PDB"){ $split_object = explode("_", $db_object_id); $returnMe = "pdb:".$split_object[0]."/chain_".$split_object[1]; + } else { + $returnMe = $db_id.":".$db_object_id; } return $returnMe; } diff --git a/hgnc/hgnc.php b/hgnc/hgnc.php index 94ab10d..164bcf5 100755 --- a/hgnc/hgnc.php +++ b/hgnc/hgnc.php @@ -117,11 +117,13 @@ function Run(){ function process(){ $header = $this->GetReadFile()->Read(200000); $header_arr = explode("\t", $header); - - if (count($header_arr) != 40) + $n = 41; + $c = count($header_arr); + if ($c != $n) { echo PHP_EOL; - trigger_error ("Header format is different than expected, please update the script",E_USER_ERROR); + print_r($header_arr); + trigger_error ("Expected $n columns, found $c . please update the script",E_USER_ERROR); exit; } @@ -164,9 +166,10 @@ function process(){ $refseq_mappeddatasuppliedbyNCBI = $fields[34]; $uniprot_id_mappeddatasuppliedbyUniProt = $fields[35]; $ensembl_id_mappeddatasuppliedbyEnsembl = $fields[36]; - $ucsc_id_mappeddatasuppliedbyUCSC = $fields[37]; - $mouse_genome_database_id_mappeddatasuppliedbyMGI = $fields[38]; - $rat_genome_database_id_mappeddatasuppliedbyRGD = $fields[39]; + $vega_id_mappeddatasuppliedbyVega = $fields[37]; + $ucsc_id_mappeddatasuppliedbyUCSC = $fields[38]; + $mouse_genome_database_id_mappeddatasuppliedbyMGI = $fields[39]; + $rat_genome_database_id_mappeddatasuppliedbyRGD = $fields[40]; $id_res = $id; $id_label = "Gene Symbol for ".$approved_symbol; @@ -464,6 +467,15 @@ function process(){ } } + if(!empty($ucsc_id_mappeddatasuppliedbyVega)){ + $ucsc_id_mappeddatasuppliedbyVega = explode(", ", $ucsc_id_mappeddatasuppliedbyVega); + foreach ($ucsc_id_mappeddatasuppliedbyVega as $vega_id) { + parent::AddRDF( + parent::triplify($id_res, $this->getVoc()."x-vega", "vega:".trim($vega_id)). + parent::describeProperty($this->getVoc()."x-vega", "Vega entry") + ); + } + } if(!empty($ucsc_id_mappeddatasuppliedbyUCSC)){ $ucsc_id_mappeddatasuppliedbyUCSC = explode(", ", $ucsc_id_mappeddatasuppliedbyUCSC); foreach ($ucsc_id_mappeddatasuppliedbyUCSC as $ucsc_id) { diff --git a/irefindex/irefindex.php b/irefindex/irefindex.php index d5a7ceb..d6d1c0d 100644 --- a/irefindex/irefindex.php +++ b/irefindex/irefindex.php @@ -33,8 +33,8 @@ class irefindexParser extends Bio2RDFizer { function __construct($argv) { // parent::__construct($argv,"irefindex"); - parent::addParameter('files',true,'all|10090|10116|4932|559292|562|6239|7227|9606|A','all','all or comma-separated list of files to process'); - parent::addParameter('version',false,'08122013|03022013|10182011','08122013','dated version of files to download'); + parent::addParameter('files',true,'all|10090|10116|4932|559292|562|6239|7227|9606','all','all or comma-separated list of files to process'); + parent::addParameter('version',false,'07042015|08122013|03022013|10182011','07042015','dated version of files to download'); parent::addParameter('download_url',false,null,'http://irefindex.org/download/irefindex/data/current/psi_mitab/MITAB2.6/'); parent::initialize(); } @@ -55,9 +55,8 @@ function Run() foreach($files AS $file) { $download = parent::getParameterValue('download'); - - $base_file = ucfirst($file).".mitab.".parent::getParameterValue("version").".txt"; - $zip_file = $base_file.".zip"; + $version = parent::getParameterValue("version"); + $zip_file = ucfirst($file).".mitab.".$version.".txt.zip"; $lfile = $ldir.$zip_file; $gz = (strstr(parent::getParameterValue('output_format'),".gz") === FALSE)?false:true; @@ -82,9 +81,14 @@ function Run() trigger_error("Unable to open $lfile"); exit; } + if($zin->numFiles != 1) { + trigger_error("Found more than one file ... using first file"); + } + $f = $zin->statIndex(0); + $base_file = $f['name']; if(($fp = $zin->getStream($base_file)) === FALSE) { - trigger_error("Unable to get $base_file in ziparchive $lfile"); - return FALSE; + trigger_error("Unable to get $base_file in ziparchive $lfile"); + return FALSE; } parent::setReadFile($lfile); parent::getReadFile()->setFilePointer($fp); diff --git a/kegg/kegg.php b/kegg/kegg.php index 4bf7e0a..db6dcf6 100644 --- a/kegg/kegg.php +++ b/kegg/kegg.php @@ -230,6 +230,7 @@ function process($db) parent::describeIndividual($uri,$name,parent::getVoc().ucfirst($db)). parent::describeClass(parent::getVoc().ucfirst($db),"KEGG $db"). parent::triplifyString($uri,parent::getVoc()."internal-id",$nsid) + // http://www.kegg.jp/entry/ ); // now get the entries for each @@ -248,6 +249,23 @@ function process($db) echo "parsing $nsid ... "; $this->parseEntry($lfile); parent::writeRDFBufferToWriteFile(); + + if($db === "pathway") { + $ko = str_replace("map","ko",$id); + $lfile = $ldir.$id.".kgml"; + $rfile = "http://www.kegg.jp/kegg-bin/download?entry=".$ko."&format=kgml"; + if(!file_exists($lfile) || parent::getParameterValue('download') == 'true') { + echo "downloading KGML for $nsid "; + $ret = utils::downloadSingle($rfile,$lfile); + if($ret === false) { + echo "unable to download ".$nsid." ... skipping".PHP_EOL; + continue; + } + echo "done. "; + } + $this->parseKGML($lfile); + parent::writeRDFBufferToWriteFile(); + } echo "done!".PHP_EOL; } } @@ -719,6 +737,99 @@ function parseEntry($lfile) fclose($fp); } + function parseKGML($lfile) + { + $pathway = simplexml_load_file($lfile); + if($pathway === false) { + echo "Error in parsing $lfile".PHP_EOL; + return; + } + $pathway_id = str_replace("path","kegg",$pathway['name']); + $base_id = str_replace("kegg","kegg_resource",$pathway_id)."."; + + parent::addRDF( + parent::describeIndividual($pathway_id, $pathway['title'], parent::getVoc()."Pathway"). + parent::triplify($pathway_id, "rdfs:seeAlso", $pathway['link']). + parent::triplify($pathway_id, "foaf:depiction", $pathway['image']) + ); + + // get the entries + foreach($pathway->children() as $type => $item) { + if($type == "entry") { + $eid = $base_id.$item['id']; + $entries[ "".$item['id']] = "".$item['name']; + + parent::addRDF( + parent::describeIndividual($eid, $item['name'], parent::getVoc()."Ortholog-Group"). + parent::describeClass(parent::getVoc()."Ortholog-Group", "KEGG Ortholog Group") + ); + $mids = explode(" ",$item['name']); + foreach($mids AS $mid) { + if($item['type'] == 'path') $mid = str_replace($mid,":","_"); + else { + $mid = substr($mid, strpos($mid,":")+1); + } + + parent::addRDF( + parent::triplify($eid, parent::getVoc()."member", "kegg:".$mid) + ); + } + } + } + + // iterate over the relations, reactions + foreach($pathway->children() as $type => $item) { + if($type == "relation") { + /* + + + + + + + + */ + $id1 = "".$item['entry1']; $id2 = "".$item['entry2'];$type = "".$type; + $relation_id = str_replace("kegg","kegg_resource",$pathway_id).".".$id1.".".$id2.".".$type; + $label = $type." relation between ".$entries[ $id1 ]. " and ".$entries[ $id2 ]; + + parent::addRDF( + parent::describeIndividual($relation_id, $label, parent::getVoc()."Pathway-Relation"). + parent::describeClass(parent::getVoc()."Pathway-Relation","KEGG Pathway Relation"). + parent::triplify($relation_id, parent::getVoc()."source", $base_id.$id1). + parent::triplify($relation_id, parent::getVoc()."target", $base_id.$id2). + parent::triplifyString($relation_id, parent::getVoc()."type", $item['type']) + ); + foreach($item->children() as $subtype) { + parent::addRDF( + parent::triplifyString($relation_id, parent::getVoc()."subtype", ''.$subtype['name']) + ); + } + + } else if($type == "reaction") { + /* + + + + */ + $reaction_id = str_replace("kegg","kegg_resource",$pathway_id).".".substr($item['name'], strpos($item['name'],":")+1); + $reaction_type = parent::getVoc().ucfirst($item['type'])."-Reaction"; + parent::addRDF( + parent::describeIndividual($reaction_id, $item['name'], parent::getVoc()."Reaction"). + parent::describeClass(parent::getVoc()."Reaction", "KEGG Reaction"). + parent::triplify($reaction_id, "rdf:type", $reaction_type) + ); + + foreach($item->children() AS $k => $v) { + $cid = str_replace("cpd:","kegg:",$v['name']); + parent::addRDF( + parent::triplify($reaction_id, parent::getVoc().$k, $cid) + ); + } + } + } + return; + } } diff --git a/lsr/lsr.php b/lsr/lsr.php index 19d8f48..df8fb93 100644 --- a/lsr/lsr.php +++ b/lsr/lsr.php @@ -47,7 +47,7 @@ function run() $ifile = "registry.csv"; $lfile = $idir.$ifile; if(!file_exists($lfile) or parent::getParameterValue("download") == "true") { - echo "Downloading regsitry"; + echo "Downloading registry"; utils::downloadSingle($rfile,$lfile); echo "done".PHP_EOL; } @@ -109,17 +109,11 @@ function parse() if(!isset($r['preferredPrefix'])) continue; $dataset = $r['preferredPrefix']; $id = $this->getNamespace().$r['preferredPrefix']; - + parent::addRDF( - parent::QQuad($id,"rdf:type","dcat:Dataset"). - parent::QQuad($id,"rdf:type","lsr:Dataset"). - parent::QQuadL($id,"dc:title",$r['title']). - parent::QQuadL($id,"dc:description",$r['description']). - parent::QQuadL($id,"rdfs:label", $r['title']." [".$id."]"). - parent::QQuadL($id,"dc:identifier",$id). - parent::QQuadL($id,"bio2rdf:identifier", $r['preferredPrefix']). - parent::QQuadL($id,"bio2rdf:namespace", "lsr") -// parent::describeIndividual($id,$r['title'],"dcat:Dataset",$r['title'],$r['description']) + parent::describeIndividual($id,$r['title'],"lsr_vocabulary:Dataset",$r['title'],$r['description']). + parent::describeClass("lsr_vocabulary:Dataset","LSR Dataset"). + parent::triplify($id,"rdf:type","dctypes:Dataset") ); parent::addRDF( parent::triplifyString($id,"idot:preferredPrefix",$r['preferredPrefix']) @@ -130,25 +124,27 @@ function parse() if(trim($syn) == '') continue; $syn = $this->getRegistry()->normalizePrefix(preg_replace("/\([^\)]+/","",$syn)); parent::addRDF( - parent::QQuadL($id,"idot:alternativePrefix",$syn) + parent::triplifyString($id,"idot:alternativePrefix",$syn) ); } } if($r['providerURI']) { - parent::QQuad($id,"void:uriRegexPattern",$r['providerURI']); + parent::triplifyString($id,"void:uriRegexPattern",$r['providerURI'], "xsd:anyUri"); } if($r['alternateURI']) { foreach( explode(",",$r['alternateURI']) AS $alt_uri) { - parent::addRDF( - parent::QQuad($id,"void:uriRegexPattern",$alt_uri) - ); + if(trim($alt_uri) != '') { + parent::addRDF( + parent::triplifyString($id,"void:uriRegexPattern",$alt_uri, "xsd:anyUri") + ); + } } } if($r['miriam']) { foreach(explode(",",$r['miriam']) AS $miriam) { $miriam_id = str_replace("MIR:","",$miriam); parent::addRDF( - parent::QQuad($id,$this->getVoc()."x-miriam","miriam:$miriam_id"). + parent::triplify($id,$this->getVoc()."x-miriam","miriam:$miriam_id"). parent::triplify("miriam:$miriam_id","bio2rdf_vocabulary:url",'http://identifiers.org/'.$dataset) ); } @@ -157,7 +153,7 @@ function parse() foreach(explode(",",$r['biodbcore']) AS $biodbcore_id) { $biodbcore = "biodbcore:$biodbcore_id"; parent::addRDF( - parent::QQuad($id,$this->getVoc()."x-biodbcore",$biodbcore). + parent::triplify($id,$this->getVoc()."x-biodbcore",$biodbcore). parent::triplify($biodbcore,"bio2rdf_vocabulary:url","http://www.biosharing.org/$biodbcore_id") ); } @@ -165,44 +161,44 @@ function parse() if($r['bioportal']) { foreach(explode(",",$r['bioportal']) AS $bioportal) { parent::addRDF( - parent::QQuad($id,$this->getVoc()."x-bioportal","bioportal:".$bioportal) + parent::triplify($id,$this->getVoc()."x-bioportal","bioportal:".$bioportal) ); } } if($r['datahub']) { parent::addRDF( - parent::QQuad($id,$this->getVoc()."x-datahub","datahub:".$r['datahub']) + parent::triplify($id,$this->getVoc()."x-datahub","datahub:".$r['datahub']) ); } if($r['pubmed']) { foreach(explode(",",$r['pubmed']) AS $pubmed) { parent::addRDF( - parent::QQuad($id,"cito:citesAsAuthority","pubmed:".$pubmed). - parent::QQuad("pubmed:".$pubmed, "rdf:type", "pubmed_vocabulary:Resource") + parent::triplify($id,"cito:citesAsAuthority","pubmed:".$pubmed) +// parent::triplify("pubmed:".$pubmed, "rdf:type", "pubmed_vocabulary:Resource") ); } } if($r['abbreviation']) { parent::addRDF( - parent::QQuadL($id,"dc:alternative",$r['abbreviation']) + parent::triplifyString($id,"dc:alternative",$r['abbreviation']) ); } if($r['organization']) { $pid = parent::getRes().md5($r['organization']); parent::addRDF( - parent::QQuad($id,"dc:publisher", $pid). - parent::QQuadL($pid, "dc:title", $r['organization']) + parent::triplify($id,"dc:publisher", $pid). + parent::triplifyString($pid, "dc:title", $r['organization']) ); } if($r['type']) { parent::addRDF( - parent::QQuadL($id,$this->getVoc()."type",$r['type']) + parent::triplifyString($id,$this->getVoc()."type",$r['type']) ); } foreach( explode(",",$r['keywords']) AS $keyword) { if($keyword) { parent::addRDF( - parent::QQuadL($id,"dcat:keyword",$keyword) + parent::triplifyString($id,"dcat:keyword",$keyword) ); } } @@ -210,38 +206,38 @@ function parse() && $r['homepage'] !== 'dead' && $r['homepage'] !== 'unavailable') { parent::addRDF( - parent::QQuad($id,"foaf:page",$r['homepage']) + parent::triplify($id,"foaf:page",$r['homepage']) ); } if($r['license']) { parent::addRDF( - parent::QQuad($id,"dc:license",$r['license']) + parent::triplify($id,"dc:license",$r['license']) ); } if($r['licenseText']) { parent::addRDF( - parent::QQuadL($id,$this->getVoc()."license-text",$r['licenseText']) + parent::triplifyString($id,$this->getVoc()."license-text",$r['licenseText']) ); } foreach(explode(",",$r['rights']) AS $right) { if($right) { parent::addRDF( - parent::QQuadL($id,"dc:rights",$right) + parent::triplifyString($id,"dc:rights",$right) ); } } parent::addRDF( - parent::QQuadL($id,"idot:identifierPattern",$r['id_regex']) + parent::triplifyString($id,"idot:identifierPattern",$r['id_regex']) ); parent::addRDF( - parent::QQuadL($id,"idot:exampleIdentifier",$r['example_id']) + parent::triplifyString($id,"idot:exampleIdentifier",$r['example_id']) ); if($r['html_template'] && $r['html_template'] !== 'unavailable' && $r['html_template'] !== 'N/A') { parent::addRDF( - parent::QQuadL($id,"idot:accessPattern",$r['html_template']) + parent::triplifyString($id,"idot:accessPattern",$r['html_template']) ); } diff --git a/mgi/mgi.php b/mgi/mgi.php index 2be4e83..bbb1359 100644 --- a/mgi/mgi.php +++ b/mgi/mgi.php @@ -403,7 +403,7 @@ function MGI_Geno_Disease() if(!$allele) {echo "ignoring ".$a[0].PHP_EOL;continue;} $alleles = explode("|",strtolower($a[2])); - + $genotype = $a[0]; $diseases = explode(",",$a[7]); foreach($diseases AS $d) { $disease = "omim:$d"; @@ -413,6 +413,7 @@ function MGI_Geno_Disease() parent::addRDF( parent::describeIndividual($id, $label, $this->getVoc()."Allele-Disease-Association"). parent::describeClass($this->getVoc()."Allele-Disease-Association","MGI Allele-Disease Association"). + parent::triplifyString($id,$this->getVoc()."genotype-string",$genotype). parent::triplify($id,$this->getVoc()."allele",$allele). parent::triplify($id,$this->getVoc()."disease",$disease) ); @@ -450,6 +451,7 @@ function MGI_Geno_NotDisease() continue; } + $genotype = $a[0]; $alleles = explode("|",strtolower($a[2])); $diseases = explode(",",$a[7]); foreach($diseases AS $d) { @@ -462,6 +464,7 @@ function MGI_Geno_NotDisease() parent::describeIndividual($id, $label, $this->getVoc()."Allele-Disease-Non-Association"). parent::describeClass($this->getVoc()."Allele-Disease-Non-Association","MGI Allele-Disease Non-Association"). parent::triplify($id,$this->getVoc()."allele",$allele). + parent::triplifyString($id,$this->getVoc()."genotype-string",$genotype). parent::triplify($id,$this->getVoc()."disease",$disease). parent::triplifyString($id,$this->getVoc()."is-negated","true") ); diff --git a/miriam/miriam.php b/miriam/miriam.php new file mode 100644 index 0000000..31bd80d --- /dev/null +++ b/miriam/miriam.php @@ -0,0 +1,232 @@ +getParameterValue('indir'); + $odir = $this->getParameterValue('outdir'); + + // download and set the read file + $file = 'miriam.xml'; + $rfile = $this->getParameterValue("download_url"); + $lfile = $ldir.$file; + if(!file_exists($lfile) || $this->getParameterValue("download") == "true") { + utils::downloadSingle($rfile,$lfile); + } + + parent::setReadFile($lfile); + + // set the write file + $outfile = "miriam.".parent::getParameterValue('output_format'); + $gz = (strstr(parent::getParameterValue('output_format'),".gz") === FALSE)?false:true; + parent::setWriteFile(parent::getParameterValue("outdir").$outfile,$gz); + $this->parse(); + parent::WriteRDFBufferToWriteFile(); + $this->getWriteFile()->Close(); + + return true; + } + + function parse() + { + // convert into json + $lfile = parent::getReadFile()->getFileName(); + $xml = simplexml_load_file($lfile); + $json = json_encode($xml); + $db = json_decode($json,TRUE); + + // miriam metadata + // $attributes = $db['@attributes']; + foreach($db['datatype'] AS $item) { + $this->parseItem($item); + } + } + + function parseItem($item) + { + $id = $item['@attributes']['id']; + $label = $item['name']; + + parent::addRDF( + parent::describeIndividual($id, $item['name'], parent::getVoc()."Entry"). + parent::describeClass(parent::getVoc()."Entry","MIRIAM database entry"). + parent::triplifyString($id, parent::getVoc()."namespace", $item['namespace']) + ); + + if(isset($item['@attributes'])) { + foreach($item['@attributes'] AS $k => $v) { + parent::addRDF( + parent::triplifyString($id, parent::getVoc().$k, $v) + ); + } + } + if(isset($item['comment'])) parent::addRDF(parent::triplifyString($id, parent::getVoc()."comment", $item['comment'])); + if(isset($item['definition'])) parent::addRDF(parent::triplifyString($id, parent::getVoc()."definition", $item['definition'])); + if(isset($item['synonyms'])) { + $mylist = null; + if(is_array($item['synonyms']['synonym'])) $mylist = $item['synonyms']['synonym']; + else $mylist[] = $item['synonyms']['synonym']; + foreach($mylist AS $myitem) { + parent::addRDF( + parent::triplifyString($id, "skos:altLabel", $myitem) + ); + } + } + + if(isset($item['uris'])) { + foreach($item['uris']['uri'] AS $uri) { + parent::addRDF( + parent::triplifyString($id, parent::getVoc()."uri", $uri) + ); + } + } + if(isset($item['resources'])) { + $mylist = null; + if(!isset($item['resources']['resource']['dataEntry'])) $mylist = $item['resources']['resource']; + else $mylist[] = $item['resources']['resource']; + foreach($mylist AS $myitem) { + $rid = $myitem['@attributes']['id']; + parent::addRDF( + parent::describeIndividual($rid, $myitem['dataInfo'], parent::getVoc()."Resource"). + parent::describeClass(parent::getVoc()."Resource", "MIRIAM Resource"). + parent::triplify($rid, parent::getVoc()."url", $myitem['dataResource']). + parent::triplifyString($rid, parent::getVoc()."urlTemplate", $myitem['dataEntry']). + parent::triplifyString($rid, parent::getVoc()."organization", is_array($myitem['dataInstitution'])?"":$myitem['dataInstitution']). + parent::triplifyString($rid, parent::getVoc()."location", is_array($myitem['dataLocation'])?"":$myitem['dataLocation']). + parent::triplify($id, parent::getVoc()."resource", $rid) + ); + } + } + if(isset($item['tags'])) { + $i = $item['tags']['tag']; + $mylist = null; + if(!is_array($i)) $mylist[] = $i; + else $mylist = $i; + foreach($mylist AS $myitem) { + parent::addRDF( + parent::triplifyString($id, parent::getvoc()."tag", $myitem) + ); + }} + + if(isset($item['documentations'])) { + $i = $item['documentations']['documentation']; + $mylist = null; + if(!is_array($i)) $mylist[] = $i; + else $mylist = $i; + foreach($mylist AS $myitem) { + if(strstr($myitem, "pubmed")) $uri = "pubmed:".substr($myitem, strrpos($myitem, ":")+1); + else if(strstr($myitem, "doi")) $uri = "http://dx.doi.org/".substr($myitem, strpos($myitem, "doi:")); + else $uri = $myitem; + + parent::addRDF( + parent::triplify($id, parent::getvoc()."documentation", $uri) + ); + }} + + if(isset($item['restrictions'])) { + $mylist = null; + if(!isset($item['restrictions']['restriction']['statement'])) $mylist = $item['restrictions']['restriction']; + else $mylist[] = $item['restrictions']['restriction']; + foreach($mylist AS $i => $myitem) { + $rid = parent::getRes().str_replace(":","",$id)."_".($i+1); + $a = $myitem['@attributes']; + $rid_type = parent::getVoc().'restriction_type_'.$a['type']; + + parent::addRDF( + parent::describeIndividual($rid, $a['desc'], parent::getVoc()."Restriction"). + parent::describeClass(parent::getVoc()."Restriction", "Resource Restriction"). + parent::triplify($rid, "rdf:type", $rid_type). + parent::describeClass($rid_type, $a['desc'], parent::getVoc()."Restriction"). + parent::triplifyString($rid, "dct:description", $myitem['statement']). + parent::triplify($rid, "foaf:page", isset($myitem['link'])?$myitem['link']:""). + parent::triplify($id, parent::getVoc()."restriction", $rid) + ); + }} + + /* + + + + reaction + event + rule + species + + + */ + if(isset($item['annotation'])) { + $mylist = null; + if(!isset($item['annotation']['format']['elements'])) $mylist = $item['annotation']['format']; + else $mylist[] = $item['annotation']['format']; + foreach($mylist AS $i => $myitem) { + $name = $myitem['@attributes']['name']; + $myid = str_replace("MIR:",parent::getRes(), $id)."_annotation_".($i+1)."_".urlencode($name); + parent::addRDF( + parent::describeIndividual($myid, "$label used by $name", parent::getVoc()."ValueSet"). + parent::describeClass(parent::getVoc()."ValueSet", "MIRIAM Value Set"). + parent::triplifyString($myid, parent::getVoc()."used-in", $name). + parent::triplify($myid, parent::getVoc()."uses", $id) + ); + + $b = $myitem['elements']['element']; + $mylist2 = null; + if(!is_array($b)) $mylist2[] = $b; + else $mylist2 = $b; + foreach($mylist2 AS $i => $e) { + parent::addRDF( + parent::triplifyString($myid, parent::getVoc()."used-for", $e) + ); + } + } + + } + + } + +} + +?> diff --git a/omim/omim.php b/omim/omim.php index d938f6a..89d68c1 100644 --- a/omim/omim.php +++ b/omim/omim.php @@ -34,7 +34,8 @@ function __construct($argv) { parent::__construct($argv, 'omim'); parent::addParameter('files',true,null,'all|omim#','entries to process: comma-separated list or hyphen-separated range'); parent::addParameter('omim_api_url',false,null,'http://api.omim.org/api/entry?include=all&format=json'); - parent::addParameter('omim_api_key',false,null,'D43076A680B921682DA253BEFE05DE998957B3FC'); + parent::addParameter('omim_api_key',false,null); + parent::addParameter('omim_api_key_file',false,null,'omim.key','A file containing your omim KEY'); parent::initialize(); } @@ -43,6 +44,19 @@ function Run() // directory shortcuts $ldir = parent::getParameterValue('indir'); $odir = parent::getParameterValue('outdir'); + if(parent::getParameterValue('omim_api_key') == '') { + $key_file = parent::getParameterValue('omim_api_key_file'); + if(file_exists($key_file)) { + $key = trim(file_get_contents($key_file)); + if($key) { + parent::setParameterValue('omim_api_key', $key); + } else { + trigger_error("No API key found in the specified omim key file $key_file",E_USER_WARNING); + } + } else { + trigger_error("No OMIM key has been provided either by commmand line or in the expected omim key file $key_file",E_USER_WARNING); + } + } // get the list of mim2gene entries $entries = $this->GetListOfEntries($ldir); @@ -88,7 +102,8 @@ function Run() $total = count($entries); foreach($entries AS $omim_id => $type) { echo "processing ".(++$i)." of $total - omim# "; - $download_file = $ldir.$omim_id.".json"; + $download_file = $ldir.$omim_id.".json.gz"; + $gzfile = "compress.zlib://$download_file"; // download if the file doesn't exist or we are told to if(!file_exists($download_file) || parent::getParameterValue('download') == true) { // download using the api @@ -101,7 +116,7 @@ function Run() } // load entry, parse and write to file - $entry = json_decode(file_get_contents($download_file), true); + $entry = json_decode(file_get_contents($gzfile), true); $omim_id = trim((string)$entry["omim"]["entryList"][0]["entry"]['mimNumber']); echo $omim_id; $this->ParseEntry($entry,$type); @@ -184,8 +199,9 @@ function getListOfEntries($ldir) } // download + ftp_pasv($ftp, true); echo "Downloading $file ..."; - if(ftp_get($ftp, $ldir.$file, 'omim/'.$file, FTP_BINARY) === FALSE) { + if(ftp_get($ftp, $ldir.$file, 'OMIM/'.$file, FTP_BINARY) === FALSE) { trigger_error("Error in downloading $file"); continue; } @@ -480,7 +496,11 @@ function ParseEntry($obj, $type) foreach($o['phenotypeMapList'] AS $i => $phenotypeMap) { $phenotypeMap = $phenotypeMap['phenotypeMap']; $pm_uri = parent::getRes().$omim_id."_pm_".($i+1); - parent::addRDF(parent::triplify($omim_uri, parent::getVoc()."phenotype-map", $pm_uri)); + parent::addRDF( + parent::describeIndividual($pm_uri,"phenotype mapping for $omim_id", parent::getVoc()."Phenotype-Map"). + parent::describeClass(parent::getVoc()."Phenotype-Map","OMIM Phenotype-Map"). + parent::triplify($omim_uri, parent::getVoc()."phenotype-map", $pm_uri) + ); foreach(array_keys($phenotypeMap) AS $k) { if(in_array($k, array("mimNumber","phenotypeMimNumber","phenotypicSeriesMimNumber"))) { @@ -488,7 +508,7 @@ function ParseEntry($obj, $type) } else if($k == "geneSymbols") { $l = explode(", ",$phenotypeMap[$k]); foreach($l AS $gene) { - parent::addRDF(parent::triplify($pm_uri, parent::getVoc().$k, "hgnc.symbol:".$gene)); + parent::addRDF(parent::triplify($pm_uri, parent::getVoc()."gene-symbol", "hgnc.symbol:".$gene)); } } else if ($k == "phenotypeMappingKey") { $l = $this->get_phenotype_mapping_method_type($phenotypeMap[$k]); @@ -519,7 +539,8 @@ function ParseEntry($obj, $type) // external ids if(isset($o['externalLinks'])) { foreach($o['externalLinks'] AS $k => $id) { - + if($id === false) continue; + $ns = ''; switch($k) { case 'approvedGeneSymbols': $ns = 'symbol';break; @@ -544,10 +565,12 @@ function ParseEntry($obj, $type) case 'icd9cmIDs': $ns = 'icd9';break; case 'umlsIDs': $ns = 'umls';break; case 'wormbaseIDs': $ns = 'wormbase';break; - case 'diseaseOntologyIDs': $ns = 'do';break; + + case 'diseaseOntologyIDs': $ns = 'do';break; // specifically ignorning case 'geneTests': + case 'cmgGene': case 'geneticAllianceIDs': // # case 'nextGxDx': case 'nbkIDs': // NBK1207;;Alport Syndrome and Thin Basement Membrane Nephropathy @@ -560,7 +583,11 @@ function ParseEntry($obj, $type) case 'coriellDiseases': case 'clinicalDiseaseIDs': case 'possumSyndromes': + case 'keggPathways': + case 'gtr': + case 'gwasCatalog': case 'mgiHumanDisease': + case 'wormbaseDO': case 'dermAtlas': // true/false break; @@ -571,11 +598,15 @@ function ParseEntry($obj, $type) $ids = explode(",",$id); foreach($ids AS $id) { if($ns) { - $b = explode(";;",$id); // multiple ids//names - foreach($b AS $c) { - preg_match("/([a-z])/",$c,$m); - if(!isset($m[1])) { - parent::addRDF(parent::triplify($omim_uri, parent::getVoc()."x-$ns", $ns.':'.$c)); + if(strstr($id,";;") === FALSE) { + parent::addRDF(parent::triplify($omim_uri, parent::getVoc()."x-$ns", $ns.':'.$id)); + } else { + $b = explode(";;",$id); // multiple ids//names + foreach($b AS $c) { + preg_match("/([a-z])/",$c,$m); + if(!isset($m[1])) { + parent::addRDF(parent::triplify($omim_uri, parent::getVoc()."x-$ns", $ns.':'.$c)); + } } } } diff --git a/pharmgkb/pharmgkb.php b/pharmgkb/pharmgkb.php index 37666e7..117323a 100644 --- a/pharmgkb/pharmgkb.php +++ b/pharmgkb/pharmgkb.php @@ -145,9 +145,10 @@ function run() exit; } $zipentries = array(); - if($file == "annotations") - $zipentries = array('clinical_ann.tsv','clinical_ann_metadata.tsv','var_drug_ann.tsv','var_pheno_ann.tsv','var_fa_ann.tsv','study_parameters.tsv'); - else if($file == "pathways") { + if($file == "annotations") { + // exclude: 'clinical_ann.tsv','study_parameters.tsv' + $zipentries = array('clinical_ann_metadata.tsv','var_drug_ann.tsv','var_pheno_ann.tsv','var_fa_ann.tsv'); + } else if($file == "pathways") { for( $i = 0; $i < $zin->numFiles; $i++ ){ $stat = $zin->statIndex( $i ); $entry = $stat['name']; @@ -182,8 +183,7 @@ function run() $this->GetReadFile()->SetFilePointer($fp); if($file == "annotations") { - if($zipentry == "clinical_ann_metadata.tsv") $fnx = "clinical_ann_metadata"; - else $fnx = 'annotations'; + $fnx = substr($zipentry,0,strpos($zipentry,".tsv")); echo "processing $zipentry.."; } else if($file == 'pathways') { $fnx = 'pathways'; @@ -357,6 +357,7 @@ function genes() $url = false; $x = $this->MapXrefs($xref, $url, $ns, $id2); + $ns = str_replace(' ','',$ns); if($url == true) { parent::addRDF( parent::QQuadO_URL($id, parent::getVoc()."x-$ns", $x) @@ -398,9 +399,16 @@ function MapXrefs($xref, &$url = false, &$ns = null, &$id = null) "refseqrna" => "refseq", "ucscgenomebrowser" => "refseq", "uniprotkb" => "uniprot", - 'genecard'=>'genecards' + 'genecard'=>'genecards', + 'ucscgenomebrowser' => 'refseq', + 'refseqrna' => 'refseq', + 'refseqprotein' => 'refseq', + 'refseqdna' => 'refseq', + 'comparativetoxicogenomicsdatabase' => 'ctd', + 'humancycgene' => 'humancyc' ); $this->getRegistry()->ParseQName($xref,$ns,$id); + $ns = str_replace(array('"',' '),'',$ns); if(isset($xrefs[$ns])) { $ns = $xrefs[$ns]; } @@ -496,10 +504,13 @@ function drugs() if(trim($a[6])) { // Cross References // drugBank:DB00789,keggDrug:D01707,pubChemCompound:55466,pubChemSubstance:192903,url:http://en.wikipedia.org/wiki/Gadopentetate_dimeglumine - $b = explode(',',trim($a[6])); + $b = explode(',',trim(str_replace('"','',$a[6]))); foreach($b as $c) { $this->getRegistry()->parseQName($c,$ns,$id1); - $ns = str_replace(array('keggcompound','keggdrug','drugbank','uniprotkb'), array('kegg','kegg','drugbank', 'uniprot'), strtolower($ns)); + $ns = str_replace(array('"',' '),'',$ns); + $ns = str_replace(array('keggcompound','keggdrug','drugbank','uniprotkb','clinicaltrials.gov','drugsproductdatabase(dpd)','nationaldrugcodedirectory','therapeutictargetsdatabase','fdadruglabelatdailymed'), + array('kegg','kegg','drugbank', 'uniprot','clinicaltrials','dpd','ndc','ttd','dailymed'), + strtolower(str_replace('"','',$ns))); if($ns == "url") { parent::addRDF( parent::QQuadO_URL($id, "rdfs:seeAlso", $id) @@ -553,7 +564,7 @@ function diseases() while($l = $this->GetReadFile()->Read(10000)) { $a = explode("\t",$l); - + $id = parent::getNamespace().$a[0]; $label = str_replace("'", "\\\'", $a[1]); @@ -586,19 +597,23 @@ function diseases() parent::addRDF( parent::triplify($id, "owl:sameAs", $sameID) ); - if(isset($a[4]) && trim($a[4]) != '') { - $d = preg_match_all('/[,]?([^\:]+):([A-Za-z0-9]+)\(([^\)]+)\)/',$a[4],$m, PREG_SET_ORDER); - foreach($m AS $n) { - if(isset($n[1]) && isset($n[2]) && !strstr($n[1]," ")) { - $n[1] = str_replace("),","",strtolower($n[1])); - $id2 = $n[1].':'.$n[2]; - parent::addRDF( - parent::triplify($id, "pharmgkb_vocabulary:x-".$n[1], $id2) - ); - if(isset($n[3]) && $n[2] != $n[3]){ + if(isset($a[4]) && trim($a[4]) != '') { + $xrefs = explode('","', $a[4]); + foreach($xrefs AS $xref) { + $xref = str_replace('"','',$xref); + $d = preg_match_all('/[,]?([^\:]+):([A-Za-z0-9]+)\(([^\)]+)\)/',$xref,$m, PREG_SET_ORDER); + foreach($m AS $n) { + if(isset($n[1]) && isset($n[2]) && !strstr($n[1]," ")) { + $n[1] = str_replace("),","",strtolower($n[1])); + $id2 = $n[1].':'.$n[2]; parent::addRDF( - parent::triplifyString($id2, "rdfs:label", str_replace(array("\'", "\""),array("\\\'", ""),$n[3])) + parent::triplify($id, "pharmgkb_vocabulary:x-".$n[1], $id2) ); + if(isset($n[3]) && $n[2] != $n[3]){ + parent::addRDF( + parent::triplifyString($id2, "rdfs:label", str_replace(array("\'", "\""),array("\\\'", ""),$n[3])) + ); + } } } } @@ -607,155 +622,6 @@ function diseases() } } - /* - 0 Position on hg18 - 1 RSID - 2 Name(s) - 3 Genes - 4 Feature - 5 Evidence - 6 Annotation - 7 Drugs - 8 Drug Classes - 9 Diseases - 10 Curation Level - 11 PharmGKB Accession ID - */ - function annotations() - { - $hash = ''; // md5 hash list - $h = explode("\t",$this->GetReadFile()->Read(100000)); // first line is header - if(count($h) != 12) { - trigger_error("Change in number of columns for variant annotations file",E_USER_ERROR); - return FALSE; - } - - while($l = $this->GetReadFile()->Read(10000)) { - $a = explode("\t",$l); - $id = parent::getNamespace().$a[11]; - $label = "variant annotation for ".$a[1]; - parent::addRDF( - parent::describeIndividual($id, $label, parent::getVoc()."Variant-Annotation"). - parent::describeClass(parent::getVoc()."Variant-Annotation", "PharmGKB Variant Annotation"). - parent::triplify($id, parent::getVoc()."variant", "dbsnp:".$a[1]). - parent::describeProperty(parent::getVoc()."variant", "Relationship between a PharmGKB entity and a variant") - ); - - if($a[2] != ''){ - parent::addRDF( - parent::triplifyString($id, parent::getVoc()."variant_description", addslashes($a[2])). - parent::describeProperty(parent::getVoc()."variant_description", "Relationship between a PharmGKB variant annotation and a description") - ); - } - - if($a[3] != '' && $a[3] != '-') { - $genes = explode(", ",$a[3]); - foreach($genes AS $gene) { - $gene = str_replace("@","",$gene); - parent::addRDF( - parent::triplify($id, parent::getVoc()."gene", parent::getNamespace().$gene) - ); - } - parent::addRDF( - parent::describeProperty(parent::getVoc()."gene", "Relationship between a PharmGKB variant annotation and a gene") - ); - } - - if($a[4] != '') { - $features = explode(", ",$a[4]); - array_unique($features); - foreach($features AS $feature) { - $z = md5($feature); - parent::addRDF( - parent::describeIndividual(parent::getRes().$z, $feature, parent::getVoc()."Feature"). - parent::triplify($id, parent::getVoc()."feature", parent::getRes().$z) - ); - } - parent::addRDF( - parent::describeClass(parent::getVoc()."Feature", "PharmGKB variant annotation feature"). - parent::describeProperty(parent::getVoc()."feature", "Relationship between a PharmGKB variant annotation and a feature") - ); - } - if($a[5] != '') { - //PubMed ID:19060906; Web Resource:http://www.genome.gov/gwastudies/ - $evds = explode("; ",$a[5]); - foreach($evds AS $evd) { - $b = explode(":",$evd); - $key = $b[0]; - array_shift($b); - $value = implode(":",$b); - if($key == "PubMed ID"){ - parent::addRDF( - parent::triplify($id, parent::getVoc()."article", "pubmed:".$value). - parent::describeProperty(parent::getVoc()."article", "Relationship between a PharmGKB entity and a PubMed article identifier") - ); - } else if($key == "Web Resource"){ - parent::addRDF( - parent::QQuadO_URL($id, parent::getVoc()."url", $value). - parent::describeProperty(parent::getVoc()."url", "Relationship between a PharmGKB entity and a web resource") - ); - } - } - } - if($a[6] != '') { //annotation - parent::addRDF( - parent::triplifyString($id, parent::getVoc()."description", str_replace("'", "\\\'", $a[6])). - parent::describeProperty(parent::getVoc()."description", "Relationship between a PharmGKB entity and its description") - ); - } - if($a[7] != '') { //drugs - $drugs = explode("; ",$a[7]); - foreach($drugs AS $drug) { - $find = array_search($drug, $drug_names_array); - if($find != FALSE){ - parent::addRDF( - parent::triplify($id, parent::getVoc()."drug", $find) - ); - } else { - $z = md5($drug); - parent::addRDF( - parent::describeIndividual(parent::getRes().$z, $drug, parent::getVoc()."Drug"). - parent::triplify($id, parent::getVoc()."drug", parent::getRes().$z) - ); - } - } - parent::addRDF( - parent::describeProperty(parent::getVoc()."drug", "Relationship between a PharmGKB variant annotation and a drug") - ); - } - - if($a[8] != '') { - $diseases = explode("; ",$a[8]); - foreach($diseases AS $disease) { - $disease = str_replace("'", "\\\'", $disease); - $find = array_search($disease, $disease_names_array); - - if($find != FALSE){ - parent::addRDF( - parent::triplify($id, parent::getVoc()."disease", $find) - ); - } else { - $z = md5($disease); - parent::addRDF( - parent::describeIndividual(parent::getRes().$z, $disease, parent::getVoc()."Disease"). - parent::triplify($id, parent::getVoc()."disease", parent::getRes().$z) - ); - } - } - parent::addRDF( - parent::describeProperty(parent::getVoc()."disease", "Relationship between a PharmGKB variant annotation and a disease") - ); - } - if(trim($a[9]) != '') { - parent::addRDF( - parent::triplifyString($id, parent::getVoc()."curation_status", trim($a[9])). - parent::describeProperty(parent::getVoc()."curation_status", "Relationship between a PharmGKB entity and its curation status") - ); - } - } - parent::WriteRDFBufferToWriteFile(); - } - /* 0 Entity1_id - PA267, rs5186, Haplotype for PA121 1 Entity1_name @@ -877,6 +743,10 @@ function rsid() $z = 0; $this->GetReadFile()->Read(); $this->GetReadFile()->Read(); + parent::addRDF( + parent::describeClass(parent::getVoc()."Variation", "PharmGKB Variation") + ); + while($l = $this->GetReadFile()->Read()) { if($z % 10000 == 0) { parent::writeRDFBufferToWriteFile(); @@ -885,10 +755,8 @@ function rsid() $rsid = "dbsnp:".$a[0]; $genes = explode(";",$a[1]); parent::addRDF( - parent::describeIndividual($rsid, $rsid, parent::getVoc()."Variation"). - parent::describeClass(parent::getVoc()."Variation", "PharmGKB Variation") + parent::describeIndividual($rsid, $rsid, parent::getVoc()."Variation") ); - $this->AddRDF($this->QQuad($rsid,"void:inDataset",$this->GetDatasetURI())); foreach($genes AS $gene) { parent::addRDF( parent::triplify($rsid, parent::getVoc()."gene", parent::getNamespace().$gene) @@ -899,83 +767,71 @@ function rsid() function clinical_ann_metadata() { - $this->GetReadFile()->Read(); - while($l = $this->GetReadFile()->Read(20000)) { + $header = array("Clinical Annotation Id","Location","Gene","Level of Evidence","Clinical Annotation Types","Genotype-Phenotype IDs","Annotation Text","Variant Annotations IDs","Variant Annotations","PMIDs","Evidence Count","Related Drugs","Related Diseases","Race"); + $this_header = explode("\t",$this->GetReadFile()->Read()); + if(count($this_header) != count($header)) { + trigger_error("Change in the number of columns. Expected ".count($header).", but found ".count($this_header),E_USER_ERROR); + return (-1); + } + while($l = $this->GetReadFile()->Read(200000)) { $a = explode("\t",$l); - $rsid = "dbsnp:$a[1]"; - $label = "clinical annotation for $rsid"; - // [0] => Clinical Annotation Id $id = parent::getNamespace().$a[0]; + $label = "clinical annotation for ".$a[1]; + // [0] => Clinical Annotation Id parent::addRDF( parent::describeIndividual($id, $label, parent::getVoc()."Clinical-Annotation"). parent::describeClass(parent::getVoc()."Clinical-Annotation", "PharmGKB Clinical Annotation") ); - // [1] => RSID - parent::addRDF( - parent::triplify($id, parent::getVoc()."variant", $rsid). - parent::describeProperty(parent::getVoc()."variant", "Relationship between a PharmGKB entity and a variant") - ); - - // [2] => Variant Names - if($a[2]) { - $names = explode(";",$a[2]); - foreach($names AS $name) { - parent::addRDF( - parent::triplifyString($rsid, parent::getVoc()."variant-name", addslashes(trim($name))) - ); - } + // [1] => RSID/allele + if(substr($a[1],0,2) == "rs") { + $rsid = "dbsnp:$a[1]"; parent::addRDF( - parent::describeProperty(parent::getVoc()."variant-name", "Relationship between a PharmGKB entity and a variant name") + parent::triplify($id, parent::getVoc()."x-dbsnp", $rsid). + parent::describeProperty(parent::getVoc()."x-dbsnp", "Relationship between a PharmGKB entity and a dbSNP entry") ); - } - // [3] => Location - if($a[3]) { - $chr = substr($a[3],0,strpos($a[3],":")); + } else { + // some kind of star allele parent::addRDF( - parent::triplifyString($rsid, parent::getVoc()."location", $a[3]). - parent::triplifyString($rsid, parent::getVoc()."chromosome", $chr). - parent::describeProperty(parent::getVoc()."location", "Relationship between a PharmGKB entity and a chromosomal location"). - parent::describeProperty(parent::getVoc()."chromosome", "Relationship between a PharmGKB entity and a chromosome") - + parent::triplifyString($id, parent::getVoc()."star-allele", $a[1]). + parent::describeProperty(parent::getVoc()."star-allele", "Relationship between a PharmGKB entity and a star allele") ); } - // [4] => Gene - if($a[4]){ - $genes = explode(";",$a[4]); + + // [2] => Gene + if($a[2]){ + $genes = explode(",",$a[2]); foreach($genes AS $gene) { preg_match("/\(([A-Za-z0-9]+)\)/",$gene,$m); parent::addRDF( - parent::triplify($rsid, parent::getVoc()."gene", parent::getNamespace().$m[1]). + parent::triplify($id, parent::getVoc()."gene", parent::getNamespace().$m[1]). parent::triplify(parent::getNamespace().$m[1], "rdf:type", parent::getVoc()."Gene") ); } } - - // [5] => Evidence Strength - if($a[5]) { + + // [3] => Evidence Level + if($a[3]) { parent::addRDF( - parent::triplifyString($id, parent::getVoc()."evidence-strength", $a[5]). - parent::describeProperty(parent::getVoc()."evidence-strength", "Relationship between a PharmGKB annotation and its evidence strength") + parent::triplifyString($id, parent::getVoc()."evidence-level", $a[3]). + parent::describeProperty(parent::getVoc()."evidence-level", "The level of evidence") ); } + // [6] => Clinical Annotation Types - if($a[6]) { - $types = explode(";",$a[6]); + if($a[4]) { + $types = explode(";",$a[4]); foreach($types AS $t) { parent::addRDF( - parent::triplifyString($id, parent::getVoc()."annotation-type", $t). - parent::triplify($id, "rdf:type", parent::getVoc().strtoupper($t)."-Annotation"). - parent::describeProperty(parent::getVoc()."annotation-type", "Relationship between a PharmGKB annotation and its type"). - parent::describeClass(parent::getVoc().strtoupper($t)."-Annotation", "$t Annotation") + parent::triplifyString($id, parent::getVoc()."annotation-type", strtolower($t)) ); } } - // [7] => Genotype-Phenotypes IDs - // [8] => Text - if($a[7]) { - $gps = explode(";",$a[7]); - $gps_texts = explode(";",$a[8]); + // [5] => Genotype-Phenotypes IDs + // [6] => Text + if($a[5]) { + $gps = explode(";",$a[5]); + $gps_texts = explode(";",$a[6]); foreach($gps AS $i => $gp) { $gp = trim($gp); $gp_text = trim($gps_texts[$i]); @@ -991,12 +847,12 @@ function clinical_ann_metadata() ); } } - - // [9] => Variant Annotations IDs - // [10] => Variant Annotations - if($a[9]) { - $b = explode(";",$a[9]); - $b_texts = explode(";",$a[10]); + + // [7] => Variant Annotations IDs + // [8] => Variant Annotations + if($a[7]) { + $b = explode(";",$a[7]); + $b_texts = explode(";",$a[8]); foreach($b AS $i => $variant) { $variant = trim($variant); $variant_text = trim ($b_texts[$i]); @@ -1006,9 +862,10 @@ function clinical_ann_metadata() ); } } - // [11] => PMIDs - if($a[11]) { - $b = explode(";",$a[11]); + + // [9] => PMIDs + if($a[9]) { + $b = explode(";",$a[9]); foreach($b AS $i => $pmid) { $pmid = trim($pmid); parent::addRDF( @@ -1016,58 +873,22 @@ function clinical_ann_metadata() ); } } - // [12] => Evidence Count - if($a[12]) { - parent::addRDF( - parent::triplifyString($id, parent::getVoc()."evidence-count", $a[12]). - parent::describeProperty(parent::getVoc()."evidence-count", "Relationship between a PharmGKB annotation and an evidence count") - ); - } - - // [13] => # Cases - if($a[13]) { - parent::addRDF( - parent::triplifyString($id, parent::getVoc()."cases-count", $a[13]). - parent::describeProperty(parent::getVoc()."cases-count", "Relationship between a PharmGKB annotation and a cases count") - ); - } - // [14] => # Controlled - if($a[14]) { - parent::addRDF( - parent::triplifyString($id, parent::getVoc()."controlled-count", $a[14]). - parent::describeProperty(parent::getVoc()."controlled-count", "Relationship between a PharmGKB annotation and a controlled count") - ); - } - // [15] => Related Genes - if($a[15]) { - $b = explode(";",$a[15]); - foreach($b AS $gene_label) { - // find the gene_id from the label - $find = array_search($gene_label, $gene_names_array); - if($find != FALSE){ - parent::addRDF( - parent::triplify($id, parent::getVoc()."related-gene", $find) - ); - } else { - $gene_id = parent::getRes().md5($gene_label); - parent::addRDF( - parent::describeIndividual($gene_id, $gene_label, parent::getVoc()."Gene"). - parent::triplify($id, parent::getVoc()."related-gene", $gene_id) - ); - } - } + + // [10] => Evidence Count + if($a[10]) { parent::addRDF( - parent::describeProperty(parent::getVoc()."related-gene", "Relationship between a PharmGKB annotation and a related gene") + parent::triplifyString($id, parent::getVoc()."evidence-count", $a[10]). + parent::describeProperty(parent::getVoc()."evidence-count", "Relationship between a PharmGKB annotation and its count of evidence") ); } - // [16] => Related Drugs - if($a[16]) { - $b = explode(";",$a[16]); + // [11] => Related Drugs + if($a[11]) { + $b = explode(";",$a[11]); foreach($b AS $drug_label) { // find the id from the label - $find = array_search($drug_label, $drug_names_array); - if($find != FALSE){ + $find = @array_search($drug_label, $this->drug_names_array); + if($find !== FALSE and $find !== NULL){ parent::addRDF( parent::triplify($id, parent::getVoc()."related-drug", $find) ); @@ -1083,13 +904,13 @@ function clinical_ann_metadata() parent::describeProperty(parent::getVoc()."related-drug", "Relationship between a PharmGKB annotation and a related drug") ); } - // [17] => Related Diseases - if($a[17]) { - $b = explode(";",$a[17]); + // [12] => Related Diseases + if($a[12]) { + $b = explode(";",$a[12]); foreach($b AS $disease_label) { // find the id from the label - $find = array_search($disease_label, $disease_names_array); - if($find != FALSE){ + $find = @array_search($disease_label, $this->disease_names_array); + if($find !== FALSE and $find !== NULL){ parent::addRDF( parent::triplify($id, parent::getVoc()."related-disease", $find) ); @@ -1105,41 +926,32 @@ function clinical_ann_metadata() parent::describeProperty(parent::getVoc()."related-disease", "Relationship between a PharmGKB annotation and a related disease") ); } - // [18] => OMB Races - if($a[18]) { + // [13] => OMB Races + if($a[13]) { parent::addRDF( - parent::triplifyString($id, parent::getVoc()."race", $a[18]). + parent::triplifyString($id, parent::getVoc()."race", $a[13]). parent::describeProperty(parent::getVoc()."race", "Relationship between a PharmGKB annotation and a race") ); } - // [19] => Is Unknown Race - if($a[19]) { - parent::addRDF( - parent::triplifyString($id, parent::getVoc()."race", (($a[19] == "TRUE")?"race known":"race unknown")) - ); - } - // [20] => Is Mixed Population - if($a[20]) { - parent::addRDF( - parent::triplifyString($id, parent::getVoc()."population-homogeneity", (($a[20] == "TRUE")?"mixed":"homogeneous")). - parent::describeProperty(parent::getVoc()."population-homogeneity", "Relationship between a PharmGKB annotation and a population homogeneity") - ); - } - // [21] => Custom Race - if($a[21]) { - parent::addRDF( - parent::triplifyString($id, parent::getVoc()."special-source", $a[21]). - parent::describeProperty(parent::getVoc()."special-source", "Relationship between a PharmGKB annotation and a special source") - ); - } } parent::writeRDFBufferToWriteFile(); } + function var_drug_ann() {return $this->variant_annotation();} + function var_fa_ann() {return $this->variant_annotation();} + function var_pheno_ann() {return $this->variant_annotation();} + + function variant_annotation() { + $canonical_header = array("Annotation ID","Variant","Gene","Drug","Literature Id","Phenotype Category","Significance","Notes","Sentence","StudyParameters"," Alleles"); + $header = explode("\t",$this->getReadFile()->read(20000)); + if(count($header) != count($canonical_header)) { + trigger_error("column mismatch! Expected ".count($canonical_header).",but found ".count($header),E_USER_ERROR); + return (-1); + } + $declaration = ''; - $this->GetReadFile()->Read(); while($l = $this->GetReadFile()->Read(20000)) { $a = explode("\t",$l); //[0] => Annotation ID @@ -1149,18 +961,27 @@ function variant_annotation() parent::describeClass(parent::getVoc()."Variant-Annotation", "PharmGKB Variant Annotation") ); - //[1] => RSID - $rsid = "dbsnp:$a[1]"; - parent::addRDF( - parent::triplify($id, parent::getVoc()."variant", $rsid). - parent::describeProperty(parent::getVoc()."variant", "Relationship between a PharmGKB entity and a variant") - ); + // [1] => RSID/allele + if(substr($a[1],0,2) == "rs") { + $rsid = "dbsnp:$a[1]"; + parent::addRDF( + parent::triplify($id, parent::getVoc()."x-dbsnp", $rsid). + parent::describeProperty(parent::getVoc()."x-dbsnp", "Relationship between a PharmGKB entity and a dbSNP entry") + ); + } else { + // some kind of star allele + parent::addRDF( + parent::triplifyString($id, parent::getVoc()."star-allele", $a[1]). + parent::describeProperty(parent::getVoc()."star-allele", "Relationship between a PharmGKB entity and a star allele") + ); + } + //[2] => Gene //CYP3A (PA27114),CYP3A4 (PA130) if($a[2]) { $genes = explode(",",$a[2]); foreach($genes AS $gene) { - preg_match("/\(([A-Za-z0-9]+)\)/",$gene,$m); + preg_match("/\((PA[A-Za-z0-9]+)\)/",$gene,$m); if(isset($m[1])) { parent::addRDF( parent::triplify($id, parent::getVoc()."gene", parent::getNamespace().$m[1]). @@ -1174,7 +995,7 @@ function variant_annotation() if($a[3]) { $drugs = explode(",",$a[3]); foreach($drugs AS $drug) { - preg_match("/\(([A-Za-z0-9]+)\)/",$drug,$m); + preg_match("/\((PA[A-Za-z0-9]+)\)/",$drug,$m); if(isset($m[1])) { parent::addRDF( parent::triplify($id, parent::getVoc()."drug", parent::getNamespace().$m[1]). @@ -1183,6 +1004,7 @@ function variant_annotation() } } } + // [4] => Literature Id if($a[4]) { $b = explode(";",$a[4]); @@ -1195,15 +1017,12 @@ function variant_annotation() } } - //[5] => Secondary Category + //[5] => Phenotype if($a[5]) { $types = explode(";",$a[5]); foreach($types AS $t) { parent::addRDF( - parent::triplifyString($id, parent::getVoc()."annotation-type", $t). - parent::triplify($id, "rdf:type", parent::getVoc().strtoupper($t)."-Annotation"). - parent::describeProperty(parent::getVoc()."annotation-type", "Relationship between a PharmGKB annotation and its type"). - parent::describeClass(parent::getVoc().strtoupper($t)."-Annotation", "$t Annotation") + parent::triplifyString($id, parent::getVoc()."annotation-type", strtolower($t)) ); } } @@ -1214,6 +1033,7 @@ function variant_annotation() parent::describeProperty(parent::getVoc()."significant", "Relationship between a PharmGKB annotation and its significance") ); } + // [7] => Notes if($a[7]) { parent::addRDF( @@ -1237,28 +1057,15 @@ function variant_annotation() $t = parent::getNamespace().trim($sp); parent::addRDF( parent::describeIndividual($t, $sp, parent::getVoc()."Study-Parameter"). - parent::triplify($id, parent::getVoc()."study-parameter", $t). - parent::describeClass(parent::getVoc()."Study-Parameter", "PharmGKB study parameter"). - parent::describeProperty(parent::getVoc()."study-parameter", "Relationship between a PharmGKB annotation and a study parameter") + parent::triplify($id, parent::getVoc()."study-parameter", $t) ); } } - //[10] => KnowledgeCategories + //[10] => Alleles if($a[10]) { - $cats = explode(";",$a[10]); - foreach($cats AS $cat) { - $t = parent::getNamespace().$cat; - parent::addRDF( - parent::triplify($id, parent::getVoc()."article-category", $t) - ); - if(!isset($declaration[$t])) { - $declaration[$t] = ''; - parent::addRDF( - parent::describeIndividual($t, $cat, parent::getVoc()."KnowledgeCategory"). - parent::describeClass(parent::getVoc()."KnowledgeCategory", "PharmGKB Knowledge Category") - ); - } - } + parent::addRDF( + parent::triplifyString($id, parent::getVoc()."alleles", $a[10]) + ); } } return TRUE; diff --git a/runparser.php b/runparser.php index d07e575..1a7e46a 100644 --- a/runparser.php +++ b/runparser.php @@ -34,8 +34,6 @@ public function __construct($argv) $parsers = $this->getParsers(); parent::addParameter('parser',true,implode("|",$parsers),null,'bio2rdf parser to run'); parent::addParameter('statistics',false,"true|false","false",'generate statistics'); - parent::addParameter('bio2rdf_release',false,null,"3",'Bio2RDF release number'); - if(parent::setParameters($argv,true) === FALSE) { if(parent::getParameterValue('parser') == '') { diff --git a/sgd/sgd.php b/sgd/sgd.php index a7c7198..013a325 100644 --- a/sgd/sgd.php +++ b/sgd/sgd.php @@ -38,7 +38,7 @@ function __construct($argv) { parent::__construct($argv,"sgd"); parent::addParameter('files',true,'all|dbxref|features|domains|protein|goa|goslim|complex|interaction|phenotype|pathways|mapping','all','all or comma-separated list of files to process'); parent::addParameter('download_url',false,null,'http://downloads.yeastgenome.org/'); - parent::addParameter('ncbo_download_dir', false, null, '/data/download/ncbo/', 'directory of ncbo ontologies'); + parent::addParameter('ncbo_download_dir', false, null, '/data/download/bioportal/', 'directory of bioportal ontologies'); parent::addParameter('ncbo_api_key',true,null,null,'your NCBO API key'); parent::addParameter('one_file',false,'true|false','true',"whether to produce a single file output"); parent::initialize(); @@ -639,7 +639,11 @@ function domains(){ "SignalP_GRAM_POSITIVE" => "signalp", "SignalP_GRAM_NEGATIVE" => "signalp", "SignalP_EUK" => "signalp", - "TMHMM" => "tmhmm" + "TMHMM" => "tmhmm", + "ProDom" => "prodom", + "ProSiteProfiles" => "prosite", + "ProSitePatterns" => "prosite", + "Hamap" => "hamap" ); while($l = $this->GetReadFile()->Read(2048)) { diff --git a/statistics/bio2rdf-individual-page.php b/statistics/bio2rdf-individual-page.php index a746231..fc6338f 100644 --- a/statistics/bio2rdf-individual-page.php +++ b/statistics/bio2rdf-individual-page.php @@ -84,6 +84,7 @@ while($l = fgets($fp)) { if(!$l or $l[0] == "#" ) continue; $a = explode("\t",$l); + if(!isset($a[2])) continue; if($dataset == 'all' or $dataset == trim($a[2])) $list[] = trim($a[2]); } fclose($fp); @@ -95,7 +96,7 @@ $endpoint = getEndpointInfo($dataset); $options['port'] = $endpoint['isql']; - $options['sparql'] = $entry['sparql'] = "http://localhost:".$endpoint['sparql']."/sparql"; + $options['sparql'] = $entry['sparql'] = "http://s2.semanticscience.org:".$endpoint['sparql']."/sparql"; $entry['target.endpoint'] = $entry['sparql']; if($options['target.endpoint']) $entry['target.endpoint'] = $options['target.endpoint']; @@ -107,6 +108,7 @@ $entry['from'] = "FROM <".$entry['graph'].">"; $entry['describe'] = ''; $outfile = $options['odir'].$dataset."/$dataset.html"; + $outfile = $options['odir']."$dataset.html"; makeHTML($entry,$outfile); echo "done.".PHP_EOL; } diff --git a/statistics/endpoint-statistics.php b/statistics/endpoint-statistics.php index f67d0a0..69ad353 100644 --- a/statistics/endpoint-statistics.php +++ b/statistics/endpoint-statistics.php @@ -29,6 +29,7 @@ */ $fnx = array( +// "typePropertyTypeCount" "triples", "distinctEntities", "distinctSubjects", @@ -744,6 +745,15 @@ function addSubjectPropertyObjectCount() function addTypePropertyTypeCount() { global $options; + $sparql = "SELECT ?p +".$options['from-graph']." + { ?s ?p ?o FILTER (!isLiteral(?o)) } + GROUP BY ?p +"; + $r = query($sparql); + foreach($r AS $c) { + $p = $c->p->value; + $sparql = "SELECT distinct ?stype (str(?stype_label) AS ?stype_label) (?sn AS ?sn) (?dsn AS ?dsn) ?p (str(?plabel) AS ?plabel) @@ -756,6 +766,7 @@ function addTypePropertyTypeCount() ?s ?p ?o . ?s a ?stype . ?o a ?otype . + FILTER(?p = <$p>) } GROUP BY ?p ?stype ?otype } @@ -798,6 +809,7 @@ function addTypePropertyTypeCount() Quad("http://bio2rdf.org/bio2rdf.dataset_vocabulary:Dataset-Object-Count", "http://www.w3.org/2000/01/rdf-schema#subClassOf", "http://bio2rdf.org/bio2rdf.dataset_vocabulary:Dataset-Descriptor") ); } + } // foreach property } function addDatasetPropertyDatasetCount() diff --git a/taxonomy/taxonomy.php b/taxonomy/taxonomy.php index e9fcbae..4dbb1b3 100644 --- a/taxonomy/taxonomy.php +++ b/taxonomy/taxonomy.php @@ -48,7 +48,7 @@ class TaxonomyParser extends Bio2RDFizer{ ), "file_url" => "ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdmp.zip" ), - "gi2taxid_protein" => array( +/* "gi2taxid_protein" => array( "filename" => "gi_taxid_prot.zip", "contents" => array( "gi_taxid_prot" => "gi_taxid_prot.dmp", @@ -62,7 +62,7 @@ class TaxonomyParser extends Bio2RDFizer{ ), "file_url" => "ftp://ftp.ncbi.nih.gov/pub/taxonomy/gi_taxid_nucl.zip" ) - ); +*/ ); function __construct($argv) { parent::__construct($argv, "taxonomy"); @@ -325,13 +325,20 @@ private function citations() continue; } $c = parent::getRes()."citation-id-".$a[0]; + $seealso = isset($a[4])?trim($a[4]):""; + if($seealso) { + $seealso = str_replace(array("lx: DOI ","http;//"), array("http://dx.doi.org/","http://"), $seealso); + if(strlen($seealso) > 2 and !strstr($seealso,"http")) $seealso = "http://".$seealso; + $seelalso = parent::triplify($c, "rdfs:seeAlso", $seealso); + } + parent::addRDF( parent::describeIndividual($c, $a[1], $this->getVoc()."Citation"). parent::describeClass($this->getVoc()."Citation", "Citation"). parent::triplifyString($c, parent::getVoc()."citation-key", $a[1]). ($a[2]=="0"?"":parent::triplify($c, parent::getVoc()."x-pubmed", "pubmed:".$a[2])). - (!isset($a[4])?"":parent::triplify($c, "rdfs:seeAlso", str_replace("lx: DOI ","http://dx.doi.org/", $a[4]))). - (!isset($a[5])?"":parent::triplifyString($c, parent::getVoc()."text", str_replace("\"","", $a[5]))) + $seealso. + ((isset($a[5]) and $a[5])?parent::triplifyString($c, parent::getVoc()."text", str_replace("\"","", $a[5])):"") ); if(isset($a[6])) { $taxids = explode(" ", trim($a[6])); diff --git a/wormbase/wormbase.php b/wormbase/wormbase.php index c50e730..023fff2 100644 --- a/wormbase/wormbase.php +++ b/wormbase/wormbase.php @@ -35,7 +35,7 @@ class WormbaseParser extends Bio2RDFizer { function __construct($argv) { parent::__construct($argv, "wormbase"); parent::addParameter('files', true, 'all|geneIDs|functional_descriptions|gene_associations|gene_interactions|phenotype_associations','all','files to process'); - parent::addParameter('release', false, null, 'WS243', 'Release version of WormBase'); + parent::addParameter('release', false, null, 'current', 'Release version of WormBase'); parent::addParameter('download_url', false, null,'ftp://ftp.wormbase.org/pub/wormbase/'); parent::initialize(); }//constructor @@ -49,12 +49,13 @@ public function run() $files = explode(",",parent::getParameterValue('files')); } $release = parent::getParameterValue('release'); + $releaseb = "WS249"; $remote_files = array( - "geneIDs" => "species/c_elegans/annotation/geneIDs/c_elegans.PRJNA13758.".parent::getParameterValue('release').".geneIDs.txt.gz", - "functional_descriptions" => "species/c_elegans/annotation/functional_descriptions/c_elegans.PRJNA13758.".parent::getParameterValue('release').".functional_descriptions.txt.gz", - "gene_interactions" => "species/c_elegans/annotation/gene_interactions/c_elegans.PRJNA13758.".parent::getParameterValue('release').".gene_interactions.txt.gz", - "gene_associations" => "releases/".$release."/ONTOLOGY/gene_association.".parent::getParameterValue('release').".wb", - "phenotype_associations" => "releases/".$release."/ONTOLOGY/phenotype_association.".parent::getParameterValue('release').".wb" + "geneIDs" => "species/c_elegans/annotation/geneIDs/c_elegans.PRJNA13758.".$release.".geneIDs.txt.gz", + "functional_descriptions" => "species/c_elegans/annotation/functional_descriptions/c_elegans.PRJNA13758.".$release.".functional_descriptions.txt.gz", + "gene_interactions" => "species/c_elegans/annotation/gene_interactions/c_elegans.PRJNA13758.".$release.".gene_interactions.txt.gz", + "gene_associations" => "releases/current-production-release/ONTOLOGY/gene_association.".$releaseb.".wb", + "phenotype_associations" => "releases/current-production-release/ONTOLOGY/phenotype_association.".$releaseb.".wb" ); $local_files = array( @@ -62,7 +63,7 @@ public function run() "functional_descriptions" => "wormbase.".parent::getParameterValue('release').".functional_descriptions.txt.gz", "gene_interactions" => "wormbase.".parent::getParameterValue('release').".gene_interactions.txt.gz", "gene_associations" => "wormbase.".parent::getParameterValue('release').".gene_association.wb", - "phenotype_associations" => "wormbase.".parent::getParameterValue('release')."phenotype_associations.wb" + "phenotype_associations" => "wormbase.".parent::getParameterValue('release').".phenotype_associations.wb" ); $idir = parent::getParameterValue('indir'); @@ -84,7 +85,6 @@ public function run() Utils::DownloadSingle($rfile, $lfile); echo "done!".PHP_EOL; } - if(strstr($lfile, "gz")){ parent::setReadFile($lfile, TRUE); } else { @@ -155,7 +155,6 @@ function geneIDs() if($l[0] == '#') continue; // taxon, gene id, symbol, cosmid, status $data = explode(",",trim($l)); - if($first) { if(($c = count($data) != 5)) { trigger_error("WormBase function expects 5 fields, found $c!".PHP_EOL, E_USER_WARNING); @@ -170,7 +169,8 @@ function geneIDs() parent::addRDF( parent::describeIndividual($id, $label, parent::getVoc()."Gene"). parent::describeClass(parent::getVoc()."Gene", "Wormbase Gene"). - parent::triplify($id, parent::getVoc()."taxonomy", "taxonomy:".$data[1]). + parent::triplify($id, "rdfs:seeAlso", "http://www.wormbase.org/db/gene/gene?name=".$data[1]). + parent::triplify($id, parent::getVoc()."x-taxonomy", "taxonomy:".$data[0]). parent::triplifyString($id, parent::getVoc()."approved-gene-name", $data[2]) ); #Add cosmid name @@ -190,10 +190,11 @@ function functional_descriptions() { while($l = $this->getReadFile()->read(2000000)){ if($l[0] == "#") continue; - // gene_id public_name molecular_name concise_description provisional_description detailed_description gene_class_description + if(strstr($l,"gene_id")) continue; - $a = explode("\t",rtrim($l)); - if(count($a) != 7) {trigger_error("Found one row that only has ".count($a)." columns, expecting 7");continue;} + // gene_id public_name molecular_name concise_description provisional_description detailed_description automated_description gene_class_description + $a = explode("\t",$l); + if(count($a) != 8) {trigger_error("Found one row that only has ".count($a)." columns, expecting 8",E_USER_ERROR);continue;} $id = parent::getNamespace().$a[0]; $label = $a[1].($a[2]?" (".$a[2].")":""); @@ -204,7 +205,8 @@ function functional_descriptions() parent::triplifyString($id, parent::getVoc()."concise-description", $a[3]). parent::triplifyString($id, parent::getVoc()."provisional-description", $a[4]). parent::triplifyString($id, parent::getVoc()."detailed-description", $a[5]). - parent::triplifyString($id, parent::getVoc()."gene-class-description", $a[6]) + parent::triplifyString($id, parent::getVoc()."automated-description", $a[6]). + parent::triplifyString($id, parent::getVoc()."gene-class-description", trim($a[7])) ); parent::writeRDFBufferToWriteFile(); } @@ -213,12 +215,15 @@ function functional_descriptions() function gene_associations(){ $go_evidence_type = array( 'IC'=>'eco:0000001', + 'IBA'=>'eco:0000318', 'IDA'=>'eco:0000314', 'IEA'=>'eco:0000203', 'IEP'=>'eco:0000008', 'IGI'=>'eco:0000316', + 'IKR'=>'eco:0000320', 'IMP'=>'eco:0000315', 'IPI'=>'eco:0000021', + 'IRD'=>'eco:0000321', 'ISM'=>'eco:0000202', 'ISO'=>'eco:0000201', 'ISS'=>'eco:0000044', @@ -258,16 +263,17 @@ function gene_associations(){ $split_paper = explode(":", $paper); if($split_paper[0] == "PMID"){ $paper_id = "pubmed:".$split_paper[1]; + parent::addRDF( + parent::triplify($association_id, parent::getVoc()."x-pubmed", $paper_id) + ); } elseif($split_paper[0] == "WB_REF"){ $paper_id = parent::getNamespace().$split_paper[1]; $paper_label = "Wormbase paper ".$split_paper[1]; parent::addRDF( - parent::describeIndividual($paper_id, $paper_label, parent::getVoc()."Publication") + parent::describeIndividual($paper_id, $paper_label, parent::getVoc()."Publication"). + parent::triplify($association_id, parent::getVoc()."publication", $paper_id) ); } - parent::addRDF( - parent::triplify($association_id, parent::getVoc()."publication", $paper_id) - ); }//foreach parent::WriteRDFBufferToWriteFile(); }//while @@ -287,8 +293,7 @@ function phenotype_associations() $not = $data[3]; $phenotype = $data[4]; $paper = $data[5]; - $var_rnai = explode("WB:",$data[7]); - + $variant = explode("|",trim($data[7])); $neg = ($not == "NOT"?"Negative ":""); $pa_id = parent::getRes().($z++); @@ -308,18 +313,17 @@ function phenotype_associations() ); if(strstr($data[7], "WBVar")){ - foreach($var_rnai AS $v) { - $v = str_replace("|","",$v); + foreach($variant AS $v) { + if(trim($v) == '') continue; parent::addRDF( - parent::describeIndividual(parent::getNamespace().$v, "Variant of ".$gene, parent::getVoc()."Gene-Variant"). + parent::describeIndividual($v, "Variant of ".$gene, parent::getVoc()."Gene-Variant"). parent::describeClass(parent::getVoc()."Gene-Variant","Gene Variant"). - parent::triplify($pa_id, parent::getVoc()."associated-gene-variant", parent::getNamespace().$v) + parent::triplify($pa_id, parent::getVoc()."associated-gene-variant", $v) ); } } elseif(strstr($data[7], "WBRNAi")){ - foreach($var_rnai AS $v) { - $v = str_replace("|","",$v); - $var_rnai_id = parent::getNamespace().$v; + foreach($variant AS $v) { + $var_rnai_id = $v; $var_rnai_label = "RNAi ".$v; $rnai_exp_id = parent::getRes().($z++); parent::addRDF( @@ -332,7 +336,9 @@ function phenotype_associations() parent::triplify($pa_id, parent::getVoc()."associated-rnai-knockdown-experiment", $rnai_exp_id) ); } - } + } else { +// var_dump($variant); + } if($neg) { parent::addRDF(