| 1 | require 'rubygems' |
|---|
| 2 | require 'bluecloth' |
|---|
| 3 | require 'open-uri' |
|---|
| 4 | require 'json' |
|---|
| 5 | require 'hpricot' |
|---|
| 6 | |
|---|
# Returns a copy of +s+ that is safe to embed in XML character data or in a
# double-quoted XML attribute value.
#
# The characters "&", "<", ">" and the double quote are replaced by their
# predefined XML entities. The argument itself is never modified.
#
# s - the text to escape; converted with #to_s first.
#
# Returns a new String.
def escape_xml(s)
  # "&" must be replaced first; otherwise the ampersands introduced by the
  # later replacements would themselves be escaped again.
  s.to_s.
    gsub("&", "&amp;").
    gsub("<", "&lt;").
    gsub(">", "&gt;").
    gsub("\"", "&quot;")
end
|---|
| 16 | |
|---|
# Base URL of the CKAN REST endpoint; requesting it directly yields the list
# of package names, and appending "/<name>/" yields one package record.
CKAN_PACKAGE_API = "http://ckan.net/api/rest/package"

# Package names that are never exported.
# NOTE(review): the original script excluded these by name without recording
# a reason -- confirm they still need to be skipped.
SKIPPED_PACKAGES = ["patent-nber", "esfdb", "hapmap"]

# Opening <rdf:RDF> element with every namespace prefix used below.
RDF_HEADER = [
  "<rdf:RDF",
  "xmlns:dc=\"http://purl.org/dc/terms/\"",
  "xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\"",
  "xmlns:foaf=\"http://xmlns.com/foaf/0.1/\"",
  "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"",
  "xmlns:cc=\"http://creativecommons.org/ns#\"",
  "xmlns:ckan=\"http://ckan.net/ontology/\">"
].join("\n") + "\n\n"

# Downloads +url+ and returns its body parsed as JSON.
#
# URI#read (mixed into HTTP URIs by open-uri) is used instead of Kernel#open,
# because Ruby 3.0 removed open-uri's ability to open URLs through
# Kernel#open.
def fetch_json(url)
  JSON.parse(URI.parse(url).read)
end

# True when a package field holds something other than nil or "".
def field_present?(value)
  !value.nil? && value != ""
end

# Writes one <ckan:Package> element describing +json+ to +out+.
#
# out  - an IO-like object responding to #puts.
# name - the package name as returned by the package list.
# json - the parsed package record (a Hash with string keys).
def write_package(out, name, json)
  out.puts "<ckan:Package rdf:about=\"http://ckan.net/package/rdf/#{escape_xml(name)}/\">"

  if field_present?(json["title"])
    out.puts "<dc:title>#{escape_xml(json["title"])}</dc:title>"
  end

  if field_present?(json["url"])
    out.puts "<foaf:homepage rdf:resource=\"#{escape_xml(json["url"])}\"/>"
  end

  out.puts "<foaf:isPrimaryTopicOf rdf:resource=\"http://ckan.net/package/#{escape_xml(name)}\"/>"

  if field_present?(json["download_url"])
    out.puts "<ckan:downloadURL rdf:resource=\"#{escape_xml(json["download_url"])}\"/>"
  end

  # A record without a "tags" entry simply produces no <dc:subject> lines.
  (json["tags"] || []).each do |tag|
    out.puts "<dc:subject>#{escape_xml(tag)}</dc:subject>"
  end

  if field_present?(json["notes"])
    out.puts "<dc:description rdf:parseType=\"Literal\">"
    # The notes are converted to HTML with BlueCloth and passed through
    # Hpricot before being written as an XML literal (presumably so the
    # embedded markup is tidied -- verify against Hpricot's output).
    html = BlueCloth.new(json["notes"]).to_html
    out.puts Hpricot(html)
    out.puts "</dc:description>"
  end

  out.puts "</ckan:Package>"
end

pkglist = fetch_json(CKAN_PACKAGE_API)

# The block form closes ckan.rdf even when a request or parse fails midway.
File.open("ckan.rdf", "w") do |f|
  f.puts RDF_HEADER

  pkglist.each do |name|
    next if name.nil? || SKIPPED_PACKAGES.include?(name)

    write_package(f, name, fetch_json("#{CKAN_PACKAGE_API}/#{name}/"))
  end

  f.puts "\n\n</rdf:RDF>"
end
|---|
| 75 | |
|---|