1 | require 'rubygems' |
---|
2 | require 'bluecloth' |
---|
3 | require 'open-uri' |
---|
4 | require 'json' |
---|
5 | require 'hpricot' |
---|
6 | |
---|
# Escape the characters that are special in XML so that a string can be
# embedded in element text or in a double-quoted attribute value.
#
# s - the String to escape; it is duplicated first and never modified.
#
# Returns a new String in which &, <, > and " are replaced by the
# corresponding predefined XML entity references.
def escape_xml(s)
  escaped = s.dup

  # The ampersand must be handled first; otherwise the ampersands introduced
  # by the other entity references would be escaped a second time.
  escaped.gsub!("&", "&amp;")
  escaped.gsub!("<", "&lt;")
  escaped.gsub!(">", "&gt;")
  # Callers interpolate the result into double-quoted attribute values, so a
  # literal double quote has to be escaped as well.
  escaped.gsub!("\"", "&quot;")

  return escaped
end
---|
16 | |
---|
# Export the CKAN package registry as RDF/XML into the file "ckan.rdf".
#
# The list of package names is fetched from the CKAN REST API; each package's
# JSON record is then fetched individually and written out as one
# <ckan:Package> element inside a single <rdf:RDF> document.
#
# NOTE(review): the bare open(url) call relies on open-uri extending
# Kernel#open. Newer Ruby releases require URI.open instead -- confirm which
# Ruby version this script is expected to run on before changing it.
url = "http://ckan.net/api/rest/package"

# Fetch the package list before opening the output file, so a failed request
# does not truncate an existing ckan.rdf.
data = open(url).read()
pkglist = JSON.parse( data )

# Package names that are excluded from the export (the reason is not recorded
# in this file).
skipped_packages = ["patent-nber", "esfdb", "hapmap"]

# The block form of File.open closes the file even if a request or a JSON
# parse raises part-way through the export.
File.open("ckan.rdf", "w") do |f|
  f.puts "\
<rdf:RDF
xmlns:dc=\"http://purl.org/dc/terms/\"\n\
xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\"\n\
xmlns:foaf=\"http://xmlns.com/foaf/0.1/\"\n\
xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n\
xmlns:cc=\"http://creativecommons.org/ns#\"\n\
xmlns:ckan=\"http://ckan.net/ontology/\">\n\n"

  #name = "patent-nber"
  pkglist.each do |name|
    next if name.nil? || skipped_packages.include?(name)

    api_request = "http://ckan.net/api/rest/package/#{name}/"
    data = open(api_request).read()
    json = JSON.parse( data )

    # The name comes from the remote API and is placed inside attribute
    # values, so it is escaped like every other interpolated value.
    xml_name = escape_xml( name.to_s )

    f.puts "<ckan:Package rdf:about=\"http://ckan.net/package/rdf/#{xml_name}/\">"

    if json["title"] != nil && json["title"] != ""
      f.puts "<dc:title>#{escape_xml( json["title"] )}</dc:title>"
    end

    if json["url"] != nil && json["url"] != ""
      f.puts "<foaf:homepage rdf:resource=\"#{escape_xml( json["url"] )}\"/>"
    end

    f.puts "<foaf:isPrimaryTopicOf rdf:resource=\"http://ckan.net/package/#{xml_name}\"/>"
    if json["download_url"] != nil && json["download_url"] != ""
      f.puts "<ckan:downloadURL rdf:resource=\"#{escape_xml( json["download_url"] )}\"/>"
    end

    # A record without a "tags" key must not abort the whole export.
    (json["tags"] || []).each do |tag|
      f.puts "<dc:subject>#{escape_xml( tag )}</dc:subject>"
    end

    if json["notes"] != nil && json["notes"] != ""
      # The notes are passed through BlueCloth and Hpricot, and the resulting
      # markup is written unescaped as an rdf:parseType="Literal" value.
      f.puts "<dc:description rdf:parseType=\"Literal\">"
      bc = BlueCloth.new( json["notes"] )
      doc = Hpricot( bc.to_html() )
      f.puts doc
      f.puts "</dc:description>"
    end

    f.puts "</ckan:Package>"
  end

  f.puts "\n\n</rdf:RDF>"
end
---|
75 | |
---|