Ticket #106: convert.rb

File convert.rb, 2.0 KB (added by rgrp, 5 years ago)
Line 
1require 'rubygems'
2require 'bluecloth'
3require 'open-uri'
4require 'json'
5require 'hpricot'
6
# Escape the characters that are significant in XML so +s+ can be embedded
# safely in element text or in a double-quoted attribute value.
#
# s - the String to escape; it is not modified.
#
# Returns a new String with &, <, > and " replaced by their XML entities.
def escape_xml(s)
  # The ampersand must be handled first; otherwise the "&" introduced by the
  # other entities would itself be escaped a second time.
  s.gsub("&", "&amp;").
    gsub("<", "&lt;").
    gsub(">", "&gt;").
    gsub('"', "&quot;")
end
16 
# Export script: fetches the CKAN package list, then each package's metadata,
# and writes one <ckan:Package> RDF/XML element per package to ckan.rdf in
# the current directory.

# Read the body served at +url+ through open-uri and close the handle.
# Ruby 3.0 removed URL support from Kernel#open, so URI.open is used when
# open-uri provides it; older rubies fall back to Kernel#open.
def read_url(url)
  if URI.respond_to?(:open)
    URI.open(url) { |io| io.read }
  else
    open(url) { |io| io.read }
  end
end

# True when +value+ is neither nil nor the empty string.
def filled?(value)
  !value.nil? && value != ""
end

url = "http://ckan.net/api/rest/package"

# Package names that are left out of the export.
skipped = ["patent-nber", "esfdb", "hapmap"]

pkglist = JSON.parse(read_url(url))

# Block form guarantees the file is flushed and closed even when a request
# or a JSON parse fails part-way through the package list.
File.open("ckan.rdf", "w") do |f|
  f.puts "<rdf:RDF     \n" \
         " xmlns:dc=\"http://purl.org/dc/terms/\"\n" \
         " xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\"\n" \
         " xmlns:foaf=\"http://xmlns.com/foaf/0.1/\"\n" \
         " xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n" \
         " xmlns:cc=\"http://creativecommons.org/ns#\"\n" \
         " xmlns:ckan=\"http://ckan.net/ontology/\">\n\n"

  pkglist.each do |name|
    next if name.nil? || skipped.include?(name)

    json = JSON.parse(read_url("http://ckan.net/api/rest/package/#{name}/"))

    # The name ends up inside attribute values, so it is escaped like the
    # other interpolated fields.
    safe_name = escape_xml(name.to_s)

    f.puts "<ckan:Package rdf:about=\"http://ckan.net/package/rdf/#{safe_name}/\">"

    if filled?(json["title"])
      f.puts "<dc:title>#{escape_xml(json["title"])}</dc:title>"
    end

    if filled?(json["url"])
      f.puts "<foaf:homepage rdf:resource=\"#{escape_xml(json["url"])}\"/>"
    end

    f.puts "<foaf:isPrimaryTopicOf rdf:resource=\"http://ckan.net/package/#{safe_name}\"/>"

    if filled?(json["download_url"])
      f.puts "<ckan:downloadURL rdf:resource=\"#{escape_xml(json["download_url"])}\"/>"
    end

    # A package without a "tags" entry is treated as having no tags.
    (json["tags"] || []).each do |tag|
      f.puts "<dc:subject>#{escape_xml(tag)}</dc:subject>"
    end

    if filled?(json["notes"])
      # Notes are rendered to HTML by BlueCloth and passed through Hpricot
      # before being embedded as an XML literal.
      f.puts "<dc:description rdf:parseType=\"Literal\">"
      f.puts Hpricot(BlueCloth.new(json["notes"]).to_html)
      f.puts "</dc:description>"
    end

    f.puts "</ckan:Package>"
  end

  f.puts "\n\n</rdf:RDF>"
end
75