Ticket #1144 (new enhancement)

Opened 3 years ago

Last modified 23 months ago

Support DSPL

Reported by: timmcnamara Owned by:
Priority: awaiting triage Milestone: ckan-backlog
Component: ckan Keywords:
Cc: Repository: ckan
Theme: none

Description

DSPL, the Dataset Publishing Language, is being promoted by Google for its "Google Public Data Explorer" system. It is an XML format with metadata.

The format is described in the developer docs on the Google Code site.

Google provides a Python script which reads CSV data and generates DSPL.

Sample from http://code.google.com/apis/publicdata/docs/dspl_sample.html:

<?xml version="1.0" encoding="UTF-8"?>
<dspl xmlns="http://schemas.google.com/dspl/2010"
    xmlns:geo="http://www.google.com/publicdata/dataset/google/geo"
    xmlns:geo_usa="http://www.google.com/publicdata/dataset/google/geo/us"
    xmlns:time="http://www.google.com/publicdata/dataset/google/time"
    xmlns:quantity="http://www.google.com/publicdata/dataset/google/quantity"
    xmlns:entity="http://www.google.com/publicdata/dataset/google/entity">

  <import namespace="http://www.google.com/publicdata/dataset/google/time"/>
  <import namespace="http://www.google.com/publicdata/dataset/google/quantity"/>
  <import namespace="http://www.google.com/publicdata/dataset/google/entity"/>
  <import namespace="http://www.google.com/publicdata/dataset/google/geo"/>
  
  <!-- Dataset-level metadata: display name, description and informational URL. -->
  <info>
    <name><value>My statistics</value></name>
    <description><value>Some very interesting statistics about countries</value></description>
    <url><value>http://www.stats-bureau.com/mystats/info.html</value></url>
  </info>

  <!-- Publisher of the dataset: display name and web site. -->
  <provider>
    <name><value>Bureau of Statistics</value></name>
    <url><value>http://www.stats-bureau.com</value></url>
  </provider>

  <!-- Topic hierarchy that concepts refer to via <topic ref="..."/>.
       "social_indicators" nests three sub-topics. -->
  <topics>
    <topic id="geography">
      <info>
        <name><value>Geography</value></name>
      </info>
    </topic>
    <topic id="social_indicators">
      <info>
        <name><value>Social indicators</value></name>
      </info>
      <topic id="population_indicators">
        <info>
          <name><value>Population indicators</value></name>
        </info>
      </topic>
      <topic id="poverty_and_income">
        <info>
          <!-- A literal ampersand must be written as &amp; in XML text. -->
          <name><value>Poverty &amp; income</value></name>
        </info>
      </topic>
      <topic id="health">
        <info>
          <name><value>Health</value></name>
        </info>
      </topic>
    </topic>
  </topics>

  <concepts>
    <!-- Population metric. As noted in the tutorial, this concept extends
         quantity:amount (the "quantity" prefix is declared and imported on the
         root element). -->
    <concept id="population" extends="quantity:amount">
      <info>
        <name>
          <value>Population</value>
        </name>
        <description>
          <value>Size of the resident population.</value>
        </description>
      </info>
      <topic ref="population_indicators"/>
      <type ref="integer"/>
    </concept>

    <!-- This country concept is defined for educational purposes only. A country
         concept exists in the Google geo dataset. See:
         http://code.google.com/apis/publicdata/docs/canonical/geo.html -->
    <concept id="country" extends="geo:location">
      <info>
        <name><value>Country</value></name>
        <description><value>My list of countries</value></description>
      </info>
      <type ref="string"/>
      <!-- Additional string property "name", labelled in English. -->
      <property id="name">
        <info>
          <name><value xml:lang="en">Country name</value></name>
          <description><value xml:lang="en">The official name of the country</value></description>
        </info>
        <type ref="string"/>
      </property>
      <!-- Concept data is read from the table with id "countries_table". -->
      <table ref="countries_table"/>
    </concept>

    <!-- This US state concept is defined for educational purposes only. A US state
         concept exists in the Google geo US dataset. See:
         http://code.google.com/apis/publicdata/docs/canonical/geo.us.html -->
    <concept id="state" extends="geo:location">
      <info>
        <name><value>State</value></name>
        <description><value>US states</value></description>
      </info>
      <type ref="string"/>
      <!-- References the "country" concept and flags it as this concept's parent. -->
      <property concept="country" isParent="true"/>
      <!-- Concept data is read from the table with id "states_table". -->
      <table ref="states_table"/>
    </concept>

    <!-- Gender dimension; besides name and description it also supplies a
         plural label and a label for the total across values. -->
    <concept id="gender" extends="entity:entity">
      <info>
        <name><value>Gender</value></name>
        <description><value>Gender, Male or Female</value></description>
        <pluralName><value>Genders</value></pluralName>
        <totalName><value>Both genders</value></totalName>
      </info>
      <type ref="string"/>
      <!-- Concept data is read from the table with id "genders_table". -->
      <table ref="genders_table"/>
    </concept>

    <!-- Unemployment rate metric, filed under the "social_indicators" topic. -->
    <concept id="unemployment_rate" extends="quantity:rate">
      <info>
        <name>
          <value>unemployment rate</value>
        </name>
        <description>
          <!-- Kept on one line: a line break inside <value> would put the
               newline and indentation into the description text itself. -->
          <value>The percent of the labor force that is unemployed, not seasonally adjusted.</value>
        </description>
        <url><value>http://www.bls.gov/cps/cps_htgm.htm</value></url>
      </info>
      <topic ref="social_indicators"/>
      <type ref="float"/>
      <!-- Boolean attribute marking the values as percentages. -->
      <attribute id="is_percentage">
        <type ref="boolean"/>
        <value>true</value>
      </attribute>
    </concept>

  </concepts>

  <!-- Slices: each one combines dimension concepts with metric concepts and
       names the table (by id, see <tables>) that holds the data. -->
  <slices>
    <!-- Population by country and year. -->
    <slice id="countries_slice">
      <dimension concept="country"/>
      <dimension concept="time:year"/>
      <metric concept="population"/>
      <table ref="countries_slice_table"/>
    </slice>

    <!-- Population and unemployment rate by state and year. -->
    <slice id="states_slice">
      <dimension concept="state"/>
      <dimension concept="time:year"/>
      <metric concept="population"/>
      <metric concept="unemployment_rate"/>
      <table ref="states_slice_table"/>
    </slice>

    <!-- Population by country, gender and year. -->
    <slice id="countries_gender_slice">
      <dimension concept="country"/>
      <dimension concept="gender"/>
      <dimension concept="time:year"/>
      <metric concept="population"/>
      <table ref="countries_gender_slice_table"/>
    </slice>

  </slices>

  <!-- Tables: column layout and CSV file for every table id referenced by the
       concepts and slices in this document. All files are declared as UTF-8 CSV. -->
  <tables>
    <!-- Referenced by the "country" concept. -->
    <table id="countries_table">
      <column id="country" type="string"/>
      <column id="name" type="string"/>
      <column id="latitude" type="float"/>
      <column id="longitude" type="float"/>
      <data>
        <file format="csv" encoding="utf-8">countries.csv</file>
      </data>
    </table>

    <!-- Referenced by "countries_slice"; the year column is a date in "yyyy" format. -->
    <table id="countries_slice_table">
      <column id="country" type="string"/>
      <column id="year" type="date" format="yyyy"/>
      <column id="population" type="integer"/>
      <data>
        <file format="csv" encoding="utf-8">country_slice.csv</file>
      </data>
    </table>

    <!-- Referenced by the "state" concept. -->
    <table id="states_table">
      <column id="state" type="string"/>
      <column id="name" type="string"/>
      <!-- NOTE(review): the nested <value> presumably fixes this column to "US"
           for every row instead of reading it from the CSV; confirm against
           the DSPL reference. -->
      <column id="country" type="string">
        <value>US</value>
      </column>
      <column id="latitude" type="float"/>
      <column id="longitude" type="float"/>
      <data>
        <file format="csv" encoding="utf-8">states.csv</file>
      </data>
    </table>

    <!-- Referenced by "states_slice". -->
    <table id="states_slice_table">
      <column id="state" type="string"/>
      <column id="year" type="date" format="yyyy"/>
      <column id="population" type="integer"/>
      <column id="unemployment_rate" type="float"/>
      <data>
        <file format="csv" encoding="utf-8">state_slice.csv</file>
      </data>
    </table>

    <!-- Referenced by the "gender" concept. -->
    <table id="genders_table">
      <column id="gender" type="string"/>
      <column id="name" type="string"/>
      <data>
        <file format="csv" encoding="utf-8">genders.csv</file>
      </data>
    </table>

    <!-- Referenced by "countries_gender_slice". -->
    <table id="countries_gender_slice_table">
      <column id="country" type="string"/>
      <column id="gender" type="string"/>
      <column id="year" type="date" format="yyyy"/>
      <column id="population" type="integer"/>
      <data>
        <file format="csv" encoding="utf-8">gender_country_slice.csv</file>
      </data>
    </table>
  </tables>

</dspl>

Change History

comment:1 Changed 23 months ago by seanh

  • Milestone set to ckan-backlog
Note: See TracTickets for help on using tickets.