Ticket #1577 (new defect)
Can't upload file with foreign chars in filename
| Reported by: | dread | Owned by: | rgrp |
|---|---|---|---|
| Priority: | major | Milestone: | ckan-backlog |
| Component: | ckan | Keywords: | |
| Cc: | | Repository: | ckan |
| Theme: | none |
Description
Looks like uploading a file with foreign characters fails due to encoding reasons.
URL: http://thedatahub.org/api/storage/auth/form/2011-12-19T124447/Ministerstvo-financ%C3%AD-%C4%8Cesk%C3%A9-republiky-_-P%C5%99%C3%ADprava-rozpo%C4%8Dtu.pdf
Module weberror.errormiddleware:162 in __call__
<< __traceback_supplement__ = Supplement, self, environ
sr_checker = ResponseStartChecker(start_response)
app_iter = self.application(environ, sr_checker)
return self.make_catching_iter(app_iter, environ, sr_checker)
except:
>> app_iter = self.application(environ, sr_checker)
Module beaker.middleware:73 in __call__
<< self.cache_manager)
environ[self.environ_key] = self.cache_manager
return self.app(environ, start_response)
>> return self.app(environ, start_response)
Module beaker.middleware:152 in __call__
<< headers.append(('Set-cookie', cookie))
return start_response(status, headers, exc_info)
return self.wrap_app(environ, session_start_response)
def _get_session(self):
>> return self.wrap_app(environ, session_start_response)
Module routes.middleware:130 in __call__
<< environ['SCRIPT_NAME'] = environ['SCRIPT_NAME'][:-1]
response = self.app(environ, start_response)
# Wrapped in try as in rare cases the attribute will be gone already
>> response = self.app(environ, start_response)
Module pylons.wsgiapp:125 in __call__
<<
controller = self.resolve(environ, start_response)
response = self.dispatch(controller, environ, start_response)
if 'paste.testing_variables' in environ and hasattr(response,
>> response = self.dispatch(controller, environ, start_response)
Module pylons.wsgiapp:324 in dispatch
<< if log_debug:
log.debug("Calling controller class with WSGI interface")
return controller(environ, start_response)
def load_test_env(self, environ):
>> return controller(environ, start_response)
Module ckan.lib.base:123 in __call__
<< # available in environ['pylons.routes_dict']
try:
return WSGIController.__call__(self, environ, start_response)
finally:
model.Session.remove()
>> return WSGIController.__call__(self, environ, start_response)
Module pylons.controllers.core:221 in __call__
<< return response(environ, self.start_response)
response = self._dispatch_call()
if not start_response_called:
self.start_response = start_response
>> response = self._dispatch_call()
Module pylons.controllers.core:172 in _dispatch_call
<< req.environ['pylons.action_method'] = func
response = self._inspect_call(func)
else:
if log_debug:
>> response = self._inspect_call(func)
Module pylons.controllers.core:107 in _inspect_call
<< func.__name__, args)
try:
result = self._perform_call(func, args)
except HTTPException, httpe:
if log_debug:
>> result = self._perform_call(func, args)
Module pylons.controllers.core:60 in _perform_call
<< """Hide the traceback for everything above this method"""
__traceback_hide__ = 'before_and_this'
return func(**args)
def _inspect_call(self, func):
>> return func(**args)
Module ckanext.storage.controller:2 in auth_form
Module ckan.lib.jsonp:26 in jsonpify
<< Very much modelled after pylons.decorators.jsonify .
"""
data = func(*args, **kwargs)
return to_jsonp(data)
>> data = func(*args, **kwargs)
Module ckanext.storage.controller:301 in auth_form
<< method = 'POST'
authorize(method, bucket, label, c.userobj, self.ofs)
data = self._get_form_data(label)
return data
>> authorize(method, bucket, label, c.userobj, self.ofs)
Module ckanext.storage.controller:79 in authorize
<< if method != 'GET':
# do not allow overwriting
if ofs.exists(bucket, key):
abort(409)
# now check user stuff
>> if ofs.exists(bucket, key):
Module ofs.remote.botostore:53 in exists
<< if bucket is None:
return False
return (label is None) or (label in bucket)
def claim_bucket(self, bucket):
>> return (label is None) or (label in bucket)
Module boto.s3.bucket:87 in __contains__
<< def __contains__(self, key_name):
return not (self.get_key(key_name) is None)
def startElement(self, name, attrs, connection):
>> return not (self.get_key(key_name) is None)
Module boto.s3.bucket:144 in get_key
<< response = self.connection.make_request('HEAD', self.name, key_name,
headers=headers,
query_args=query_args)
# Allow any success status (2xx) - for example this lets us
# support Range gets, which return status 206:
>> query_args=query_args)
Module boto.s3.connection:388 in make_request
<< if isinstance(key, Key):
key = key.name
path = self.calling_format.build_path_base(bucket, key)
boto.log.debug('path=%s' % path)
auth_path = self.calling_format.build_auth_path(bucket, key)
>> path = self.calling_format.build_path_base(bucket, key)
Module boto.s3.connection:88 in build_path_base
<< def build_path_base(self, bucket, key=''):
return '/%s' % urllib.quote(key)
class SubdomainCallingFormat(_CallingFormat):
>> return '/%s' % urllib.quote(key)
Module urllib:1222 in quote
<< safe_map[c] = (c in safe) and c or ('%%%02X' % i)
_safemaps[cachekey] = safe_map
res = map(safe_map.__getitem__, s)
return ''.join(res)
>> res = map(safe_map.__getitem__, s)
KeyError: u'\xed'
CGI Variables
AUTH_TYPE 'cookie'
CONTENT_TYPE '; charset=utf-8'
DOCUMENT_ROOT '/htdocs'
GATEWAY_INTERFACE 'CGI/1.1'
HTTP_ACCEPT '*/*'
HTTP_ACCEPT_CHARSET 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'
HTTP_ACCEPT_ENCODING 'gzip,deflate,sdch'
HTTP_ACCEPT_LANGUAGE 'en-US,en;q=0.8'
HTTP_CACHE_CONTROL 'max-age=259200'
HTTP_CONNECTION 'keep-alive'
HTTP_COOKIE 'thedatahub_net=27a7f095fcca1ea6b36df996d595e3278b16f4538862bf7f88d49e2000b9246547c8fd0e; auth_tkt="f9c6ab2b0d9fcd71c4c2408bc12fab544eef1c45elenaibp!userid_type:unicode"; auth_tkt="f9c6ab2b0d9fcd71c4c2408bc12fab544eef1c45elenaibp!userid_type:unicode"; ckan_user=elenaibp; ckan_display_name="Elena Mondo"; ckan_apikey=decd48b1-49ee-4250-bff4-98ccca9c02a5; hide_welcome_message=1; __utma=119670349.1809834699.1323782464.1324293066.1324298316.4; __utmb=119670349.3.10.1324298316; __utmc=119670349; __utmz=119670349.1323782464.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'
HTTP_HOST 'thedatahub.org'
HTTP_REFERER 'http://thedatahub.org/dataset/edit/budget-library-czeck-republic'
HTTP_USER_AGENT 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.7'
HTTP_VIA '1.1 localhost (squid/3.0.STABLE19)'
HTTP_X_FORWARDED_FOR '87.114.74.190'
HTTP_X_REQUESTED_WITH 'XMLHttpRequest'
PATH '/usr/local/bin:/usr/bin:/bin'
PATH_INFO '/api/storage/auth/form/2011-12-19T124447/Ministerstvo-financ\xc3\xad-\xc4\x8cesk\xc3\xa9-republiky-_-P\xc5\x99\xc3\xadprava-rozpo\xc4\x8dtu.pdf'
PATH_TRANSLATED '/home/okfn/var/srvc/ckan.net/pyenv/bin/ckan.net.py/api/storage/auth/form/2011-12-19T124447/Ministerstvo-financ\xc3\xad-\xc4\x8cesk\xc3\xa9-republiky-_-P\xc5\x99\xc3\xadprava-rozpo\xc4\x8dtu.pdf'
REMOTE_ADDR '193.34.146.142'
REMOTE_PORT '55419'
REMOTE_USER u'elenaibp'
REMOTE_USER_DATA 'userid_type:unicode'
REMOTE_USER_TOKENS ['']
REQUEST_METHOD 'GET'
REQUEST_URI '/api/storage/auth/form/2011-12-19T124447/Ministerstvo-financ%C3%AD-%C4%8Cesk%C3%A9-republiky-_-P%C5%99%C3%ADprava-rozpo%C4%8Dtu.pdf'
SCRIPT_FILENAME '/home/okfn/var/srvc/ckan.net/pyenv/bin/ckan.net.py'
SCRIPT_URI 'http://thedatahub.org/api/storage/auth/form/2011-12-19T124447/Ministerstvo-financ\xc3\xad-\xc4\x8cesk\xc3\xa9-republiky-_-P\xc5\x99\xc3\xadprava-rozpo\xc4\x8dtu.pdf'
SCRIPT_URL '/api/storage/auth/form/2011-12-19T124447/Ministerstvo-financ\xc3\xad-\xc4\x8cesk\xc3\xa9-republiky-_-P\xc5\x99\xc3\xadprava-rozpo\xc4\x8dtu.pdf'
SERVER_ADDR '193.34.146.146'
SERVER_ADMIN '[no address given]'
SERVER_NAME 'thedatahub.org'
SERVER_PORT '80'
SERVER_PROTOCOL 'HTTP/1.0'
SERVER_SIGNATURE '<address>Apache/2.2.14 (Ubuntu) Server at thedatahub.org Port 80</address>\n'
SERVER_SOFTWARE 'Apache/2.2.14 (Ubuntu)'
WSGI Variables
application <beaker.middleware.CacheMiddleware object at 0x7f22601c7dd0>
beaker.cache <beaker.cache.CacheManager object at 0x7f22601c7b50>
beaker.get_session <bound method SessionMiddleware._get_session of <beaker.middleware.SessionMiddleware object at 0x7f22601c7a90>>
beaker.session {'_accessed_time': 1324298703.071357, '_creation_time': 1324293077.4139669}
mod_wsgi.application_group 'ckan.net|'
mod_wsgi.callable_object 'application'
mod_wsgi.listener_host ''
mod_wsgi.listener_port '80'
mod_wsgi.process_group 'ckan.net'
mod_wsgi.reload_mechanism '1'
mod_wsgi.script_reloading '1'
mod_wsgi.version (2, 8)
paste.cookies (<SimpleCookie: __utma='119670349.1809834699.1323782464.1324293066.1324298316.4' __utmb='119670349.3.10.1324298316' __utmc='119670349' __utmz='119670349.1323782464.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)' auth_tkt='f9c6ab2b0d9fcd71c4c2408bc12fab544eef1c45elenaibp!userid_type:unicode' ckan_apikey='decd48b1-49ee-4250-bff4-98ccca9c02a5' ckan_display_name='Elena Mondo' ckan_user='elenaibp' hide_welcome_message='1' thedatahub_net='27a7f095fcca1ea6b36df996d595e3278b16f4538862bf7f88d49e2000b9246547c8fd0e'>, 'thedatahub_net=27a7f095fcca1ea6b36df996d595e3278b16f4538862bf7f88d49e2000b9246547c8fd0e; auth_tkt="f9c6ab2b0d9fcd71c4c2408bc12fab544eef1c45elenaibp!userid_type:unicode"; auth_tkt="f9c6ab2b0d9fcd71c4c2408bc12fab544eef1c45elenaibp!userid_type:unicode"; ckan_user=elenaibp; ckan_display_name="Elena Mondo"; ckan_apikey=decd48b1-49ee-4250-bff4-98ccca9c02a5; hide_welcome_message=1; _ _utma=119670349.1809834699.1323782464.1324293066.1324298316.4; __utmb=119670349.3.10...)|utmcmd=(none)')
paste.registry <paste.registry.Registry object at 0x7f226194df50>
paste.throw_errors True
pylons.action_method <bound method StorageAPIController.auth_form of <ckanext.storage.controller.StorageAPIController object at 0x7f2261dad990>>
pylons.controller <ckanext.storage.controller.StorageAPIController object at 0x7f2261dad990>
pylons.environ_config {'session': 'beaker.session', 'cache': 'beaker.cache'}
pylons.pylons <pylons.util.PylonsContext object at 0x7f2261daddd0>
pylons.routes_dict {'action': u'auth_form', 'controller': u'ckanext.storage.controller:StorageAPIController', 'label': u'2011-12-19T124447/Ministerstvo-financ\xed-\u010cesk\xe9-republiky-_-P\u0159\xedprava-rozpo\u010dtu.pdf'}
repoze.who.identity <repoze.who identity (hidden, dict-like) at 139785645747120>
repoze.who.logger <logging.Logger instance at 0x7f225e23c098>
repoze.who.plugins {'openid': <OpenIdIdentificationPlugin 139785625065680>, 'friendlyform': <FriendlyFormPlugin 139785618095248>, 'ckan.lib.authenticator:UsernamePasswordAuthenticator': <ckan.lib.authenticator.UsernamePasswordAuthenticator object at 0x7f2260874c10>, 'auth_tkt': <AuthTktCookiePlugin 139785625065808>, 'ckan.lib.authenticator:OpenIDAuthenticator': <ckan.lib.authenticator.OpenIDAuthenticator object at 0x7f2260874c90>}
routes.route <routes.route.Route object at 0x7f22601a1090>
routes.url <routes.util.URLGenerator object at 0x7f2261dadf50>
webob._parsed_query_vars (GET([]), '')
webob.adhoc_attrs {'language': 'en-us'}
wsgi process 'Multiprocess'
wsgi.file_wrapper <built-in method file_wrapper of mod_wsgi.Adapter object at 0x7f2261da9af8>
wsgiorg.routing_args (<routes.util.URLGenerator object at 0x7f2261dadf50>, {'action': u'auth_form', 'controller': u'ckanext.storage.controller:StorageAPIController', 'label': u'2011-12-19T124447/Ministerstvo-financ\xed-\u010cesk\xe9-republiky-_-P\u0159\xedprava-rozpo\u010dtu.pdf'})
Change History
Note: See TracTickets for help on using tickets.

Foreign characters aren't allowed in file names, I would imagine, as they will be part of the URL. Worth noting this at some point, but I don't think this is very high priority, hence deferring out of v1.6.