You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
831 lines
20 KiB
831 lines
20 KiB
/* ------------------------------------------------------------------------ |
|
|
|
Python Codec Registry and support functions |
|
|
|
Written by Marc-Andre Lemburg (mal@lemburg.com). |
|
|
|
Copyright (c) Corporation for National Research Initiatives. |
|
|
|
------------------------------------------------------------------------ */ |
|
|
|
#include "Python.h" |
|
#include <ctype.h> |
|
|
|
/* --- Codec Registry ----------------------------------------------------- */ |
|
|
|
/* Import the standard encodings package which will register the first |
|
codec search function. |
|
|
|
This is done in a lazy way so that the Unicode implementation does |
|
not downgrade startup time of scripts not needing it. |
|
|
|
ImportErrors are silently ignored by this function. Only one try is |
|
made. |
|
|
|
*/ |
|
|
|
static int _PyCodecRegistry_Init(void); /* Forward */ |
|
|
|
/* Append search_function to the current interpreter's list of codec
   search functions, setting up the codec registry first when the list
   does not exist yet.

   Returns the result of PyList_Append() when the argument is accepted.
   Returns -1 when registry initialization reports failure, when
   search_function is NULL (PyErr_BadArgument) or when it is not
   callable (TypeError). */
int PyCodec_Register(PyObject *search_function)
{
    PyInterpreterState *state = PyThreadState_Get()->interp;

    /* Lazily create the registry on first use. */
    if (state->codec_search_path == NULL) {
        if (_PyCodecRegistry_Init() != 0)
            return -1;
    }

    if (search_function == NULL) {
        PyErr_BadArgument();
        return -1;
    }

    if (PyCallable_Check(search_function) == 0) {
        PyErr_SetString(PyExc_TypeError,
                        "argument must be callable");
        return -1;
    }

    return PyList_Append(state->codec_search_path, search_function);
}
|
|
|
/* Convert a string to a normalized Python string: all characters are |
|
converted to lower case, spaces are replaced with underscores. */ |
|
|
|
static |
|
PyObject *normalizestring(const char *string) |
|
{ |
|
register size_t i; |
|
size_t len = strlen(string); |
|
char *p; |
|
PyObject *v; |
|
|
|
if (len > INT_MAX) { |
|
PyErr_SetString(PyExc_OverflowError, "string is too large"); |
|
return NULL; |
|
} |
|
|
|
v = PyString_FromStringAndSize(NULL, (int)len); |
|
if (v == NULL) |
|
return NULL; |
|
p = PyString_AS_STRING(v); |
|
for (i = 0; i < len; i++) { |
|
register char ch = string[i]; |
|
if (ch == ' ') |
|
ch = '-'; |
|
else |
|
ch = tolower(ch); |
|
p[i] = ch; |
|
} |
|
return v; |
|
} |
|
|
|
/* Lookup the given encoding and return a tuple providing the codec |
|
facilities. |
|
|
|
The encoding string is looked up converted to all lower-case |
|
characters. This makes encodings looked up through this mechanism |
|
effectively case-insensitive. |
|
|
|
If no codec is found, a LookupError is set and NULL returned. |
|
|
|
As side effect, this tries to load the encodings package, if not |
|
yet done. This is part of the lazy load strategy for the encodings |
|
package. |
|
|
|
*/ |
|
|
|
PyObject *_PyCodec_Lookup(const char *encoding) |
|
{ |
|
PyInterpreterState *interp; |
|
PyObject *result, *args = NULL, *v; |
|
int i, len; |
|
|
|
if (encoding == NULL) { |
|
PyErr_BadArgument(); |
|
goto onError; |
|
} |
|
|
|
interp = PyThreadState_Get()->interp; |
|
if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) |
|
goto onError; |
|
|
|
/* Convert the encoding to a normalized Python string: all |
|
characters are converted to lower case, spaces and hyphens are |
|
replaced with underscores. */ |
|
v = normalizestring(encoding); |
|
if (v == NULL) |
|
goto onError; |
|
PyString_InternInPlace(&v); |
|
|
|
/* First, try to lookup the name in the registry dictionary */ |
|
result = PyDict_GetItem(interp->codec_search_cache, v); |
|
if (result != NULL) { |
|
Py_INCREF(result); |
|
Py_DECREF(v); |
|
return result; |
|
} |
|
|
|
/* Next, scan the search functions in order of registration */ |
|
args = PyTuple_New(1); |
|
if (args == NULL) |
|
goto onError; |
|
PyTuple_SET_ITEM(args,0,v); |
|
|
|
len = PyList_Size(interp->codec_search_path); |
|
if (len < 0) |
|
goto onError; |
|
if (len == 0) { |
|
PyErr_SetString(PyExc_LookupError, |
|
"no codec search functions registered: " |
|
"can't find encoding"); |
|
goto onError; |
|
} |
|
|
|
for (i = 0; i < len; i++) { |
|
PyObject *func; |
|
|
|
func = PyList_GetItem(interp->codec_search_path, i); |
|
if (func == NULL) |
|
goto onError; |
|
result = PyEval_CallObject(func, args); |
|
if (result == NULL) |
|
goto onError; |
|
if (result == Py_None) { |
|
Py_DECREF(result); |
|
continue; |
|
} |
|
if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) { |
|
PyErr_SetString(PyExc_TypeError, |
|
"codec search functions must return 4-tuples"); |
|
Py_DECREF(result); |
|
goto onError; |
|
} |
|
break; |
|
} |
|
if (i == len) { |
|
/* XXX Perhaps we should cache misses too ? */ |
|
PyErr_Format(PyExc_LookupError, |
|
"unknown encoding: %s", encoding); |
|
goto onError; |
|
} |
|
|
|
/* Cache and return the result */ |
|
PyDict_SetItem(interp->codec_search_cache, v, result); |
|
Py_DECREF(args); |
|
return result; |
|
|
|
onError: |
|
Py_XDECREF(args); |
|
return NULL; |
|
} |
|
|
|
static |
|
PyObject *args_tuple(PyObject *object, |
|
const char *errors) |
|
{ |
|
PyObject *args; |
|
|
|
args = PyTuple_New(1 + (errors != NULL)); |
|
if (args == NULL) |
|
return NULL; |
|
Py_INCREF(object); |
|
PyTuple_SET_ITEM(args,0,object); |
|
if (errors) { |
|
PyObject *v; |
|
|
|
v = PyString_FromString(errors); |
|
if (v == NULL) { |
|
Py_DECREF(args); |
|
return NULL; |
|
} |
|
PyTuple_SET_ITEM(args, 1, v); |
|
} |
|
return args; |
|
} |
|
|
|
/* Build a codec by calling factory(stream[,errors]) or just |
|
factory(errors) depending on whether the given parameters are |
|
non-NULL. */ |
|
|
|
static |
|
PyObject *build_stream_codec(PyObject *factory, |
|
PyObject *stream, |
|
const char *errors) |
|
{ |
|
PyObject *args, *codec; |
|
|
|
args = args_tuple(stream, errors); |
|
if (args == NULL) |
|
return NULL; |
|
|
|
codec = PyEval_CallObject(factory, args); |
|
Py_DECREF(args); |
|
return codec; |
|
} |
|
|
|
/* Convenience APIs to query the Codec registry. |
|
|
|
All APIs return a codec object with incremented refcount. |
|
|
|
*/ |
|
|
|
PyObject *PyCodec_Encoder(const char *encoding) |
|
{ |
|
PyObject *codecs; |
|
PyObject *v; |
|
|
|
codecs = _PyCodec_Lookup(encoding); |
|
if (codecs == NULL) |
|
goto onError; |
|
v = PyTuple_GET_ITEM(codecs,0); |
|
Py_DECREF(codecs); |
|
Py_INCREF(v); |
|
return v; |
|
|
|
onError: |
|
return NULL; |
|
} |
|
|
|
PyObject *PyCodec_Decoder(const char *encoding) |
|
{ |
|
PyObject *codecs; |
|
PyObject *v; |
|
|
|
codecs = _PyCodec_Lookup(encoding); |
|
if (codecs == NULL) |
|
goto onError; |
|
v = PyTuple_GET_ITEM(codecs,1); |
|
Py_DECREF(codecs); |
|
Py_INCREF(v); |
|
return v; |
|
|
|
onError: |
|
return NULL; |
|
} |
|
|
|
/* Create a stream reader for the given encoding by calling item 2 of
   the codec tuple with stream and, when non-NULL, errors.

   Returns the object produced by that call, or NULL if the codec lookup
   or the call fails. */
PyObject *PyCodec_StreamReader(const char *encoding,
                               PyObject *stream,
                               const char *errors)
{
    PyObject *reader;
    PyObject *entry = _PyCodec_Lookup(encoding);

    if (entry == NULL)
        return NULL;

    reader = build_stream_codec(PyTuple_GET_ITEM(entry, 2), stream, errors);
    Py_DECREF(entry);
    return reader;
}
|
|
|
/* Create a stream writer for the given encoding by calling item 3 of
   the codec tuple with stream and, when non-NULL, errors.

   Returns the object produced by that call, or NULL if the codec lookup
   or the call fails. */
PyObject *PyCodec_StreamWriter(const char *encoding,
                               PyObject *stream,
                               const char *errors)
{
    PyObject *writer;
    PyObject *entry = _PyCodec_Lookup(encoding);

    if (entry == NULL)
        return NULL;

    writer = build_stream_codec(PyTuple_GET_ITEM(entry, 3), stream, errors);
    Py_DECREF(entry);
    return writer;
}
|
|
|
/* Encode an object (e.g. an Unicode object) using the given encoding |
|
and return the resulting encoded object (usually a Python string). |
|
|
|
errors is passed to the encoder factory as argument if non-NULL. */ |
|
|
|
PyObject *PyCodec_Encode(PyObject *object, |
|
const char *encoding, |
|
const char *errors) |
|
{ |
|
PyObject *encoder = NULL; |
|
PyObject *args = NULL, *result; |
|
PyObject *v; |
|
|
|
encoder = PyCodec_Encoder(encoding); |
|
if (encoder == NULL) |
|
goto onError; |
|
|
|
args = args_tuple(object, errors); |
|
if (args == NULL) |
|
goto onError; |
|
|
|
result = PyEval_CallObject(encoder,args); |
|
if (result == NULL) |
|
goto onError; |
|
|
|
if (!PyTuple_Check(result) || |
|
PyTuple_GET_SIZE(result) != 2) { |
|
PyErr_SetString(PyExc_TypeError, |
|
"encoder must return a tuple (object,integer)"); |
|
goto onError; |
|
} |
|
v = PyTuple_GET_ITEM(result,0); |
|
Py_INCREF(v); |
|
/* We don't check or use the second (integer) entry. */ |
|
|
|
Py_DECREF(args); |
|
Py_DECREF(encoder); |
|
Py_DECREF(result); |
|
return v; |
|
|
|
onError: |
|
Py_XDECREF(args); |
|
Py_XDECREF(encoder); |
|
return NULL; |
|
} |
|
|
|
/* Decode an object (usually a Python string) using the given encoding |
|
and return an equivalent object (e.g. an Unicode object). |
|
|
|
errors is passed to the decoder factory as argument if non-NULL. */ |
|
|
|
PyObject *PyCodec_Decode(PyObject *object, |
|
const char *encoding, |
|
const char *errors) |
|
{ |
|
PyObject *decoder = NULL; |
|
PyObject *args = NULL, *result = NULL; |
|
PyObject *v; |
|
|
|
decoder = PyCodec_Decoder(encoding); |
|
if (decoder == NULL) |
|
goto onError; |
|
|
|
args = args_tuple(object, errors); |
|
if (args == NULL) |
|
goto onError; |
|
|
|
result = PyEval_CallObject(decoder,args); |
|
if (result == NULL) |
|
goto onError; |
|
if (!PyTuple_Check(result) || |
|
PyTuple_GET_SIZE(result) != 2) { |
|
PyErr_SetString(PyExc_TypeError, |
|
"decoder must return a tuple (object,integer)"); |
|
goto onError; |
|
} |
|
v = PyTuple_GET_ITEM(result,0); |
|
Py_INCREF(v); |
|
/* We don't check or use the second (integer) entry. */ |
|
|
|
Py_DECREF(args); |
|
Py_DECREF(decoder); |
|
Py_DECREF(result); |
|
return v; |
|
|
|
onError: |
|
Py_XDECREF(args); |
|
Py_XDECREF(decoder); |
|
Py_XDECREF(result); |
|
return NULL; |
|
} |
|
|
|
/* Register the error handling callback function error under the name |
|
name. This function will be called by the codec when it encounters |
|
an unencodable characters/undecodable bytes and doesn't know the |
|
callback name, when name is specified as the error parameter |
|
in the call to the encode/decode function. |
|
Return 0 on success, -1 on error */ |
|
int PyCodec_RegisterError(const char *name, PyObject *error) |
|
{ |
|
PyInterpreterState *interp = PyThreadState_Get()->interp; |
|
if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) |
|
return -1; |
|
if (!PyCallable_Check(error)) { |
|
PyErr_SetString(PyExc_TypeError, "handler must be callable"); |
|
return -1; |
|
} |
|
return PyDict_SetItemString(interp->codec_error_registry, |
|
(char *)name, error); |
|
} |
|
|
|
/* Lookup the error handling callback function registered under the |
|
name error. As a special case NULL can be passed, in which case |
|
the error handling callback for strict encoding will be returned. */ |
|
PyObject *PyCodec_LookupError(const char *name) |
|
{ |
|
PyObject *handler = NULL; |
|
|
|
PyInterpreterState *interp = PyThreadState_Get()->interp; |
|
if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) |
|
return NULL; |
|
|
|
if (name==NULL) |
|
name = "strict"; |
|
handler = PyDict_GetItemString(interp->codec_error_registry, (char *)name); |
|
if (!handler) |
|
PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name); |
|
else |
|
Py_INCREF(handler); |
|
return handler; |
|
} |
|
|
|
/* Set a TypeError naming the class of exc, for use when an error
   callback receives an exception it does not handle.

   If the class, its name, or the string form of the name cannot be
   obtained, the function returns without setting the TypeError. */
static void wrong_exception_type(PyObject *exc)
{
    PyObject *cls;
    PyObject *cls_name;
    PyObject *text;

    cls = PyObject_GetAttrString(exc, "__class__");
    if (cls == NULL)
        return;

    cls_name = PyObject_GetAttrString(cls, "__name__");
    Py_DECREF(cls);
    if (cls_name == NULL)
        return;

    text = PyObject_Str(cls_name);
    Py_DECREF(cls_name);
    if (text == NULL)
        return;

    PyErr_Format(PyExc_TypeError,
                 "don't know how to handle %.400s in error callback",
                 PyString_AS_STRING(text));
    Py_DECREF(text);
}
|
|
|
/* "strict" error callback: re-raise the exception passed in by the
   codec.

   When exc is an instance object it is set as the current exception
   under its class; otherwise a TypeError is set.  Always returns
   NULL. */
PyObject *PyCodec_StrictErrors(PyObject *exc)
{
    if (!PyInstance_Check(exc)) {
        PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
        return NULL;
    }

    PyErr_SetObject((PyObject *)((PyInstanceObject *)exc)->in_class,
                    exc);
    return NULL;
}
|
|
|
|
|
#ifdef Py_USING_UNICODE |
|
/* "ignore" error callback: skip the offending input.

   For UnicodeEncodeError, UnicodeDecodeError and UnicodeTranslateError
   instances, returns a tuple of an empty Unicode string and the
   exception's end position.  Returns NULL if the end position cannot
   be read, or if exc is of another type (a TypeError is then set via
   wrong_exception_type()). */
PyObject *PyCodec_IgnoreErrors(PyObject *exc)
{
    int end;
    int failed;

    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError))
        failed = PyUnicodeEncodeError_GetEnd(exc, &end);
    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError))
        failed = PyUnicodeDecodeError_GetEnd(exc, &end);
    else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError))
        failed = PyUnicodeTranslateError_GetEnd(exc, &end);
    else {
        wrong_exception_type(exc);
        return NULL;
    }

    if (failed)
        return NULL;

    /* A NULL buffer with length 0 would produce None rather than u'',
       so the address of end is passed as a dummy non-NULL pointer; with
       a length of 0 nothing is read through it. */
    return Py_BuildValue("(u#i)", &end, 0, end);
}
|
|
|
|
|
PyObject *PyCodec_ReplaceErrors(PyObject *exc) |
|
{ |
|
PyObject *restuple; |
|
int start; |
|
int end; |
|
int i; |
|
|
|
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { |
|
PyObject *res; |
|
Py_UNICODE *p; |
|
if (PyUnicodeEncodeError_GetStart(exc, &start)) |
|
return NULL; |
|
if (PyUnicodeEncodeError_GetEnd(exc, &end)) |
|
return NULL; |
|
res = PyUnicode_FromUnicode(NULL, end-start); |
|
if (res == NULL) |
|
return NULL; |
|
for (p = PyUnicode_AS_UNICODE(res), i = start; |
|
i<end; ++p, ++i) |
|
*p = '?'; |
|
restuple = Py_BuildValue("(Oi)", res, end); |
|
Py_DECREF(res); |
|
return restuple; |
|
} |
|
else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) { |
|
Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER; |
|
if (PyUnicodeDecodeError_GetEnd(exc, &end)) |
|
return NULL; |
|
return Py_BuildValue("(u#i)", &res, 1, end); |
|
} |
|
else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) { |
|
PyObject *res; |
|
Py_UNICODE *p; |
|
if (PyUnicodeTranslateError_GetStart(exc, &start)) |
|
return NULL; |
|
if (PyUnicodeTranslateError_GetEnd(exc, &end)) |
|
return NULL; |
|
res = PyUnicode_FromUnicode(NULL, end-start); |
|
if (res == NULL) |
|
return NULL; |
|
for (p = PyUnicode_AS_UNICODE(res), i = start; |
|
i<end; ++p, ++i) |
|
*p = Py_UNICODE_REPLACEMENT_CHARACTER; |
|
restuple = Py_BuildValue("(Oi)", res, end); |
|
Py_DECREF(res); |
|
return restuple; |
|
} |
|
else { |
|
wrong_exception_type(exc); |
|
return NULL; |
|
} |
|
} |
|
|
|
PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc) |
|
{ |
|
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { |
|
PyObject *restuple; |
|
PyObject *object; |
|
int start; |
|
int end; |
|
PyObject *res; |
|
Py_UNICODE *p; |
|
Py_UNICODE *startp; |
|
Py_UNICODE *outp; |
|
int ressize; |
|
if (PyUnicodeEncodeError_GetStart(exc, &start)) |
|
return NULL; |
|
if (PyUnicodeEncodeError_GetEnd(exc, &end)) |
|
return NULL; |
|
if (!(object = PyUnicodeEncodeError_GetObject(exc))) |
|
return NULL; |
|
startp = PyUnicode_AS_UNICODE(object); |
|
for (p = startp+start, ressize = 0; p < startp+end; ++p) { |
|
if (*p<10) |
|
ressize += 2+1+1; |
|
else if (*p<100) |
|
ressize += 2+2+1; |
|
else if (*p<1000) |
|
ressize += 2+3+1; |
|
else if (*p<10000) |
|
ressize += 2+4+1; |
|
else if (*p<100000) |
|
ressize += 2+5+1; |
|
else if (*p<1000000) |
|
ressize += 2+6+1; |
|
else |
|
ressize += 2+7+1; |
|
} |
|
/* allocate replacement */ |
|
res = PyUnicode_FromUnicode(NULL, ressize); |
|
if (res == NULL) { |
|
Py_DECREF(object); |
|
return NULL; |
|
} |
|
/* generate replacement */ |
|
for (p = startp+start, outp = PyUnicode_AS_UNICODE(res); |
|
p < startp+end; ++p) { |
|
Py_UNICODE c = *p; |
|
int digits; |
|
int base; |
|
*outp++ = '&'; |
|
*outp++ = '#'; |
|
if (*p<10) { |
|
digits = 1; |
|
base = 1; |
|
} |
|
else if (*p<100) { |
|
digits = 2; |
|
base = 10; |
|
} |
|
else if (*p<1000) { |
|
digits = 3; |
|
base = 100; |
|
} |
|
else if (*p<10000) { |
|
digits = 4; |
|
base = 1000; |
|
} |
|
else if (*p<100000) { |
|
digits = 5; |
|
base = 10000; |
|
} |
|
else if (*p<1000000) { |
|
digits = 6; |
|
base = 100000; |
|
} |
|
else { |
|
digits = 7; |
|
base = 1000000; |
|
} |
|
while (digits-->0) { |
|
*outp++ = '0' + c/base; |
|
c %= base; |
|
base /= 10; |
|
} |
|
*outp++ = ';'; |
|
} |
|
restuple = Py_BuildValue("(Oi)", res, end); |
|
Py_DECREF(res); |
|
Py_DECREF(object); |
|
return restuple; |
|
} |
|
else { |
|
wrong_exception_type(exc); |
|
return NULL; |
|
} |
|
} |
|
|
|
static Py_UNICODE hexdigits[] = { |
|
'0', '1', '2', '3', '4', '5', '6', '7', |
|
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f' |
|
}; |
|
|
|
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) |
|
{ |
|
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { |
|
PyObject *restuple; |
|
PyObject *object; |
|
int start; |
|
int end; |
|
PyObject *res; |
|
Py_UNICODE *p; |
|
Py_UNICODE *startp; |
|
Py_UNICODE *outp; |
|
int ressize; |
|
if (PyUnicodeEncodeError_GetStart(exc, &start)) |
|
return NULL; |
|
if (PyUnicodeEncodeError_GetEnd(exc, &end)) |
|
return NULL; |
|
if (!(object = PyUnicodeEncodeError_GetObject(exc))) |
|
return NULL; |
|
startp = PyUnicode_AS_UNICODE(object); |
|
for (p = startp+start, ressize = 0; p < startp+end; ++p) { |
|
if (*p >= 0x00010000) |
|
ressize += 1+1+8; |
|
else if (*p >= 0x100) { |
|
ressize += 1+1+4; |
|
} |
|
else |
|
ressize += 1+1+2; |
|
} |
|
res = PyUnicode_FromUnicode(NULL, ressize); |
|
if (res==NULL) |
|
return NULL; |
|
for (p = startp+start, outp = PyUnicode_AS_UNICODE(res); |
|
p < startp+end; ++p) { |
|
Py_UNICODE c = *p; |
|
*outp++ = '\\'; |
|
if (c >= 0x00010000) { |
|
*outp++ = 'U'; |
|
*outp++ = hexdigits[(c>>28)&0xf]; |
|
*outp++ = hexdigits[(c>>24)&0xf]; |
|
*outp++ = hexdigits[(c>>20)&0xf]; |
|
*outp++ = hexdigits[(c>>16)&0xf]; |
|
*outp++ = hexdigits[(c>>12)&0xf]; |
|
*outp++ = hexdigits[(c>>8)&0xf]; |
|
} |
|
else if (c >= 0x100) { |
|
*outp++ = 'u'; |
|
*outp++ = hexdigits[(c>>12)&0xf]; |
|
*outp++ = hexdigits[(c>>8)&0xf]; |
|
} |
|
else |
|
*outp++ = 'x'; |
|
*outp++ = hexdigits[(c>>4)&0xf]; |
|
*outp++ = hexdigits[c&0xf]; |
|
} |
|
|
|
restuple = Py_BuildValue("(Oi)", res, end); |
|
Py_DECREF(res); |
|
Py_DECREF(object); |
|
return restuple; |
|
} |
|
else { |
|
wrong_exception_type(exc); |
|
return NULL; |
|
} |
|
} |
|
#endif |
|
|
|
/* Method-table wrapper around PyCodec_StrictErrors(); self is not
   used. */
static PyObject *strict_errors(PyObject *self, PyObject *exc)
{
    PyObject *outcome = PyCodec_StrictErrors(exc);

    return outcome;
}
|
|
|
|
|
#ifdef Py_USING_UNICODE |
|
/* Method-table wrapper around PyCodec_IgnoreErrors(); self is not
   used. */
static PyObject *ignore_errors(PyObject *self, PyObject *exc)
{
    PyObject *outcome = PyCodec_IgnoreErrors(exc);

    return outcome;
}
|
|
|
|
|
/* Method-table wrapper around PyCodec_ReplaceErrors(); self is not
   used. */
static PyObject *replace_errors(PyObject *self, PyObject *exc)
{
    PyObject *outcome = PyCodec_ReplaceErrors(exc);

    return outcome;
}
|
|
|
|
|
/* Method-table wrapper around PyCodec_XMLCharRefReplaceErrors(); self
   is not used. */
static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
{
    PyObject *outcome = PyCodec_XMLCharRefReplaceErrors(exc);

    return outcome;
}
|
|
|
|
|
/* Method-table wrapper around PyCodec_BackslashReplaceErrors(); self
   is not used. */
static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
{
    PyObject *outcome = PyCodec_BackslashReplaceErrors(exc);

    return outcome;
}
|
#endif |
|
|
|
static int _PyCodecRegistry_Init(void) |
|
{ |
|
static struct { |
|
char *name; |
|
PyMethodDef def; |
|
} methods[] = |
|
{ |
|
{ |
|
"strict", |
|
{ |
|
"strict_errors", |
|
strict_errors, |
|
METH_O |
|
} |
|
}, |
|
#ifdef Py_USING_UNICODE |
|
{ |
|
"ignore", |
|
{ |
|
"ignore_errors", |
|
ignore_errors, |
|
METH_O |
|
} |
|
}, |
|
{ |
|
"replace", |
|
{ |
|
"replace_errors", |
|
replace_errors, |
|
METH_O |
|
} |
|
}, |
|
{ |
|
"xmlcharrefreplace", |
|
{ |
|
"xmlcharrefreplace_errors", |
|
xmlcharrefreplace_errors, |
|
METH_O |
|
} |
|
}, |
|
{ |
|
"backslashreplace", |
|
{ |
|
"backslashreplace_errors", |
|
backslashreplace_errors, |
|
METH_O |
|
} |
|
} |
|
#endif |
|
}; |
|
|
|
PyInterpreterState *interp = PyThreadState_Get()->interp; |
|
PyObject *mod; |
|
int i; |
|
|
|
if (interp->codec_search_path != NULL) |
|
return 0; |
|
|
|
interp->codec_search_path = PyList_New(0); |
|
interp->codec_search_cache = PyDict_New(); |
|
interp->codec_error_registry = PyDict_New(); |
|
|
|
if (interp->codec_error_registry) { |
|
for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) { |
|
PyObject *func = PyCFunction_New(&methods[i].def, NULL); |
|
int res; |
|
if (!func) |
|
Py_FatalError("can't initialize codec error registry"); |
|
res = PyCodec_RegisterError(methods[i].name, func); |
|
Py_DECREF(func); |
|
if (res) |
|
Py_FatalError("can't initialize codec error registry"); |
|
} |
|
} |
|
|
|
if (interp->codec_search_path == NULL || |
|
interp->codec_search_cache == NULL || |
|
interp->codec_error_registry == NULL) |
|
Py_FatalError("can't initialize codec registry"); |
|
|
|
mod = PyImport_ImportModuleEx("encodings", NULL, NULL, NULL); |
|
if (mod == NULL) { |
|
if (PyErr_ExceptionMatches(PyExc_ImportError)) { |
|
/* Ignore ImportErrors... this is done so that |
|
distributions can disable the encodings package. Note |
|
that other errors are not masked, e.g. SystemErrors |
|
raised to inform the user of an error in the Python |
|
configuration are still reported back to the user. */ |
|
PyErr_Clear(); |
|
return 0; |
|
} |
|
return -1; |
|
} |
|
Py_DECREF(mod); |
|
return 0; |
|
}
|
|
|