00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <Python.h>
00024 #include <stdio.h>
00025
00026 #include <datetime.h>
00027
00028 #include "_cbson.h"
00029 #include "buffer.h"
00030 #include "time64.h"
00031 #include "encoding_helpers.h"
00032
00033 static PyObject* Binary = NULL;
00034 static PyObject* Code = NULL;
00035 static PyObject* ObjectId = NULL;
00036 static PyObject* DBRef = NULL;
00037 static PyObject* RECompile = NULL;
00038 static PyObject* UUID = NULL;
00039 static PyObject* Timestamp = NULL;
00040 static PyObject* MinKey = NULL;
00041 static PyObject* MaxKey = NULL;
00042 static PyObject* UTC = NULL;
00043 static PyTypeObject* REType = NULL;
00044
00045 #if PY_VERSION_HEX < 0x02050000
00046 #define WARN(category, message) \
00047 PyErr_Warn((category), (message))
00048 #else
00049 #define WARN(category, message) \
00050 PyErr_WarnEx((category), (message), 1)
00051 #endif
00052
00053
00054 #define FLAGS_SIZE 7
00055
/* INT2STRING(buffer, i): format the int `i` in decimal into a newly
 * allocated string stored in *buffer (a char**).  Evaluates to a negative
 * value on failure; the caller frees *buffer with free().
 *
 * STRCAT(dest, n, src): append `src` to `dest`.  `n` is the capacity of
 * `dest` and is only used where strcat_s is available.
 */
#if defined(WIN32) || defined(_MSC_VER)
/* No asprintf here: measure with _scprintf, allocate, then print.  The
 * allocation result itself (not the out-pointer) is tested so a failed
 * malloc yields -1 instead of printing into NULL, and the whole expansion
 * is parenthesised so it behaves as a single expression. */
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
#define INT2STRING(buffer, i)                                       \
    ((*(buffer) = malloc(_scprintf("%d", (i)) + 1)) == NULL ?       \
         -1 :                                                       \
         _snprintf_s(*(buffer),                                     \
                     _scprintf("%d", (i)) + 1,                      \
                     _scprintf("%d", (i)) + 1,                      \
                     "%d",                                          \
                     (i)))
#define STRCAT(dest, n, src) strcat_s((dest), (n), (src))
#else
#define INT2STRING(buffer, i)                                       \
    ((*(buffer) = malloc(_scprintf("%d", (i)) + 1)) == NULL ?       \
         -1 :                                                       \
         _snprintf(*(buffer),                                       \
                   _scprintf("%d", (i)) + 1,                        \
                   "%d",                                            \
                   (i)))
#define STRCAT(dest, n, src) strcat((dest), (src))
#endif
#else
#define INT2STRING(buffer, i) asprintf((buffer), "%d", (i))
#define STRCAT(dest, n, src) strcat((dest), (src))
#endif
00089
00090
00091 static PyObject* elements_to_dict(const char* string, int max,
00092 PyObject* as_class, unsigned char tz_aware);
00093
00094 static int _write_element_to_buffer(buffer_t buffer, int type_byte, PyObject* value,
00095 unsigned char check_keys, unsigned char first_attempt);
00096
00097
00098 static PyObject* datetime_from_millis(long long millis) {
00099 int microseconds = (millis % 1000) * 1000;
00100 Time64_T seconds = millis / 1000;
00101 struct TM timeinfo;
00102 gmtime64_r(&seconds, &timeinfo);
00103
00104 return PyDateTime_FromDateAndTime(timeinfo.tm_year + 1900,
00105 timeinfo.tm_mon + 1,
00106 timeinfo.tm_mday,
00107 timeinfo.tm_hour,
00108 timeinfo.tm_min,
00109 timeinfo.tm_sec,
00110 microseconds);
00111 }
00112
00113 static long long millis_from_datetime(PyObject* datetime) {
00114 struct TM timeinfo;
00115 long long millis;
00116
00117 timeinfo.tm_year = PyDateTime_GET_YEAR(datetime) - 1900;
00118 timeinfo.tm_mon = PyDateTime_GET_MONTH(datetime) - 1;
00119 timeinfo.tm_mday = PyDateTime_GET_DAY(datetime);
00120 timeinfo.tm_hour = PyDateTime_DATE_GET_HOUR(datetime);
00121 timeinfo.tm_min = PyDateTime_DATE_GET_MINUTE(datetime);
00122 timeinfo.tm_sec = PyDateTime_DATE_GET_SECOND(datetime);
00123
00124 millis = timegm64(&timeinfo) * 1000;
00125 millis += PyDateTime_DATE_GET_MICROSECOND(datetime) / 1000;
00126 return millis;
00127 }
00128
00129
00130 int buffer_write_bytes(buffer_t buffer, const char* data, int size) {
00131 if (buffer_write(buffer, data, size)) {
00132 PyErr_NoMemory();
00133 return 0;
00134 }
00135 return 1;
00136 }
00137
00138
00139 static int write_string(buffer_t buffer, PyObject* py_string) {
00140 Py_ssize_t string_length;
00141 const char* string = PyString_AsString(py_string);
00142 if (!string) {
00143 return 1;
00144 }
00145 string_length = PyString_Size(py_string) + 1;
00146
00147 if (!buffer_write_bytes(buffer, (const char*)&string_length, 4)) {
00148 return 0;
00149 }
00150 if (!buffer_write_bytes(buffer, string, string_length)) {
00151 return 0;
00152 }
00153 return 1;
00154 }
00155
00156
00157
00158
00159 static PyObject* _error(char* name) {
00160 PyObject* error;
00161 PyObject* errors = PyImport_ImportModule("bson.errors");
00162 if (!errors) {
00163 return NULL;
00164 }
00165 error = PyObject_GetAttrString(errors, name);
00166 Py_DECREF(errors);
00167 return error;
00168 }
00169
00170
00171
00172
00173 static int _reload_object(PyObject** object, char* module_name, char* object_name) {
00174 PyObject* module;
00175
00176 module = PyImport_ImportModule(module_name);
00177 if (!module) {
00178 return 1;
00179 }
00180
00181 *object = PyObject_GetAttrString(module, object_name);
00182 Py_DECREF(module);
00183
00184 return (*object) ? 0 : 2;
00185 }
00186
00187
00188
00189
00190 static int _reload_python_objects(void) {
00191 if (_reload_object(&Binary, "bson.binary", "Binary") ||
00192 _reload_object(&Code, "bson.code", "Code") ||
00193 _reload_object(&ObjectId, "bson.objectid", "ObjectId") ||
00194 _reload_object(&DBRef, "bson.dbref", "DBRef") ||
00195 _reload_object(&Timestamp, "bson.timestamp", "Timestamp") ||
00196 _reload_object(&MinKey, "bson.min_key", "MinKey") ||
00197 _reload_object(&MaxKey, "bson.max_key", "MaxKey") ||
00198 _reload_object(&UTC, "bson.tz_util", "utc") ||
00199 _reload_object(&RECompile, "re", "compile")) {
00200 return 1;
00201 }
00202
00203 if (_reload_object(&UUID, "uuid", "UUID") == 1) {
00204 UUID = NULL;
00205 PyErr_Clear();
00206 }
00207
00208 REType = PyObject_CallFunction(RECompile, "O",
00209 PyString_FromString(""))->ob_type;
00210 return 0;
00211 }
00212
00213 static int write_element_to_buffer(buffer_t buffer, int type_byte,
00214 PyObject* value, unsigned char check_keys,
00215 unsigned char first_attempt) {
00216 int result;
00217 if(Py_EnterRecursiveCall(" while encoding an object to BSON "))
00218 return 0;
00219 result = _write_element_to_buffer(buffer, type_byte, value,
00220 check_keys, first_attempt);
00221 Py_LeaveRecursiveCall();
00222 return result;
00223 }
00224
00225
00226
00227
00228
00229
00230 static int _write_element_to_buffer(buffer_t buffer, int type_byte, PyObject* value,
00231 unsigned char check_keys, unsigned char first_attempt) {
00232 if (PyBool_Check(value)) {
00233 const long bool = PyInt_AsLong(value);
00234 const char c = bool ? 0x01 : 0x00;
00235 *(buffer_get_buffer(buffer) + type_byte) = 0x08;
00236 return buffer_write_bytes(buffer, &c, 1);
00237 }
00238 else if (PyInt_Check(value) || PyLong_Check(value)) {
00239 const long long_value = PyInt_AsLong(value);
00240 const int int_value = (int)long_value;
00241 if (PyErr_Occurred() || long_value != int_value) {
00242 long long long_long_value;
00243 PyErr_Clear();
00244 long_long_value = PyLong_AsLongLong(value);
00245 if (PyErr_Occurred()) {
00246 PyErr_SetString(PyExc_OverflowError,
00247 "MongoDB can only handle up to 8-byte ints");
00248 return 0;
00249 }
00250 *(buffer_get_buffer(buffer) + type_byte) = 0x12;
00251 return buffer_write_bytes(buffer, (const char*)&long_long_value, 8);
00252 }
00253 *(buffer_get_buffer(buffer) + type_byte) = 0x10;
00254 return buffer_write_bytes(buffer, (const char*)&int_value, 4);
00255 } else if (PyFloat_Check(value)) {
00256 const double d = PyFloat_AsDouble(value);
00257 *(buffer_get_buffer(buffer) + type_byte) = 0x01;
00258 return buffer_write_bytes(buffer, (const char*)&d, 8);
00259 } else if (value == Py_None) {
00260 *(buffer_get_buffer(buffer) + type_byte) = 0x0A;
00261 return 1;
00262 } else if (PyDict_Check(value)) {
00263 *(buffer_get_buffer(buffer) + type_byte) = 0x03;
00264 return write_dict(buffer, value, check_keys, 0);
00265 } else if (PyList_Check(value) || PyTuple_Check(value)) {
00266 int start_position,
00267 length_location,
00268 items,
00269 length,
00270 i;
00271 char zero = 0;
00272
00273 *(buffer_get_buffer(buffer) + type_byte) = 0x04;
00274 start_position = buffer_get_position(buffer);
00275
00276
00277 length_location = buffer_save_space(buffer, 4);
00278 if (length_location == -1) {
00279 PyErr_NoMemory();
00280 return 0;
00281 }
00282
00283 items = PySequence_Size(value);
00284 for(i = 0; i < items; i++) {
00285 int list_type_byte = buffer_save_space(buffer, 1);
00286 char* name;
00287 PyObject* item_value;
00288
00289 if (list_type_byte == -1) {
00290 PyErr_NoMemory();
00291 return 0;
00292 }
00293 if (INT2STRING(&name, i) < 0 || !name) {
00294 PyErr_NoMemory();
00295 return 0;
00296 }
00297 if (!buffer_write_bytes(buffer, name, strlen(name) + 1)) {
00298 free(name);
00299 return 0;
00300 }
00301 free(name);
00302
00303 item_value = PySequence_GetItem(value, i);
00304 if (!write_element_to_buffer(buffer, list_type_byte, item_value, check_keys, 1)) {
00305 Py_DECREF(item_value);
00306 return 0;
00307 }
00308 Py_DECREF(item_value);
00309 }
00310
00311
00312 if (!buffer_write_bytes(buffer, &zero, 1)) {
00313 return 0;
00314 }
00315 length = buffer_get_position(buffer) - start_position;
00316 memcpy(buffer_get_buffer(buffer) + length_location, &length, 4);
00317 return 1;
00318 } else if (PyObject_IsInstance(value, Binary)) {
00319 PyObject* subtype_object;
00320
00321 *(buffer_get_buffer(buffer) + type_byte) = 0x05;
00322 subtype_object = PyObject_GetAttrString(value, "subtype");
00323 if (!subtype_object) {
00324 return 0;
00325 }
00326 {
00327 const long long_subtype = PyInt_AsLong(subtype_object);
00328 const char subtype = (const char)long_subtype;
00329 const int length = PyString_Size(value);
00330
00331 Py_DECREF(subtype_object);
00332 if (subtype == 2) {
00333 const int other_length = length + 4;
00334 if (!buffer_write_bytes(buffer, (const char*)&other_length, 4)) {
00335 return 0;
00336 }
00337 if (!buffer_write_bytes(buffer, &subtype, 1)) {
00338 return 0;
00339 }
00340 }
00341 if (!buffer_write_bytes(buffer, (const char*)&length, 4)) {
00342 return 0;
00343 }
00344 if (subtype != 2) {
00345 if (!buffer_write_bytes(buffer, &subtype, 1)) {
00346 return 0;
00347 }
00348 }
00349 {
00350 const char* string = PyString_AsString(value);
00351 if (!string) {
00352 return 0;
00353 }
00354 if (!buffer_write_bytes(buffer, string, length)) {
00355 return 0;
00356 }
00357 }
00358 }
00359 return 1;
00360 } else if (UUID && PyObject_IsInstance(value, UUID)) {
00361
00362
00363
00364 int length = 16;
00365 const char subtype = 3;
00366
00367 PyObject* bytes;
00368
00369 *(buffer_get_buffer(buffer) + type_byte) = 0x05;
00370 if (!buffer_write_bytes(buffer, (const char*)&length, 4)) {
00371 return 0;
00372 }
00373 if (!buffer_write_bytes(buffer, &subtype, 1)) {
00374 return 0;
00375 }
00376
00377 bytes = PyObject_GetAttrString(value, "bytes");
00378 if (!bytes) {
00379 return 0;
00380 }
00381 if (!buffer_write_bytes(buffer, PyString_AsString(bytes), length)) {
00382 Py_DECREF(bytes);
00383 return 0;
00384 }
00385 Py_DECREF(bytes);
00386 return 1;
00387 } else if (PyObject_IsInstance(value, Code)) {
00388 int start_position,
00389 length_location,
00390 length;
00391 PyObject* scope;
00392
00393 *(buffer_get_buffer(buffer) + type_byte) = 0x0F;
00394
00395 start_position = buffer_get_position(buffer);
00396
00397 length_location = buffer_save_space(buffer, 4);
00398 if (length_location == -1) {
00399 PyErr_NoMemory();
00400 return 0;
00401 }
00402
00403 if (!write_string(buffer, value)) {
00404 return 0;
00405 }
00406
00407 scope = PyObject_GetAttrString(value, "scope");
00408 if (!scope) {
00409 return 0;
00410 }
00411 if (!write_dict(buffer, scope, 0, 0)) {
00412 Py_DECREF(scope);
00413 return 0;
00414 }
00415 Py_DECREF(scope);
00416
00417 length = buffer_get_position(buffer) - start_position;
00418 memcpy(buffer_get_buffer(buffer) + length_location, &length, 4);
00419 return 1;
00420 } else if (PyString_Check(value)) {
00421 int result;
00422 result_t status;
00423
00424 *(buffer_get_buffer(buffer) + type_byte) = 0x02;
00425 status = check_string((const unsigned char*)PyString_AsString(value),
00426 PyString_Size(value), 1, 0);
00427 if (status == NOT_UTF_8) {
00428 PyObject* InvalidStringData = _error("InvalidStringData");
00429 PyErr_SetString(InvalidStringData,
00430 "strings in documents must be valid UTF-8");
00431 Py_DECREF(InvalidStringData);
00432 return 0;
00433 }
00434 result = write_string(buffer, value);
00435 return result;
00436 } else if (PyUnicode_Check(value)) {
00437 PyObject* encoded;
00438 int result;
00439
00440 *(buffer_get_buffer(buffer) + type_byte) = 0x02;
00441 encoded = PyUnicode_AsUTF8String(value);
00442 if (!encoded) {
00443 return 0;
00444 }
00445 result = write_string(buffer, encoded);
00446 Py_DECREF(encoded);
00447 return result;
00448 } else if (PyDateTime_Check(value)) {
00449 long long millis;
00450 PyObject* utcoffset = PyObject_CallMethod(value, "utcoffset", NULL);
00451 if (utcoffset != Py_None) {
00452 PyObject* result = PyNumber_Subtract(value, utcoffset);
00453 Py_DECREF(utcoffset);
00454 if (!result) {
00455 return 0;
00456 }
00457 millis = millis_from_datetime(result);
00458 Py_DECREF(result);
00459 } else {
00460 millis = millis_from_datetime(value);
00461 }
00462 *(buffer_get_buffer(buffer) + type_byte) = 0x09;
00463 return buffer_write_bytes(buffer, (const char*)&millis, 8);
00464 } else if (PyObject_IsInstance(value, ObjectId)) {
00465 PyObject* pystring = PyObject_GetAttrString(value, "_ObjectId__id");
00466 if (!pystring) {
00467 return 0;
00468 }
00469 {
00470 const char* as_string = PyString_AsString(pystring);
00471 if (!as_string) {
00472 Py_DECREF(pystring);
00473 return 0;
00474 }
00475 if (!buffer_write_bytes(buffer, as_string, 12)) {
00476 Py_DECREF(pystring);
00477 return 0;
00478 }
00479 Py_DECREF(pystring);
00480 *(buffer_get_buffer(buffer) + type_byte) = 0x07;
00481 }
00482 return 1;
00483 } else if (PyObject_IsInstance(value, DBRef)) {
00484 PyObject* as_doc = PyObject_CallMethod(value, "as_doc", NULL);
00485 if (!as_doc) {
00486 return 0;
00487 }
00488 if (!write_dict(buffer, as_doc, 0, 0)) {
00489 Py_DECREF(as_doc);
00490 return 0;
00491 }
00492 Py_DECREF(as_doc);
00493 *(buffer_get_buffer(buffer) + type_byte) = 0x03;
00494 return 1;
00495 } else if (PyObject_IsInstance(value, Timestamp)) {
00496 PyObject* obj;
00497 long i;
00498
00499 obj = PyObject_GetAttrString(value, "inc");
00500 if (!obj) {
00501 return 0;
00502 }
00503 i = PyInt_AsLong(obj);
00504 Py_DECREF(obj);
00505 if (!buffer_write_bytes(buffer, (const char*)&i, 4)) {
00506 return 0;
00507 }
00508
00509 obj = PyObject_GetAttrString(value, "time");
00510 if (!obj) {
00511 return 0;
00512 }
00513 i = PyInt_AsLong(obj);
00514 Py_DECREF(obj);
00515 if (!buffer_write_bytes(buffer, (const char*)&i, 4)) {
00516 return 0;
00517 }
00518
00519 *(buffer_get_buffer(buffer) + type_byte) = 0x11;
00520 return 1;
00521 }
00522 else if (PyObject_TypeCheck(value, REType)) {
00523 PyObject* py_flags = PyObject_GetAttrString(value, "flags");
00524 PyObject* py_pattern;
00525 PyObject* encoded_pattern;
00526 long int_flags;
00527 char flags[FLAGS_SIZE];
00528 char check_utf8 = 0;
00529 int pattern_length,
00530 flags_length;
00531 result_t status;
00532
00533 if (!py_flags) {
00534 return 0;
00535 }
00536 int_flags = PyInt_AsLong(py_flags);
00537 Py_DECREF(py_flags);
00538 py_pattern = PyObject_GetAttrString(value, "pattern");
00539 if (!py_pattern) {
00540 return 0;
00541 }
00542
00543 if (PyUnicode_Check(py_pattern)) {
00544 encoded_pattern = PyUnicode_AsUTF8String(py_pattern);
00545 Py_DECREF(py_pattern);
00546 if (!encoded_pattern) {
00547 return 0;
00548 }
00549 } else {
00550 encoded_pattern = py_pattern;
00551 check_utf8 = 1;
00552 }
00553
00554 status = check_string((const unsigned char*)PyString_AsString(encoded_pattern),
00555 PyString_Size(encoded_pattern), check_utf8, 1);
00556 if (status == NOT_UTF_8) {
00557 PyObject* InvalidStringData = _error("InvalidStringData");
00558 PyErr_SetString(InvalidStringData,
00559 "regex patterns must be valid UTF-8");
00560 Py_DECREF(InvalidStringData);
00561 return 0;
00562 } else if (status == HAS_NULL) {
00563 PyObject* InvalidDocument = _error("InvalidDocument");
00564 PyErr_SetString(InvalidDocument,
00565 "regex patterns must not contain the NULL byte");
00566 Py_DECREF(InvalidDocument);
00567 return 0;
00568 }
00569
00570 {
00571 const char* pattern = PyString_AsString(encoded_pattern);
00572 pattern_length = strlen(pattern) + 1;
00573
00574 if (!buffer_write_bytes(buffer, pattern, pattern_length)) {
00575 Py_DECREF(encoded_pattern);
00576 return 0;
00577 }
00578 }
00579 Py_DECREF(encoded_pattern);
00580
00581 flags[0] = 0;
00582
00583 if (int_flags & 2) {
00584 STRCAT(flags, FLAGS_SIZE, "i");
00585 }
00586 if (int_flags & 4) {
00587 STRCAT(flags, FLAGS_SIZE, "l");
00588 }
00589 if (int_flags & 8) {
00590 STRCAT(flags, FLAGS_SIZE, "m");
00591 }
00592 if (int_flags & 16) {
00593 STRCAT(flags, FLAGS_SIZE, "s");
00594 }
00595 if (int_flags & 32) {
00596 STRCAT(flags, FLAGS_SIZE, "u");
00597 }
00598 if (int_flags & 64) {
00599 STRCAT(flags, FLAGS_SIZE, "x");
00600 }
00601 flags_length = strlen(flags) + 1;
00602 if (!buffer_write_bytes(buffer, flags, flags_length)) {
00603 return 0;
00604 }
00605 *(buffer_get_buffer(buffer) + type_byte) = 0x0B;
00606 return 1;
00607 } else if (PyObject_IsInstance(value, MinKey)) {
00608 *(buffer_get_buffer(buffer) + type_byte) = 0xFF;
00609 return 1;
00610 } else if (PyObject_IsInstance(value, MaxKey)) {
00611 *(buffer_get_buffer(buffer) + type_byte) = 0x7F;
00612 return 1;
00613 } else if (first_attempt) {
00614
00615 if (WARN(PyExc_RuntimeWarning, "couldn't encode - reloading python "
00616 "modules and trying again. if you see this without getting "
00617 "an InvalidDocument exception please see http://api.mongodb"
00618 ".org/python/current/faq.html#does-pymongo-work-with-mod-"
00619 "wsgi") == -1) {
00620 return 0;
00621 }
00622 if (_reload_python_objects()) {
00623 return 0;
00624 }
00625 return write_element_to_buffer(buffer, type_byte, value, check_keys, 0);
00626 }
00627 {
00628 PyObject* errmsg = PyString_FromString("Cannot encode object: ");
00629 PyObject* repr = PyObject_Repr(value);
00630 PyObject* InvalidDocument = _error("InvalidDocument");
00631 PyString_ConcatAndDel(&errmsg, repr);
00632 PyErr_SetString(InvalidDocument, PyString_AsString(errmsg));
00633 Py_DECREF(errmsg);
00634 Py_DECREF(InvalidDocument);
00635 return 0;
00636 }
00637 }
00638
00639 static int check_key_name(const char* name,
00640 const Py_ssize_t name_length) {
00641 int i;
00642 if (name_length > 0 && name[0] == '$') {
00643 PyObject* InvalidDocument = _error("InvalidDocument");
00644 PyObject* errmsg = PyString_FromFormat("key '%s' must not start with '$'", name);
00645 PyErr_SetString(InvalidDocument, PyString_AsString(errmsg));
00646 Py_DECREF(errmsg);
00647 Py_DECREF(InvalidDocument);
00648 return 0;
00649 }
00650 for (i = 0; i < name_length; i++) {
00651 if (name[i] == '.') {
00652 PyObject* InvalidDocument = _error("InvalidDocument");
00653 PyObject* errmsg = PyString_FromFormat("key '%s' must not contain '.'", name);
00654 PyErr_SetString(InvalidDocument, PyString_AsString(errmsg));
00655 Py_DECREF(errmsg);
00656 Py_DECREF(InvalidDocument);
00657 return 0;
00658 }
00659 }
00660 return 1;
00661 }
00662
00663
00664
00665
00666 int write_pair(buffer_t buffer, const char* name, Py_ssize_t name_length,
00667 PyObject* value, unsigned char check_keys, unsigned char allow_id) {
00668 int type_byte;
00669
00670
00671
00672
00673 if (!allow_id && strcmp(name, "_id") == 0) {
00674 return 1;
00675 }
00676
00677 type_byte = buffer_save_space(buffer, 1);
00678 if (type_byte == -1) {
00679 PyErr_NoMemory();
00680 return 0;
00681 }
00682 if (check_keys && !check_key_name(name, name_length)) {
00683 return 0;
00684 }
00685 if (!buffer_write_bytes(buffer, name, name_length + 1)) {
00686 return 0;
00687 }
00688 if (!write_element_to_buffer(buffer, type_byte, value, check_keys, 1)) {
00689 return 0;
00690 }
00691 return 1;
00692 }
00693
00694 int decode_and_write_pair(buffer_t buffer,
00695 PyObject* key, PyObject* value,
00696 unsigned char check_keys, unsigned char top_level) {
00697 PyObject* encoded;
00698 if (PyUnicode_Check(key)) {
00699 result_t status;
00700 encoded = PyUnicode_AsUTF8String(key);
00701 if (!encoded) {
00702 return 0;
00703 }
00704 status = check_string((const unsigned char*)PyString_AsString(encoded),
00705 PyString_Size(encoded), 0, 1);
00706
00707 if (status == HAS_NULL) {
00708 PyObject* InvalidDocument = _error("InvalidDocument");
00709 PyErr_SetString(InvalidDocument,
00710 "Key names must not contain the NULL byte");
00711 Py_DECREF(InvalidDocument);
00712 return 0;
00713 }
00714 } else if (PyString_Check(key)) {
00715 result_t status;
00716 encoded = key;
00717 Py_INCREF(encoded);
00718
00719 status = check_string((const unsigned char*)PyString_AsString(encoded),
00720 PyString_Size(encoded), 1, 1);
00721
00722 if (status == NOT_UTF_8) {
00723 PyObject* InvalidStringData = _error("InvalidStringData");
00724 PyErr_SetString(InvalidStringData,
00725 "strings in documents must be valid UTF-8");
00726 Py_DECREF(InvalidStringData);
00727 return 0;
00728 } else if (status == HAS_NULL) {
00729 PyObject* InvalidDocument = _error("InvalidDocument");
00730 PyErr_SetString(InvalidDocument,
00731 "Key names must not contain the NULL byte");
00732 Py_DECREF(InvalidDocument);
00733 return 0;
00734 }
00735 } else {
00736 PyObject* InvalidDocument = _error("InvalidDocument");
00737 PyObject* errmsg = PyString_FromString("documents must have only string keys, key was ");
00738 PyObject* repr = PyObject_Repr(key);
00739 PyString_ConcatAndDel(&errmsg, repr);
00740 PyErr_SetString(InvalidDocument, PyString_AsString(errmsg));
00741 Py_DECREF(InvalidDocument);
00742 Py_DECREF(errmsg);
00743 return 0;
00744 }
00745
00746
00747 if (!write_pair(buffer, PyString_AsString(encoded),
00748 PyString_Size(encoded), value, check_keys, !top_level)) {
00749 Py_DECREF(encoded);
00750 return 0;
00751 }
00752
00753 Py_DECREF(encoded);
00754 return 1;
00755 }
00756
00757
00758 int write_dict(buffer_t buffer, PyObject* dict, unsigned char check_keys, unsigned char top_level) {
00759 PyObject* key;
00760 PyObject* iter;
00761 char zero = 0;
00762 int length;
00763 int length_location;
00764
00765 if (!PyDict_Check(dict)) {
00766 PyObject* errmsg = PyString_FromString("encoder expected a mapping type but got: ");
00767 PyObject* repr = PyObject_Repr(dict);
00768 PyString_ConcatAndDel(&errmsg, repr);
00769 PyErr_SetString(PyExc_TypeError, PyString_AsString(errmsg));
00770 Py_DECREF(errmsg);
00771 return 0;
00772 }
00773
00774 length_location = buffer_save_space(buffer, 4);
00775 if (length_location == -1) {
00776 PyErr_NoMemory();
00777 return 0;
00778 }
00779
00780
00781 if (top_level) {
00782 PyObject* _id = PyDict_GetItemString(dict, "_id");
00783 if (_id) {
00784
00785
00786 if (!write_pair(buffer, "_id", 3, _id, 0, 1)) {
00787 return 0;
00788 }
00789 }
00790 }
00791
00792 iter = PyObject_GetIter(dict);
00793 if (iter == NULL) {
00794 return 0;
00795 }
00796 while ((key = PyIter_Next(iter)) != NULL) {
00797 PyObject* value = PyDict_GetItem(dict, key);
00798 if (!value) {
00799 PyErr_SetObject(PyExc_KeyError, key);
00800 Py_DECREF(key);
00801 Py_DECREF(iter);
00802 return 0;
00803 }
00804 if (!decode_and_write_pair(buffer, key, value, check_keys, top_level)) {
00805 Py_DECREF(key);
00806 Py_DECREF(iter);
00807 return 0;
00808 }
00809 Py_DECREF(key);
00810 }
00811 Py_DECREF(iter);
00812
00813
00814 if (!buffer_write_bytes(buffer, &zero, 1)) {
00815 return 0;
00816 }
00817 length = buffer_get_position(buffer) - length_location;
00818 memcpy(buffer_get_buffer(buffer) + length_location, &length, 4);
00819 return 1;
00820 }
00821
00822 static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) {
00823 PyObject* dict;
00824 PyObject* result;
00825 unsigned char check_keys;
00826 buffer_t buffer;
00827
00828 if (!PyArg_ParseTuple(args, "Ob", &dict, &check_keys)) {
00829 return NULL;
00830 }
00831
00832 buffer = buffer_new();
00833 if (!buffer) {
00834 PyErr_NoMemory();
00835 return NULL;
00836 }
00837
00838 if (!write_dict(buffer, dict, check_keys, 1)) {
00839 buffer_free(buffer);
00840 return NULL;
00841 }
00842
00843
00844 result = Py_BuildValue("s#", buffer_get_buffer(buffer),
00845 buffer_get_position(buffer));
00846 buffer_free(buffer);
00847 return result;
00848 }
00849
00850 static PyObject* get_value(const char* buffer, int* position, int type,
00851 PyObject* as_class, unsigned char tz_aware) {
00852 PyObject* value;
00853 switch (type) {
00854 case 1:
00855 {
00856 double d;
00857 memcpy(&d, buffer + *position, 8);
00858 value = PyFloat_FromDouble(d);
00859 if (!value) {
00860 return NULL;
00861 }
00862 *position += 8;
00863 break;
00864 }
00865 case 2:
00866 case 13:
00867 case 14:
00868 {
00869 int value_length = ((int*)(buffer + *position))[0] - 1;
00870 *position += 4;
00871 value = PyUnicode_DecodeUTF8(buffer + *position, value_length, "strict");
00872 if (!value) {
00873 return NULL;
00874 }
00875 *position += value_length + 1;
00876 break;
00877 }
00878 case 3:
00879 {
00880 int size;
00881 memcpy(&size, buffer + *position, 4);
00882 value = elements_to_dict(buffer + *position + 4, size - 5, as_class, tz_aware);
00883 if (!value) {
00884 return NULL;
00885 }
00886
00887
00888 if (strcmp(buffer + *position + 5, "$ref") == 0) {
00889 PyObject* dbref;
00890 PyObject* collection = PyDict_GetItemString(value, "$ref");
00891 PyObject* id = PyDict_GetItemString(value, "$id");
00892 PyObject* database = PyDict_GetItemString(value, "$db");
00893
00894 Py_INCREF(collection);
00895 PyDict_DelItemString(value, "$ref");
00896 Py_INCREF(id);
00897 PyDict_DelItemString(value, "$id");
00898
00899 if (database == NULL) {
00900 database = Py_None;
00901 Py_INCREF(database);
00902 } else {
00903 Py_INCREF(database);
00904 PyDict_DelItemString(value, "$db");
00905 }
00906
00907 dbref = PyObject_CallFunctionObjArgs(DBRef, collection, id, database, value, NULL);
00908 Py_DECREF(value);
00909 value = dbref;
00910
00911 Py_DECREF(id);
00912 Py_DECREF(collection);
00913 Py_DECREF(database);
00914 if (!value) {
00915 return NULL;
00916 }
00917 }
00918
00919 *position += size;
00920 break;
00921 }
00922 case 4:
00923 {
00924 int size,
00925 end;
00926
00927 memcpy(&size, buffer + *position, 4);
00928 end = *position + size - 1;
00929 *position += 4;
00930
00931 value = PyList_New(0);
00932 if (!value) {
00933 return NULL;
00934 }
00935 while (*position < end) {
00936 PyObject* to_append;
00937
00938 int type = (int)buffer[(*position)++];
00939 int key_size = strlen(buffer + *position);
00940 *position += key_size + 1;
00941 to_append = get_value(buffer, position, type, as_class, tz_aware);
00942 if (!to_append) {
00943 return NULL;
00944 }
00945 PyList_Append(value, to_append);
00946 Py_DECREF(to_append);
00947 }
00948 (*position)++;
00949 break;
00950 }
00951 case 5:
00952 {
00953 PyObject* data;
00954 PyObject* st;
00955 int length,
00956 subtype;
00957
00958 memcpy(&length, buffer + *position, 4);
00959 subtype = (unsigned char)buffer[*position + 4];
00960
00961 if (subtype == 2) {
00962 data = PyString_FromStringAndSize(buffer + *position + 9, length - 4);
00963 } else {
00964 data = PyString_FromStringAndSize(buffer + *position + 5, length);
00965 }
00966 if (!data) {
00967 return NULL;
00968 }
00969
00970 if (subtype == 3 && UUID) {
00971 PyObject* kwargs;
00972 PyObject* args = PyTuple_New(0);
00973 if (!args) {
00974 return NULL;
00975 }
00976 kwargs = PyDict_New();
00977 if (!kwargs) {
00978 Py_DECREF(args);
00979 return NULL;
00980 }
00981
00982 assert(length == 16);
00983
00984 PyDict_SetItemString(kwargs, "bytes", data);
00985 value = PyObject_Call(UUID, args, kwargs);
00986
00987 Py_DECREF(args);
00988 Py_DECREF(kwargs);
00989 Py_DECREF(data);
00990 if (!value) {
00991 return NULL;
00992 }
00993
00994 *position += length + 5;
00995 break;
00996 }
00997
00998 st = PyInt_FromLong(subtype);
00999 if (!st) {
01000 Py_DECREF(data);
01001 return NULL;
01002 }
01003 value = PyObject_CallFunctionObjArgs(Binary, data, st, NULL);
01004 Py_DECREF(st);
01005 Py_DECREF(data);
01006 if (!value) {
01007 return NULL;
01008 }
01009 *position += length + 5;
01010 break;
01011 }
01012 case 6:
01013 case 10:
01014 {
01015 value = Py_None;
01016 Py_INCREF(value);
01017 break;
01018 }
01019 case 7:
01020 {
01021 value = PyObject_CallFunction(ObjectId, "s#", buffer + *position, 12);
01022 if (!value) {
01023 return NULL;
01024 }
01025 *position += 12;
01026 break;
01027 }
01028 case 8:
01029 {
01030 value = buffer[(*position)++] ? Py_True : Py_False;
01031 Py_INCREF(value);
01032 break;
01033 }
01034 case 9:
01035 {
01036 PyObject* replace;
01037 PyObject* args;
01038 PyObject* kwargs;
01039 PyObject* naive = datetime_from_millis(*(long long*)(buffer + *position));
01040 *position += 8;
01041 if (!tz_aware) {
01042 value = naive;
01043 break;
01044 }
01045
01046 if (!naive) {
01047 return NULL;
01048 }
01049 replace = PyObject_GetAttrString(naive, "replace");
01050 Py_DECREF(naive);
01051 if (!replace) {
01052 return NULL;
01053 }
01054 args = PyTuple_New(0);
01055 if (!args) {
01056 Py_DECREF(replace);
01057 return NULL;
01058 }
01059 kwargs = PyDict_New();
01060 if (!kwargs) {
01061 Py_DECREF(replace);
01062 Py_DECREF(args);
01063 return NULL;
01064 }
01065 if (PyDict_SetItemString(kwargs, "tzinfo", UTC) == -1) {
01066 Py_DECREF(replace);
01067 Py_DECREF(args);
01068 Py_DECREF(kwargs);
01069 return NULL;
01070 }
01071 value = PyObject_Call(replace, args, kwargs);
01072 Py_DECREF(replace);
01073 Py_DECREF(args);
01074 Py_DECREF(kwargs);
01075 break;
01076 }
01077 case 11:
01078 {
01079 int flags_length,
01080 flags,
01081 i;
01082
01083 int pattern_length = strlen(buffer + *position);
01084 PyObject* pattern = PyUnicode_DecodeUTF8(buffer + *position, pattern_length, "strict");
01085 if (!pattern) {
01086 return NULL;
01087 }
01088 *position += pattern_length + 1;
01089 flags_length = strlen(buffer + *position);
01090 flags = 0;
01091 for (i = 0; i < flags_length; i++) {
01092 if (buffer[*position + i] == 'i') {
01093 flags |= 2;
01094 } else if (buffer[*position + i] == 'l') {
01095 flags |= 4;
01096 } else if (buffer[*position + i] == 'm') {
01097 flags |= 8;
01098 } else if (buffer[*position + i] == 's') {
01099 flags |= 16;
01100 } else if (buffer[*position + i] == 'u') {
01101 flags |= 32;
01102 } else if (buffer[*position + i] == 'x') {
01103 flags |= 64;
01104 }
01105 }
01106 *position += flags_length + 1;
01107 value = PyObject_CallFunction(RECompile, "Oi", pattern, flags);
01108 Py_DECREF(pattern);
01109 break;
01110 }
01111 case 12:
01112 {
01113 int collection_length;
01114 PyObject* collection;
01115 PyObject* id;
01116
01117 *position += 4;
01118 collection_length = strlen(buffer + *position);
01119 collection = PyUnicode_DecodeUTF8(buffer + *position, collection_length, "strict");
01120 if (!collection) {
01121 return NULL;
01122 }
01123 *position += collection_length + 1;
01124 id = PyObject_CallFunction(ObjectId, "s#", buffer + *position, 12);
01125 if (!id) {
01126 Py_DECREF(collection);
01127 return NULL;
01128 }
01129 *position += 12;
01130 value = PyObject_CallFunctionObjArgs(DBRef, collection, id, NULL);
01131 Py_DECREF(collection);
01132 Py_DECREF(id);
01133 break;
01134 }
01135 case 15:
01136 {
01137 int code_length,
01138 scope_size;
01139 PyObject* code;
01140 PyObject* scope;
01141
01142 *position += 8;
01143 code_length = strlen(buffer + *position);
01144 code = PyUnicode_DecodeUTF8(buffer + *position, code_length, "strict");
01145 if (!code) {
01146 return NULL;
01147 }
01148 *position += code_length + 1;
01149
01150 memcpy(&scope_size, buffer + *position, 4);
01151 scope = elements_to_dict(buffer + *position + 4, scope_size - 5,
01152 (PyObject*)&PyDict_Type, tz_aware);
01153 if (!scope) {
01154 Py_DECREF(code);
01155 return NULL;
01156 }
01157 *position += scope_size;
01158
01159 value = PyObject_CallFunctionObjArgs(Code, code, scope, NULL);
01160 Py_DECREF(code);
01161 Py_DECREF(scope);
01162 break;
01163 }
01164 case 16:
01165 {
01166 int i;
01167 memcpy(&i, buffer + *position, 4);
01168 value = PyInt_FromLong(i);
01169 if (!value) {
01170 return NULL;
01171 }
01172 *position += 4;
01173 break;
01174 }
01175 case 17:
01176 {
01177 unsigned int time, inc;
01178 memcpy(&inc, buffer + *position, 4);
01179 memcpy(&time, buffer + *position + 4, 4);
01180 value = PyObject_CallFunction(Timestamp, "II", time, inc);
01181 if (!value) {
01182 return NULL;
01183 }
01184 *position += 8;
01185 break;
01186 }
01187 case 18:
01188 {
01189 long long ll;
01190 memcpy(&ll, buffer + *position, 8);
01191 value = PyLong_FromLongLong(ll);
01192 if (!value) {
01193 return NULL;
01194 }
01195 *position += 8;
01196 break;
01197 }
01198 case -1:
01199 {
01200 value = PyObject_CallFunctionObjArgs(MinKey, NULL);
01201 break;
01202 }
01203 case 127:
01204 {
01205 value = PyObject_CallFunctionObjArgs(MaxKey, NULL);
01206 break;
01207 }
01208 default:
01209 {
01210 PyObject* InvalidDocument = _error("InvalidDocument");
01211 PyErr_SetString(InvalidDocument, "no c decoder for this type yet");
01212 Py_DECREF(InvalidDocument);
01213 return NULL;
01214 }
01215 }
01216 return value;
01217 }
01218
01219 static PyObject* elements_to_dict(const char* string, int max,
01220 PyObject* as_class, unsigned char tz_aware) {
01221 int position = 0;
01222 PyObject* dict = PyObject_CallObject(as_class, NULL);
01223 if (!dict) {
01224 return NULL;
01225 }
01226 while (position < max) {
01227 int type = (int)string[position++];
01228 int name_length = strlen(string + position);
01229 PyObject* name = PyUnicode_DecodeUTF8(string + position, name_length, "strict");
01230 PyObject* value;
01231 if (!name) {
01232 return NULL;
01233 }
01234 position += name_length + 1;
01235 value = get_value(string, &position, type, as_class, tz_aware);
01236 if (!value) {
01237 return NULL;
01238 }
01239
01240 PyObject_SetItem(dict, name, value);
01241 Py_DECREF(name);
01242 Py_DECREF(value);
01243 }
01244 return dict;
01245 }
01246
01247 static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) {
01248 unsigned int size;
01249 Py_ssize_t total_size;
01250 const char* string;
01251 PyObject* bson;
01252 PyObject* as_class;
01253 unsigned char tz_aware;
01254 PyObject* dict;
01255 PyObject* remainder;
01256 PyObject* result;
01257
01258 if (!PyArg_ParseTuple(args, "OOb", &bson, &as_class, &tz_aware)) {
01259 return NULL;
01260 }
01261
01262 if (!PyString_Check(bson)) {
01263 PyErr_SetString(PyExc_TypeError, "argument to _bson_to_dict must be a string");
01264 return NULL;
01265 }
01266 total_size = PyString_Size(bson);
01267 if (total_size < 5) {
01268 PyObject* InvalidBSON = _error("InvalidBSON");
01269 PyErr_SetString(InvalidBSON,
01270 "not enough data for a BSON document");
01271 Py_DECREF(InvalidBSON);
01272 return NULL;
01273 }
01274
01275 string = PyString_AsString(bson);
01276 if (!string) {
01277 return NULL;
01278 }
01279 memcpy(&size, string, 4);
01280
01281 if (total_size < size) {
01282 PyObject* InvalidBSON = _error("InvalidBSON");
01283 PyErr_SetString(InvalidBSON,
01284 "objsize too large");
01285 Py_DECREF(InvalidBSON);
01286 return NULL;
01287 }
01288
01289 if (string[size - 1]) {
01290 PyObject* InvalidBSON = _error("InvalidBSON");
01291 PyErr_SetString(InvalidBSON,
01292 "bad eoo");
01293 Py_DECREF(InvalidBSON);
01294 return NULL;
01295 }
01296
01297 dict = elements_to_dict(string + 4, size - 5, as_class, tz_aware);
01298 if (!dict) {
01299 return NULL;
01300 }
01301 remainder = PyString_FromStringAndSize(string + size, total_size - size);
01302 if (!remainder) {
01303 Py_DECREF(dict);
01304 return NULL;
01305 }
01306 result = Py_BuildValue("OO", dict, remainder);
01307 Py_DECREF(dict);
01308 Py_DECREF(remainder);
01309 return result;
01310 }
01311
01312 static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) {
01313 unsigned int size;
01314 Py_ssize_t total_size;
01315 const char* string;
01316 PyObject* bson;
01317 PyObject* dict;
01318 PyObject* result;
01319 PyObject* as_class = (PyObject*)&PyDict_Type;
01320 unsigned char tz_aware = 1;
01321
01322 if (!PyArg_ParseTuple(args, "O|Ob", &bson, &as_class, &tz_aware)) {
01323 return NULL;
01324 }
01325
01326 if (!PyString_Check(bson)) {
01327 PyErr_SetString(PyExc_TypeError, "argument to decode_all must be a string");
01328 return NULL;
01329 }
01330 total_size = PyString_Size(bson);
01331 string = PyString_AsString(bson);
01332 if (!string) {
01333 return NULL;
01334 }
01335
01336 result = PyList_New(0);
01337
01338 while (total_size > 0) {
01339 if (total_size < 5) {
01340 PyObject* InvalidBSON = _error("InvalidBSON");
01341 PyErr_SetString(InvalidBSON,
01342 "not enough data for a BSON document");
01343 Py_DECREF(InvalidBSON);
01344 return NULL;
01345 }
01346
01347 memcpy(&size, string, 4);
01348
01349 if (total_size < size) {
01350 PyObject* InvalidBSON = _error("InvalidBSON");
01351 PyErr_SetString(InvalidBSON,
01352 "objsize too large");
01353 Py_DECREF(InvalidBSON);
01354 return NULL;
01355 }
01356
01357 if (string[size - 1]) {
01358 PyObject* InvalidBSON = _error("InvalidBSON");
01359 PyErr_SetString(InvalidBSON,
01360 "bad eoo");
01361 Py_DECREF(InvalidBSON);
01362 return NULL;
01363 }
01364
01365 dict = elements_to_dict(string + 4, size - 5, as_class, tz_aware);
01366 if (!dict) {
01367 return NULL;
01368 }
01369 PyList_Append(result, dict);
01370 Py_DECREF(dict);
01371 string += size;
01372 total_size -= size;
01373 }
01374
01375 return result;
01376 }
01377
01378 static PyMethodDef _CBSONMethods[] = {
01379 {"_dict_to_bson", _cbson_dict_to_bson, METH_VARARGS,
01380 "convert a dictionary to a string containing it's BSON representation."},
01381 {"_bson_to_dict", _cbson_bson_to_dict, METH_VARARGS,
01382 "convert a BSON string to a SON object."},
01383 {"decode_all", _cbson_decode_all, METH_VARARGS,
01384 "convert binary data to a sequence of documents."},
01385 {NULL, NULL, 0, NULL}
01386 };
01387
01388 PyMODINIT_FUNC init_cbson(void) {
01389 PyObject *m;
01390
01391 PyDateTime_IMPORT;
01392 m = Py_InitModule("_cbson", _CBSONMethods);
01393 if (m == NULL) {
01394 return;
01395 }
01396
01397
01398 _reload_python_objects();
01399 }