00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 """MongoDB benchmarking suite."""
00016
00017 import time
00018 import sys
00019 sys.path[0:0] = [""]
00020
00021 import datetime
00022 import cProfile
00023
00024 from pymongo import connection
00025 from pymongo import ASCENDING
00026
# Benchmark sizing knobs.
trials = 2         # timed runs per benchmark; the fastest one is reported
per_trial = 5000   # operations performed inside each timed run
batch_size = 100   # documents per batch insert; also the width of the range query

# Template documents of increasing size.
small = {}

medium = {
    "integer": 5,
    "number": 5.05,
    "boolean": False,
    "array": ["test", "benchmark"],
}

large = {
    "base_url": "http://www.example.com/test-me",
    "total_word_count": 6743,
    # Evaluated once, when the module is loaded.
    "access_time": datetime.datetime.utcnow(),
    "meta_tags": {
        "description": "i am a long description string",
        "author": "Holly Man",
        "dynamically_created_meta_tag": "who know\n what",
    },
    "page_structure": {
        "counted_tags": 3450,
        "no_of_js_attached": 10,
        "no_of_images": 6,
    },
    # 14 words repeated 20 times -> 280 list entries.
    "harvested_words": [
        "10gen", "web", "open", "source", "application", "paas",
        "platform-as-a-service", "technology", "helps",
        "developers", "focus", "building", "mongodb", "mongo",
    ] * 20,
}
00052
def setup_insert(db, collection, object):
    """Drop ``collection`` from ``db`` so an insert trial starts from empty.

    ``object`` is accepted but never used; it is in the signature so this
    hook can be invoked with the same argument list as the benchmark
    function it prepares for.
    """
    db.drop_collection(collection)
00055
def insert(db, collection, object):
    """Insert ``per_trial`` documents into ``collection``, one call per document.

    Each document is a shallow copy of ``object`` with an ``"x"`` key set to
    its sequence number (0 .. per_trial - 1).  This function never assigns
    into ``object`` itself, only into the copies.
    """
    for sequence_number in range(per_trial):
        document = object.copy()
        document["x"] = sequence_number
        db[collection].insert(document)
00061
def insert_batch(db, collection, object):
    """Insert copies of ``object`` into ``collection`` in batches.

    Issues ``per_trial // batch_size`` insert calls, each carrying
    ``batch_size`` documents.  When ``per_trial`` is not a multiple of
    ``batch_size`` the remainder is not inserted.

    Every document in a batch is its own shallow copy of ``object``.  The
    previous ``[object] * batch_size`` form put the *same* dict in the list
    ``batch_size`` times, so any in-place change made by the driver (e.g.
    adding an ``_id`` -- NOTE(review): confirm for the installed pymongo
    version) would be shared by every entry and leak into the caller's
    template.  Copying also matches what ``insert`` does per document.
    """
    # Floor division keeps the batch count an integer under true division.
    for _batch_index in range(per_trial // batch_size):
        batch = [object.copy() for _copy_index in range(batch_size)]
        db[collection].insert(batch)
00065
def find_one(db, collection, x):
    """Issue ``per_trial`` ``find_one`` queries for ``{"x": x}`` on ``collection``.

    The query result is discarded; only the round trip matters here.
    """
    for _attempt in range(per_trial):
        target = db[collection]
        target.find_one({"x": x})
00069
def find(db, collection, x):
    """Issue ``per_trial`` ``find`` queries for ``{"x": x}`` and drain each result.

    ``x`` is placed in the query as given, so it may be a plain value or a
    query operator document.  Every returned document is iterated over and
    thrown away.
    """
    for _attempt in range(per_trial):
        cursor = db[collection].find({"x": x})
        for _document in cursor:
            pass
00074
def timed(name, function, args=(), setup=None):
    """Time ``function(*args)`` over ``trials`` runs and print the best rate.

    Parameters:
        name: label printed at the start of the report line; padded with
            dots to 60 characters (longer names are printed unpadded).
        function: callable under test, invoked as ``function(*args)``.
        args: positional arguments for both ``function`` and ``setup``.
            Defaults to an empty tuple (an immutable default instead of a
            shared list).
        setup: optional callable run as ``setup(*args)`` before each trial;
            its run time is not included in the measurement.

    Returns:
        The shortest wall-clock duration, in seconds, among the trials.

    Raises:
        ValueError: if ``trials`` is 0, since there is no duration to pick.

    The printed figure is ``per_trial / best_time`` truncated to an integer,
    i.e. operations per second assuming ``function`` performs ``per_trial``
    operations.
    """
    durations = []
    for _ in range(trials):
        if setup:
            setup(*args)
        start = time.time()
        function(*args)
        durations.append(time.time() - start)
    best_time = min(durations)

    # A coarse clock can report a zero-length run; avoid dividing by it.
    if best_time > 0:
        rate = "%d" % (per_trial / best_time)
    else:
        rate = "inf"

    # Parenthesised single-argument form works as both the Python 2 print
    # statement and the Python 3 print function.
    print("%s%s" % (name.ljust(60, "."), rate))
    return best_time
00086
def main():
    """Run every benchmark against a freshly dropped ``benchmark`` database.

    Order of the report lines: plain inserts, inserts into indexed
    collections, batch inserts, ``find_one`` (unindexed then indexed),
    ``find`` (unindexed then indexed), and finally an indexed range query.
    Each group is run for the small, medium and large template documents.
    """
    connection._TIMEOUT = 60
    client = connection.Connection()
    client.drop_database("benchmark")
    db = client.benchmark

    templates = [("small", small), ("medium", medium), ("large", large)]
    sizes = [label for label, _template in templates]

    for label, template in templates:
        timed("insert (%s, no index)" % label, insert,
              [db, "%s_none" % label, template], setup_insert)

    # No setup hook here: the index is created once and the collection is
    # not dropped between trials.
    for label, template in templates:
        indexed_name = "%s_index" % label
        db[indexed_name].create_index("x", ASCENDING)
        timed("insert (%s, indexed)" % label, insert,
              [db, indexed_name, template])

    for label, template in templates:
        timed("batch insert (%s, no index)" % label, insert_batch,
              [db, "%s_bulk" % label, template], setup_insert)

    # Point queries look up the "x" value in the middle of the inserted range.
    midpoint = per_trial / 2
    for query_label, query in (("find_one", find_one), ("find", find)):
        for index_label, suffix in (("no index", "none"), ("indexed", "index")):
            for label in sizes:
                timed("%s (%s, %s)" % (query_label, label, index_label), query,
                      [db, "%s_%s" % (label, suffix), midpoint])

    # Range query: "x" strictly between midpoint and midpoint + batch_size.
    for label in sizes:
        window = {"$gt": midpoint, "$lt": midpoint + batch_size}
        timed("find range (%s, indexed)" % label, find,
              [db, "%s_index" % label, window])
00137
# Run the suite only when this file is executed as a script.
if __name__ == "__main__":
    main()