MessagePack vs JSON vs BSON

‹ Too much hug | Using celery with SQS ›

Here is a benchmark of MessagePack vs JSON vs BSON in Python 2.7.3 on Ubuntu 12.04.4 LTS, run on a 2.5 GHz Intel Core2 Quad Q8300. The software used was:

  • msgpack-python 0.1.10-1 ubuntu package
  • msgpack_python-0.4.1-py2.7-linux-i686.egg installed with easy_install
  • python-bson 2.1-1ubuntu0.1 ubuntu package
  • bson from pymongo-2.7-py2.7-linux-i686.egg installed with easy_install
  • stock Python 2.7.3 json package

msgpack was the winner in this test. Compared to the Python json implementation:

msgpack was 40% faster at serializing
msgpack was 80% faster at deserializing
msgpack produced output that was 23% smaller
bson was 300-500% slower at serializing
bson was 45% faster at deserializing
bson produced output that was 46% bigger

#!/usr/bin/env python
import csv
import json
import sys
import timeit

import bson
import msgpack

# Shared CSV writer: every benchmark result row is written to stdout through it.
writer = csv.writer(sys.stdout)

def profile(library, operation, structure, func):
   """Time ``func`` and write the measurements as one CSV row.

   ``func`` is handed to ``timeit.repeat`` with ``number=1000`` and
   ``repeat=4``, which yields four timings.  The emitted row is
   ``library, operation, structure`` followed by each timing formatted
   with ``%s``, written through the module-level ``writer``.
   """
   timings = timeit.repeat(func, number=1000, repeat=4)
   fields = [library, operation, structure]
   fields.extend("%s" % elapsed for elapsed in timings)
   writer.writerow(fields)

def simple(name, data):
   """Benchmark msgpack, bson and json on ``data`` and report the results.

   For each library, in that order, three CSV rows are produced: the
   timings for encoding ``data`` ('dump'), the timings for decoding the
   pre-encoded payload ('load'), and the length of that payload ('size').
   ``name`` labels the data structure in every row.
   """
   def report(library, dump, load, payload):
      # The payload is encoded once up front so that 'load' only measures
      # decoding and 'size' reflects exactly what was decoded.
      profile(library, 'dump', name, lambda: dump(data))
      profile(library, 'load', name, lambda: load(payload))
      writer.writerow((library, 'size', name, len(payload)))

   packed = msgpack.packb(data)
   json_text = json.dumps(data)
   bson_doc = bson.BSON.encode(data)

   report('msgpack', msgpack.packb, msgpack.unpackb, packed)
   report('bson', bson.BSON.encode, bson.BSON.decode, bson_doc)
   report('json', json.dumps, json.loads, json_text)

def nesteddict(depth=5, width=5):
   """Build a dict nested ``depth`` levels deep with ``width`` keys per level.

   Keys at every level are ``'y' * n`` for ``n`` in ``range(width)``, so the
   first key is the empty string.  At the innermost level (``depth <= 1``)
   the values are the strings ``'z' * n``; at every level above that, each
   value is a separately built sub-dict one level shallower.

   Both parameters are optional; the defaults give five levels of five
   keys each.
   """
   if depth <= 1:
      return {'y' * n: 'z' * n for n in range(width)}
   # Recurse once per key so every branch is a distinct object rather than
   # a single sub-dict shared by all keys.
   return {'y' * n: nesteddict(depth - 1, width) for n in range(width)}

def flatdict(width=500):
   """Return a single-level dict with ``width`` string entries.

   Entry ``n`` (for ``n`` in ``range(width)``) maps ``'y' * n`` to
   ``'z' * n``, so the first entry maps the empty string to itself.
   """
   return {'y' * count: 'z' * count for count in range(width)}

def main():
   """Run the benchmark over each sample data structure in turn."""
   def cases():
      # Yielded lazily so each data set is built right before it is
      # benchmarked.
      yield "integers", {'a': [7]*10000}
      yield "strings", {'a': ['x'*n for n in range(100)]*10}
      yield "lists", {'a': [[]]*10000}
      yield "flat", flatdict()
      yield "nested", nesteddict()
      yield "dicts", {'a': [{}]*10000}

   for label, payload in cases():
      simple(label, payload)

# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
   main()

Subscribe to All Posts - Wesley Tanaka