If you need a somewhat complex dataset for testing plot features in Python, read this page.
Author: Jim Pivarski
Rationale
Most of the examples must use some dataset, so I prepared a sample of CMS public data to be read with no dependencies. The data came from the CERN Open Data portal; it is 50 fb-1 of highly processed particle physics data from the CMS experiment, with 469,384 events selected by the 24 GeV/c isolated muon trigger.
For convenience, it has been converted to compressed JSON. The code that reads it into classes is provided below for you to copy-paste.
Loading the data quickly
The data-loading code is now included in the core Histogrammar library (as of 1.0.4), so you just do this:
from histogrammar.tutorial import cmsdata
events = cmsdata.EventIterator()
For older versions of Histogrammar or to see what this is doing, read below.
Pedestrian approach
Start a Python prompt and either
- copy-paste the code into your terminal (I’ve had some trouble with pasting from the web into Python), or
- download the raw code and type
exec(open("/path/to/python-cmsdata.py").read())
to load it.
If all goes well, you’ll have a generator named events
that pulls data from the web as needed. You get an event by repeatedly calling events.next()
(Python 2) or events.__next__()
(Python 3) or by using it in a for loop. To restart the iterator from the beginning, do events = EventIterator()
.
The code to copy-paste is below and the (link to raw code is here).
import json
import math
import zlib
# handle Python 3
try:
import urllib2
except ImportError:
import urllib.request as urllib2
# class definitions
class LorentzVector(object):
@property
def pt(self):
return math.sqrt(self.px**2 + self.py**2)
@property
def p(self):
return math.sqrt(self.px**2 + self.py**2 + self.pz**2)
@property
def mass(self):
return math.sqrt(self.E**2 - self.px**2 - self.py**2 - self.pz**2)
@property
def eta(self):
return 0.5*math.log((self.p + self.pz)/(self.p - self.pz))
@property
def phi(self):
return math.atan2(self.py, self.px)
def __add__(self, other):
out = LorentzVector()
out.px = self.px + other.px
out.py = self.py + other.py
out.pz = self.pz + other.pz
out.E = self.E + other.E
return out
def __repr__(self):
return "LorentzVector({}, {}, {}, {})".format(self.px, self.py, self.pz, self.E)
class Jet(LorentzVector):
def __init__(self, px, py, pz, E, btag):
self.px = px
self.py = py
self.pz = pz
self.E = E
self.btag = btag
def __repr__(self):
return "Jet({}, {}, {}, {}, {})".format(self.px, self.py, self.pz, self.E, self.btag)
@staticmethod
def fromJson(params):
return Jet(params["px"],
params["py"],
params["pz"],
params["E"],
params["btag"])
class Muon(LorentzVector):
def __init__(self, px, py, pz, E, q, iso):
self.px = px
self.py = py
self.pz = pz
self.E = E
self.q = q
self.iso = iso
def __repr__(self):
return "Muon({}, {}, {}, {}, {}, {})".format(self.px, self.py, self.pz, self.E, self.q, self.iso)
@staticmethod
def fromJson(params):
return Muon(params["px"],
params["py"],
params["pz"],
params["E"],
params["q"],
params["iso"])
class Electron(LorentzVector):
def __init__(self, px, py, pz, E, q, iso):
self.px = px
self.py = py
self.pz = pz
self.E = E
self.q = q
self.iso = iso
def __repr__(self):
return "Electron({}, {}, {}, {}, {}, {})".format(self.px, self.py, self.pz, self.E, self.q, self.iso)
@staticmethod
def fromJson(params):
return Electron(params["px"],
params["py"],
params["pz"],
params["E"],
params["q"],
params["iso"])
class Photon(LorentzVector):
def __init__(self, px, py, pz, E, iso):
self.px = px
self.py = py
self.pz = pz
self.E = E
self.iso = iso
def __repr__(self):
return "Photon({}, {}, {}, {}, {})".format(self.px, self.py, self.pz, self.E, self.iso)
@staticmethod
def fromJson(params):
return Photon(params["px"],
params["py"],
params["pz"],
params["E"],
params["iso"])
class MET(object):
def __init__(self, px, py):
self.px = px
self.py = py
def __repr__(self):
return "MET({}, {})".format(self.px, self.py)
@property
def pt(self):
return math.sqrt(self.px**2 + self.py**2)
@staticmethod
def fromJson(params):
return MET(params["px"], params["py"])
class Event(object):
def __init__(self, jets, muons, electrons, photons, met, numPrimaryVertices):
self.jets = jets
self.muons = muons
self.electrons = electrons
self.photons = photons
self.met = met
self.numPrimaryVertices = numPrimaryVertices
def __repr__(self):
return "Event({}, {}, {}, {}, {}, {})".format(self.jets, self.muons, self.electrons, self.photons, self.met, self.numPrimaryVertices)
@staticmethod
def fromJson(params):
return Event(list(map(Jet.fromJson, params["jets"])),
list(map(Muon.fromJson, params["muons"])),
list(map(Electron.fromJson, params["electrons"])),
list(map(Photon.fromJson, params["photons"])),
MET.fromJson(params["MET"]),
params["numPrimaryVertices"])
# event data iterator
def EventIterator(location="http://histogrammar.org/docs/data/triggerIsoMu24_50fb-1.json.gz"):
READ_BLOCK_SIZE = 1024*8
webreader = urllib2.urlopen(location)
gzipUnzipper = zlib.decompressobj(16 + zlib.MAX_WBITS)
remainder = b""
eventsBatch = []
while True:
if len(eventsBatch) == 0:
compressedData = webreader.read(READ_BLOCK_SIZE)
data = remainder + gzipUnzipper.decompress(compressedData)
if len(data) == 0:
raise StopIteration
index = data.rfind(b"\n")
if index >= 0:
data, remainder = data[:index + 1], data[index + 1:]
else:
remainder = b""
eventsBatch = [Event.fromJson(json.loads(line.decode()))
for line in reversed(data.split(b"\n"))
if line != b""]
yield eventsBatch.pop()
events = EventIterator()