Skip to content

Commit

Permalink
enh: add custom binned and custom minimizer
Browse files Browse the repository at this point in the history
  • Loading branch information
jonas-eschle committed Jul 13, 2024
1 parent d3af390 commit 86b3b0f
Show file tree
Hide file tree
Showing 17 changed files with 1,354 additions and 301 deletions.
2 changes: 1 addition & 1 deletion _unused/kstmumu_tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ def invariant_mass(four_momenta):
def plot_pdf_data(data, model, title, n_bins=40):
linewidth = 2.5
space = data.data_range
plot_scaling = data.nevents / n_bins * space.area()
plot_scaling = data.nevents / n_bins * space.volume
lower, upper = space.limit1d
x = np.linspace(lower, upper, 1000)

Expand Down
51 changes: 30 additions & 21 deletions _website/tutorials/components/33 - Binned fits.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
"import numpy as np\n",
"import zfit\n",
"import zfit.z.numpy as znp # numpy-like backend interface\n",
"from zfit import z # backend, special functions\n",
"\n",
"zfit.settings.set_seed(43)"
]
Expand Down Expand Up @@ -70,7 +69,7 @@
"sigma = zfit.Parameter(\"sigma\", 1., 0.1, 10)\n",
"model_nobin = zfit.pdf.Gauss(mu, sigma, obs)\n",
"\n",
"data_nobin = zfit.Data.from_numpy(obs, normal_np)\n",
"data_nobin = zfit.Data(obs=obs, data=normal_np)\n",
"\n",
"loss_nobin = zfit.loss.UnbinnedNLL(model_nobin, data_nobin)"
]
Expand All @@ -94,6 +93,8 @@
"nbins = 50\n",
"data = data_nobin.to_binned(nbins)\n",
"model = model_nobin.to_binned(data.space)\n",
"\n",
"# we can create a binned NLL\n",
"loss = zfit.loss.BinnedNLL(model, data)"
]
},
Expand Down Expand Up @@ -185,7 +186,7 @@
"obs_binned = obs.with_binning(nbins)\n",
"print(obs_binned)\n",
"\n",
"# or we can create binnings\n",
"# or we can create binnings (same as boost-histogram/hist)\n",
"binning_regular = zfit.binned.RegularBinning(nbins, -10, 10, name='x')\n",
"binning_variable = zfit.binned.VariableBinning([-10, -6, -1, -0.1, 0.4, 3, 10], name='x')"
]
Expand Down Expand Up @@ -252,7 +253,7 @@
"outputs": [],
"source": [
"h = model.to_hist()\n",
"h_scaled = h * [10_000]"
"h_scaled = h * 10_000"
]
},
{
Expand Down Expand Up @@ -488,15 +489,19 @@
"outputs": [],
"source": [
"bkg_hist = zfit.Data(np.random.exponential(scale=20, size=100_000) - 10, obs=obs_binned)\n",
"bkg_hist_m1 = zfit.Data.from_numpy(obs=obs,\n",
" array=np.random.exponential(scale=35, size=100_000) - 10).to_binned(\n",
" obs_binned)\n",
"bkg_hist_m05 = zfit.Data.from_numpy(obs=obs,\n",
" array=np.random.exponential(scale=26, size=100_000) - 10).to_binned(\n",
" obs_binned)\n",
"bkg_hist_p1 = zfit.Data.from_numpy(obs=obs,\n",
" array=np.random.exponential(scale=17, size=100_000) - 10).to_binned(\n",
" obs_binned)\n",
"\n",
"# creating templates, different ways of going about it\n",
"# 1. create unbinned and convert to binned\n",
"bkg_m1_unbinned = zfit.Data(obs=obs, data=np.random.exponential(scale=35, size=100_000) - 10)\n",
"bkg_hist_m1 = bkg_m1_unbinned.to_binned(obs_binned)\n",
"\n",
"# 2. directly create binned by using the binned observables\n",
"bkg_hist_m05 = zfit.Data(obs=obs_binned, data=np.random.exponential(scale=26, size=100_000) - 10)\n",
"\n",
"# 3. use the `from_numpy` method that has more specific options than just `Data`\n",
"bkg_hist_p1 = zfit.data.from_numpy(obs=obs_binned, array=np.random.exponential(scale=17, size=100_000) - 10)\n",
"\n",
"# put them into a dict that maps the modifier value to the histogram\n",
"bkg_hists = {-1: bkg_hist_m1, -0.5: bkg_hist_m05, 0: bkg_hist, 1: bkg_hist_p1}\n",
"bkg_histpdfs = {k: zfit.pdf.HistogramPDF(v) for k, v in bkg_hists.items()}\n",
"mplhep.histplot(list(bkg_hists.values()), label=list(bkg_hists.keys()))\n",
Expand Down Expand Up @@ -538,7 +543,7 @@
"metadata": {},
"outputs": [],
"source": [
"bkg_pdf = zfit.pdf.HistogramPDF(bkg_hist, extended=bkg_yield) # we don't use the spline for simplicity"
"# bkg_pdf = zfit.pdf.HistogramPDF(bkg_hist, extended=bkg_yield) # we don't use the spline for simplicity"
]
},
{
Expand All @@ -557,9 +562,12 @@
"metadata": {},
"outputs": [],
"source": [
"with zfit.param.set_values([alpha] + list(modifiers.values()),\n",
" [0.] + list(np.random.normal(1.0, scale=0.14, size=len(modifiers)))):\n",
" data = bkg_pdf.sample(n=10_000).to_hist() + sig_pdf.sample(1000).to_hist()\n",
"mods_signal = {m: np.random.normal(1.0, scale=0.14) for m in modifiers.values()}\n",
"mods_bkg = {alpha: 0.1}\n",
"\n",
"bkghist = bkg_pdf.sample(n=10_000, params=mods_bkg).to_hist()\n",
"sighist = sig_pdf.sample(1000, params=mods_signal).to_hist()\n",
"data = bkghist + sighist\n",
"data"
]
},
Expand All @@ -570,7 +578,7 @@
"outputs": [],
"source": [
"modifier_constraints = zfit.constraint.GaussianConstraint(params=list(modifiers.values()), observation=np.ones(len(modifiers)),\n",
" uncertainty=0.1 * np.ones(len(modifiers)))\n",
" uncertainty=0.1 * np.ones(len(modifiers)))\n",
"alpha_constraint = zfit.constraint.GaussianConstraint(alpha, 0, sigma=1)"
]
},
Expand Down Expand Up @@ -636,7 +644,7 @@
"metadata": {},
"outputs": [],
"source": [
"unbinned_spline = zfit.pdf.SplinePDF(sig_pdf)"
"unbinned_spline = zfit.pdf.SplinePDF(sig_pdf, label=\"splined model\")"
]
},
{
Expand All @@ -645,8 +653,9 @@
"metadata": {},
"outputs": [],
"source": [
"plt.plot(x, unbinned_spline.pdf(x))\n",
"mplhep.histplot(sig_pdf.to_hist(), density=True)"
"# plt.plot(x, unbinned_spline.pdf(x))\n",
"mplhep.histplot(sig_pdf.to_hist(), density=True, label='binned model')\n",
"unbinned_spline.plot.plotpdf(extended=False) # extended=False means plot the PDF density, not scaled to yield"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@
"import mplhep\n",
"import numpy as np\n",
"import particle.literals as lp\n",
"import tensorflow as tf\n",
"import zfit.z.numpy\n",
"import zfit\n",
"import zfit.z.numpy as znp\n",
"import zfit_physics as zphys\n",
"\n",
"plt.rcParams['figure.figsize'] = (8, 6)"
]
Expand Down Expand Up @@ -74,7 +75,7 @@
"outputs": [],
"source": [
"# Firstly, the observable and its range is defined\n",
"obs = zfit.Space('Bmass', 5000, 6000) # for whole range"
"obs = zfit.Space('Bmass', 5000, 6000, label=\"$B_{mass} [MeV/c^2]$\") # for whole range"
]
},
{
Expand All @@ -97,9 +98,9 @@
"metadata": {},
"outputs": [],
"source": [
"# Parameters are specified: (name (unique), initial, lower, upper) whereas lower, upper are optional\n",
"mu = zfit.Parameter('mu', 5279, 5100, 5400)\n",
"sigma = zfit.Parameter('sigma', 20, 1, 200)\n",
"# Parameters are specified: (name , initial, lower, upper) whereas lower, upper are optional\n",
"mu = zfit.Parameter('mu', 5279, 5100, 5400, label=r\"$\\mu$ [MeV/c^2]$\")\n",
"sigma = zfit.Parameter('sigma', 20, 1, 200, label=r\"$\\sigma$ [MeV/c^2]$\")\n",
"sig_yield = zfit.Parameter('sig_yield', n_sig_rare + 30,\n",
" step_size=3) # step size: default is small, use appropriate\n",
"signal = zfit.pdf.Gauss(mu=mu, sigma=sigma, obs=obs, extended=sig_yield)\n",
Expand All @@ -118,7 +119,20 @@
"metadata": {},
"outputs": [],
"source": [
"constraint = zfit.constraint.GaussianConstraint(mu, observation=5275 * u.MeV, sigma=15 * u.MeV)"
"model.plot.plotpdf()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"observ = 5270\n",
"constraint = zfit.constraint.GaussianConstraint(mu, observation=observ * u.MeV, sigma=15 * u.MeV)\n",
"constr_space = zfit.Space('mu', 4500, 6000)\n",
"gaussconstr = zfit.pdf.Gauss(mu=mu, sigma=15, obs=constr_space, extended=1)\n",
"constr_val = zfit.Data.from_numpy(array=np.array(observ), obs=constr_space)"
]
},
{
Expand All @@ -128,8 +142,14 @@
"outputs": [],
"source": [
"nll = zfit.loss.ExtendedUnbinnedNLL(model, data, constraints=constraint)\n",
"nllalt = zfit.loss.ExtendedUnbinnedNLL(model, data)\n",
"nllconstraint = zfit.loss.ExtendedUnbinnedNLL(gaussconstr, constr_val)\n",
"nll2 = nllalt + nllconstraint\n",
"init_vals = {mu: mu.value(), sigma: sigma.value(), sig_yield: sig_yield.value(), lam: lam.value(),\n",
" bkg_yield: bkg_yield.value()}\n",
"zfit.param.set_values(init_vals)\n",
"minimizer = zfit.minimize.Minuit(gradient=\"zfit\")\n",
"result = minimizer.minimize(nll)\n",
"result = minimizer.minimize(nll2)\n",
"result.hesse();"
]
},
Expand All @@ -142,6 +162,24 @@
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(result)"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -349,21 +387,20 @@
" if ax is None:\n",
" ax = plt.gca()\n",
"\n",
" lower, upper = data.data_range.limit1d\n",
" lower, upper = data.space.v1.limits\n",
"\n",
" # Creates and histogram of the data and plots it with mplhep.\n",
" counts, bin_edges = np.histogram(data.unstack_x(), bins=nbins)\n",
" mplhep.histplot(counts, bins=bin_edges, histtype=\"errorbar\", yerr=True,\n",
" binneddata = data.to_binned(nbins)\n",
" mplhep.histplot(binneddata, histtype=\"errorbar\", yerr=True,\n",
" label=\"Data\", ax=ax, color=\"black\")\n",
"\n",
" binwidth = np.diff(bin_edges)[0]\n",
" x = tf.linspace(lower, upper, num=1000) # or np.linspace\n",
"\n",
" x = znp.linspace(lower, upper, num=1000) # or np.linspace\n",
" # Line plots of the total pdf and the sub-pdfs.\n",
" y = model.ext_pdf(x) * binwidth\n",
" scaling = data.space.volume / nbins\n",
" y = model.ext_pdf(x) * scaling\n",
" ax.plot(x, y, label=\"total\", color=\"royalblue\")\n",
" for m, l, c in zip(model.get_models(), [\"background\", \"signal\"], [\"forestgreen\", \"crimson\"]):\n",
" ym = m.ext_pdf(x) * binwidth\n",
" ym = m.ext_pdf(x) * scaling\n",
" ax.plot(x, ym, label=l, color=c)\n",
"\n",
" ax.set_title(data.data_range.obs[0])\n",
Expand Down
Loading

0 comments on commit 86b3b0f

Please sign in to comment.