cleaned

dea0f465 · Mauro Donega · 87246a74 · dea0f465
Commit dea0f465 authored 4 years ago by Mauro Donega
--- a/notebooks/ubinnedLikelihood.ipynb
+++ b/notebooks/ubinnedLikelihood.ipynb
@@ -10,7 +10,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -65,7 +65,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -74,8 +74,7 @@
    "    gauss1 = 1./np.sqrt(2*np.pi*width**2)*np.exp(-0.5*((x-position)/(width))**2.0)\n",
    "    gauss2 = 1./np.sqrt(2*np.pi*5**2)*np.exp(-0.5*((x-91)/(5))**2.0)\n",
    "    f = 1./3.\n",
-    "    return f * gauss1 + (1-f)* gauss2 \n",
-    "# 1.0/3/np.sqrt(2*np.pi*width**2)*np.exp(-0.5*((x-position)/(width))**2.0) + 2.0/3/np.sqrt(2*np.pi*5**2)*np.exp(-0.5*((x-91)/(5))**2.0)"
+    "    return f * gauss1 + (1-f)* gauss2 "
   ]
  },
  {
@@ -90,7 +89,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -110,7 +109,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
@@ -185,7 +184,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -205,7 +204,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
@@ -231,7 +230,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
@@ -271,9 +270,9 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "[[10.89069778 -2.44352105]\n",
-      " [-2.44352105  4.98117151]] \n",
-      "sigma(position): 3.300105723005907 sigma(width): 2.231853827463106\n"
+      "[[10.89066093 -2.44351183]\n",
+      " [-2.44351183  4.98113048]] \n",
+      "sigma(position): 3.3001001396079648 sigma(width): 2.2318446354879837\n"
     ]
    }
   ],

 %% Cell type:markdown id: tags:

 # Unbinned Maximum Likelihood fit
 Implement by hand an unbinned maximum likelihood fit

 %% Cell type:code id: tags:

 ``` python
 from __future__ import print_function
 import numpy as np
 %matplotlib inline
 import matplotlib.pyplot as plt
 from scipy.optimize import curve_fit, minimize, fsolve
 from scipy.stats import norm, chi2, lognorm
 ```

 %% Cell type:code id: tags:

 ``` python
 measurements = np.array([97.8621, 114.105, 87.7593, 93.2134, 86.6624, 87.4629, 79.7712, \
 91.5024, 87.7737, 89.6926, 133.506, 91.4124, 94.4401, 97.3968, \
 108.424, 103.197, 88.2166, 142.217, 89.0393, 102.438, 95.7987, \
 94.5177, 96.8171, 90.903, 132.463, 92.3394, 84.1451, 87.3447, \
 92.2861, 84.4213, 124.017, 90.4941, 95.7992, 92.3484, 95.9813, \
 88.0641, 101.002, 97.7268, 137.379, 96.213, 140.795, 99.9332, \
 130.087, 108.839, 90.0145, 100.313, 87.5952, 92.995, 114.457, \
 90.7526, 112.181, 117.857, 95.2804, 115.922, 117.043, 104.317, \
 126.728, 87.8592, 89.9614, 100.377, 107.38, 88.8426, 93.3224, \
 138.947, 102.288, 123.431, 114.334, 88.5134, 124.7, 87.7316, 84.7141, \
 91.1646, 87.891, 121.257, 92.9314])
 ```

 %% Cell type:markdown id: tags:

 ## 1-D Maximum likelihood fit

 %% Cell type:markdown id: tags:

 We have a set of measurements which are distributed according to the sum of two Gaussians (e.g. this can be signal and background).

 $\rho = \frac{1}{3}\frac{1}{\sqrt{2\pi \sigma^2}} e^{-\frac{1}{2}\left(\frac{x-p}{\sigma}\right)^2} + \frac{2}{3}\frac{1}{\sqrt{2\pi \sigma_b^2}} e^{-\frac{1}{2}\left(\frac{x-p_b}{\sigma_b}\right)^2}$

 where for one of the two peaks the parameters are known already

 $p_b = 91.0$
 $\sigma_b = 5.0$


 %% Cell type:code id: tags:

 ``` python
 # this is my model for the data
 def likelihood_point(x, position, width):
    gauss1 = 1./np.sqrt(2*np.pi*width**2)*np.exp(-0.5*((x-position)/(width))**2.0)
    gauss2 = 1./np.sqrt(2*np.pi*5**2)*np.exp(-0.5*((x-91)/(5))**2.0)
    f = 1./3.
    return f * gauss1 + (1-f)* gauss2
-# 1.0/3/np.sqrt(2*np.pi*width**2)*np.exp(-0.5*((x-position)/(width))**2.0) + 2.0/3/np.sqrt(2*np.pi*5**2)*np.exp(-0.5*((x-91)/(5))**2.0)
 ```

 %% Cell type:markdown id: tags:

 First, we assume the width of the peak we want to fit is already known: $\sigma = 15.0$.
 Perform a 1-D Maximum Likelihood fit for the position of the peak $p$.

 Complete the functions below which return the likelihood and negative log likelihood (NLL).

 %% Cell type:code id: tags:

 ``` python
 def likelihood_1d(params):
    return np.prod([likelihood_point(x, params[0], 15.0) for x in measurements])

 def nll_1d(params):
    return -np.log(likelihood_1d(params))
 ```

 %% Cell type:markdown id: tags:

 Minimize the NLL and give the best-fit result, including asymetric errors and plot the NLL.

 %% Cell type:code id: tags:

 ``` python
 solution = minimize(nll_1d, [100.0], method='CG')
 print (solution.message,"\n")
 print ("Fit results:")
 print (solution)

 min_pos = solution.x[0]
 min0 = solution.fun

 # scan numberically the likelihood to get the uncertainties
 scan_points = np.linspace(110.0,126.0,50)
 plt.plot(scan_points, [nll_1d([x]) - min0 for x in scan_points])

 nll_1sigma = lambda x: nll_1d([x]) - min0 - 0.5

 print("\nUncertainty scan:")
 print("negative error:", min_pos - fsolve(nll_1sigma, min_pos-0.5))
 print("positive error:", fsolve(nll_1sigma, min_pos+0.5) - min_pos)
 plt.show()
 ```

 %% Output

    Optimization terminated successfully.
    
    Fit results:
         fun: 291.4301328503986
         jac: array([-3.81469727e-06])
     message: 'Optimization terminated successfully.'
        nfev: 44
         nit: 4
        njev: 22
      status: 0
     success: True
           x: array([117.72327492])
    
    Uncertainty scan:
    negative error: [3.3120601]
    positive error: [3.3909765]



 %% Cell type:markdown id: tags:

 ## 2-D Likelihood fit

 %% Cell type:markdown id: tags:

 Now we perform the 2-D Maximum Likelihood fit, fitting for both $\sigma$ and $p$ at the same time.

 %% Cell type:code id: tags:

 ``` python
 def likelihood(params):
    return np.prod([likelihood_point(x, params[0], params[1]) for x in measurements])

 def nll(params):
    return -np.log(likelihood(params))
 ```

 %% Cell type:markdown id: tags:

 Minimize the NLL and find the best-fit result.

 %% Cell type:code id: tags:

 ``` python
 solution = minimize(nll, [120.0, 10], method='CG')
 print("position = {:.4f}".format(solution.x[0]), "\nwidth = {:.4f}".format(solution.x[1]))
 ```

 %% Output

    position = 118.3155
    width = 13.6297

 %% Cell type:markdown id: tags:

 Create a 2D contour plot of the 1, 2 and 3 $\sigma$ contours of the NLL and plot the best-fit solution.

 %% Cell type:code id: tags:

 ``` python
 scanA = np.linspace(110.0,130.0,50)
 scanB = np.linspace(5,20,50)
 minValue = nll(solution.x)
 Z = [[nll([a,b]) - minValue for b in scanB] for a in scanA]
 p1 = plt.contour(scanB, scanA, Z, [0.01,0.5, 2.0, 4.5])
 ```

 %% Output



 %% Cell type:markdown id: tags:

 Compute numerically the error matrix of the NLL for the 2-D fit.

 %% Cell type:code id: tags:

 ``` python
 from scipy.misc import derivative

 # compute the error matrix
 A = np.linalg.inv([
    [
        derivative(lambda x: nll([x, solution.x[1]]), solution.x[0], n=2),
        derivative(lambda y: derivative(lambda x: nll([x, y]), solution.x[0]), solution.x[1])
    ],
    [
        derivative(lambda x: derivative(lambda y: nll([x, y]), solution.x[1]), solution.x[0]),
        derivative(lambda y: nll([solution.x[0], y]), solution.x[1], n=2)
    ]
 ])
 print(A, "\nsigma(position):", np.sqrt(A[0,0]), "sigma(width):", np.sqrt(A[1,1]))
 ```

 %% Output

-    [[10.89069778 -2.44352105]
-     [-2.44352105  4.98117151]]
-    sigma(position): 3.300105723005907 sigma(width): 2.231853827463106
+    [[10.89066093 -2.44351183]
+     [-2.44351183  4.98113048]]
+    sigma(position): 3.3001001396079648 sigma(width): 2.2318446354879837

 %% Cell type:code id: tags:

 ``` python
 ```