how to fit with minuit in python

6c1e2ff3 · Mauro Donega · 6b7e341e · 6c1e2ff3
Commit 6c1e2ff3 authored 4 years ago by Mauro Donega
--- a/iMinuit.ipynb
+++ b/iMinuit.ipynb
@@ -4,7 +4,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "# Maximum Likelihood method\n",
+    "# iMinuit for LSQ\n",
    "\n",
    "\n",
    "In this notebook we will be using iminuit to fit with LSQ.\n",

 %% Cell type:markdown id: tags:
-# Maximum Likelihood method
+# iMinuit for LSQ
 In this notebook we will be using iminuit to fit with LSQ.
 iMinuit:
 https://iminuit.readthedocs.io/en/latest/index.html#
 Here below a quick summary of:
 https://iminuit.readthedocs.io/en/latest/tutorials.html
 %% Cell type:code id: tags:
 ``` python
 import numpy as np
 %matplotlib inline
 import matplotlib.pyplot as plt
 from iminuit import Minuit
 ```
 %% Cell type:code id: tags:
 ``` python
 # Generate random numbers on a straight line
 def line(x, a, b):
    return a + x * b
 data_x = np.linspace(0, 1, 10)
 # precomputed random numbers from a normal distribution
 offsets = np.array([-0.49783783, -0.33041722, -1.71800806,  1.60229399,  1.36682387,
                    -1.15424221, -0.91425267, -0.03395604, -1.27611719, -0.7004073 ])
 data_y = line(data_x, 1, 2) + 0.1 * offsets # generate some data points with random offsets
 plt.plot(data_x, data_y, "o")
 plt.xlim(-0.1, 1.1);
 ```
 %% Output
 %% Cell type:code id: tags:
 ``` python
 # Definition of the loss function you want to minimize (here the LSQ)
 def least_squares(a, b):
    yvar = 0.01
    return sum((data_y - line(data_x, a, b)) ** 2 / yvar)
 ```
 %% Cell type:markdown id: tags:
 # Minuit
 %% Cell type:code id: tags:
 ``` python
 # Minuit uses introspection to detect the parameter names of your function - it means
 #
 # A complete instance of Minuit contains the starting point of each parameter (<name>) and
 # the step size (<err_name>) for the dimension corresponding to that parameter.
 #
 # To correctly compute the uncertainties, it needs to know if you are doing a max likelihood
 # or a least squares fit:
 # errordef = 0.5 for negative log-likelihood functions
 # errordef = 1 for least-squares functions
 #
 # You can specify the limits on the parameters to be fit ("limit_varname")
 # lower limit: use limit_<name> = (<value>, None) or (<value>, float("infinity"))
 # upper limit: use limit_<name> = (None, <value>) or (-float("infinity"), <value>)
 # two-sided limit: use limit_<name> = (<min_value>, <max_value>)
 #
 # You can fix some of the parameters (ignore that dimension in the fit) setting <fix_name> = True/False
 # fix_a=True,
 m = Minuit(least_squares,
           a=5, b=5,
           fix_a = True,
           error_a=0.1, error_b=0.1,
           limit_a=(0, None), limit_b=(0, 10),
           errordef=1)
 m.get_param_states()
 # Once Minuit is constructed you can still fix/release parameters as:
 m.fixed["a"] = False
 m.fixed["b"] = True
 # Trick to run over all parameters:
 for key in m.fixed:
    m.fixed[key] = False
 m.migrad()
 # To change the value of a fixed parameter (or reset it to a different value) you can access it as:
 m.values["a"] = 0.5
 ```
 %% Cell type:markdown id: tags:
 # MIGRAD
 %% Cell type:code id: tags:
 ``` python
 # Migrad performs Variable-Metric Minimization. It combines a steepest-descends algorithm along with
 # line search strategy. Migrad is very popular in high energy physics field because of its robustness.
 m.migrad()
 ```
 %% Output
    ------------------------------------------------------------------
    | FCN = 10.39                   |     Ncalls=42 (106 total)      |
    | EDM = 4.32E-06 (Goal: 1E-05)  |            up = 1.0            |
    ------------------------------------------------------------------
    |  Valid Min.   | Valid Param.  | Above EDM | Reached call limit |
    ------------------------------------------------------------------
    |     True      |     True      |   False   |       False        |
    ------------------------------------------------------------------
    | Hesse failed  |   Has cov.    | Accurate  | Pos. def. | Forced |
    ------------------------------------------------------------------
    |     False     |     True      |   True    |   True    | False  |
    ------------------------------------------------------------------
    ------------------------------------------------------------------------------------------
    |   | Name |   Value   | Hesse Err | Minos Err- | Minos Err+ | Limit-  | Limit+  | Fixed |
    ------------------------------------------------------------------------------------------
    | 0 | a    |   0.99    |   0.06    |            |            |    0    |         |       |
    | 1 | b    |   1.95    |   0.10    |            |            |    0    |   10    |       |
    ------------------------------------------------------------------------------------------
 %% Cell type:code id: tags:
 ``` python
 from pprint import pprint
 # To understand the results of the fit there are two dict-like objects:
 # The first one is
 pprint (m.get_fmin())
 # The most important one here is is_valid. If this is false, the fit did not converge and the result is useless !
 #
 # When is_valid = False it can be that the fit function is not analytical everywhere in the parameter space
 # or does not have a local minimum (the minimum may be at infinity, the extremum may be a saddle point
 # or maximum). Indicators for this are:
 #   is_above_max_edm=True
 #   hesse_failed=True
 #   has_posdef_covar=False
 #   has_made_posdef_covar=True
 #
 # Migrad reached the call limit before the convergence so that has_reached_call_limit=True.
 # The used number of function calls is nfcn, and the call limit can be changed with the keyword argument ncall
 # in the method Minuit.migrad
 #
 # Migrad detects converge by a small edm value, the estimated distance to minimum. This is the difference between
 # the current minimum value of the minimized function and the next prediction based on a local quadratic
 # approximation of the function (something that Migrad computes as part of its algorithm). If the fit did not
 # converge, is_above_max_edm is true.
 #
 # To have a reliable uncertainty determination, you should make sure that:
 # has_covariance        = True
 # has_accurate_covar    = True
 # has_posdef_covar      = True
 # has_made_posdef_covar = False
 # hesse_failed          = False
 ```
 %% Output
    FMin(fval=10.387015571126001, edm=4.319932643302712e-06, tolerance=0.1, nfcn=42, ncalls=106, up=1.0, is_valid=True, has_valid_parameters=True, has_accurate_covar=True, has_posdef_covar=True, has_made_posdef_covar=False, hesse_failed=False, has_covariance=True, is_above_max_edm=False, has_reached_call_limit=False)
 %% Cell type:code id: tags:
 ``` python
 # The second one is a list of dict-like objects which contain information about the fitted parameters:
 pprint(m.get_param_states())
 # Important fields are:
 #   index: parameter index.
 #   name: parameter name.
 #   value: value of the parameter at the minimum.
 #   error: uncertainty estimate for the parameter value.
 ```
 %% Output
    [Param(number=0, name='a', value=0.9908538664000521, error=0.05876818861364508, is_const=False, is_fixed=False, has_limits=True, has_lower_limit=True, has_upper_limit=False, lower_limit=0.0, upper_limit=None),
     Param(number=1, name='b', value=1.945147699139382, error=0.09907833701567537, is_const=False, is_fixed=False, has_limits=True, has_lower_limit=True, has_upper_limit=True, lower_limit=0.0, upper_limit=10.0)]
 %% Cell type:code id: tags:
 ``` python
 # if you need just the basic
 print (m.migrad_ok())
 print (m.matrix_accurate())
 ```
 %% Output
    True
    True
 %% Cell type:markdown id: tags:
 # Hesse
 %% Cell type:code id: tags:
 ``` python
 # The Hesse algorithm numerically computes the matrix of second derivatives at the function minimum
 # (called the Hessian matrix) and inverts it.
 # Pros:
 #   (Comparably) fast computation.
 #   Provides covariance matrix for error propagation.
 # Cons:
 #   Wrong if function does not look like a hyperparabola around the minimum
 m.hesse()
 ```
 %% Output
    ------------------------------------------------------------------------------------------
    |   | Name |   Value   | Hesse Err | Minos Err- | Minos Err+ | Limit-  | Limit+  | Fixed |
    ------------------------------------------------------------------------------------------
    | 0 | a    |   0.99    |   0.06    |            |            |    0    |         |       |
    | 1 | b    |   1.95    |   0.10    |            |            |    0    |   10    |       |
    ------------------------------------------------------------------------------------------
 %% Cell type:code id: tags:
 ``` python
 # Covariance - colored table
 m.matrix()
 ```
 %% Output
    ---------------------
    |   |      a      b |
    ---------------------
    | a |  0.003 -0.005 |
    | b | -0.005  0.010 |
    ---------------------
 %% Cell type:code id: tags:
 ``` python
 # to access the matrix
 cov = m.matrix()
 print (cov)
 # or as a numpy array
 npcov = m.np_matrix()
 print (npcov)
 ```
 %% Output
    ---------------------
    |   |      a      b |
    ---------------------
    | a |  0.003 -0.005 |
    | b | -0.005  0.010 |
    ---------------------
    [[ 0.00345475 -0.00490941]
     [-0.00490941  0.00981865]]
 %% Cell type:code id: tags:
 ``` python
 # Correlation - colored table
 m.matrix(correlation=True)
 ```
 %% Output
    -----------------
    |   |    a    b |
    -----------------
    | a |  1.0 -0.8 |
    | b | -0.8  1.0 |
    -----------------
 %% Cell type:code id: tags:
 ``` python
 # to access the matrix
 corr = m.matrix(correlation=True)
 print (corr)
 # or as a numpy array
 npcov = m.np_matrix(correlation=True)
 print (npcov)
 ```
 %% Output
    -----------------
    |   |    a    b |
    -----------------
    | a |  1.0 -0.8 |
    | b | -0.8  1.0 |
    -----------------
    [[ 1.         -0.84293683]
     [-0.84293683  1.        ]]
 %% Cell type:markdown id: tags:
 # Minos
 %% Cell type:code id: tags:
 ``` python
 # Minos implements the so-called profile likelihood method, where the neighborhood around the function minimum
 # is scanned until the contour is found where the function increase by the value of errordef.
 # Pros:
 #  Good for functions which are not very close to a hyper-parabola around the minimum
 #  Produces pretty confidence regions for scientific plots
 # Cons:
 #  Takes really long time
 #  Result is difficult to error-propagate, since it cannot be described by a covariance matrix
 #
 # The results contain information as:
 #  At Limit: Whether Minos hit a parameter limit before the finishing the contour.
 #  Max FCN: Whether Minos reached the maximum number of allowed calls before finishing the contour.
 #  New Min: Whether Minos discovered a deeper local minimum in the neighborhood of the current one.
 ```
 %% Cell type:code id: tags:
 ``` python
 m.minos()
 ```
 %% Output
    -------------------------------------------------
    |        a        |            Valid            |
    -------------------------------------------------
    |      Error      |    -0.06     |     0.06     |
    |      Valid      |     True     |     True     |
    |    At Limit     |    False     |    False     |
    |     Max FCN     |    False     |    False     |
    |     New Min     |    False     |    False     |
    -------------------------------------------------
    -------------------------------------------------
    |        b        |            Valid            |
    -------------------------------------------------
    |      Error      |    -0.10     |     0.10     |
    |      Valid      |     True     |     True     |
    |    At Limit     |    False     |    False     |
    |     Max FCN     |    False     |    False     |
    |     New Min     |    False     |    False     |
    -------------------------------------------------
 %% Cell type:code id: tags:
 ``` python
 m.get_param_states()
 ```
 %% Output
    ------------------------------------------------------------------------------------------
    |   | Name |   Value   | Hesse Err | Minos Err- | Minos Err+ | Limit-  | Limit+  | Fixed |
    ------------------------------------------------------------------------------------------
    | 0 | a    |    0.99   |    0.06   |   -0.06    |    0.06    |    0    |         |       |
    | 1 | b    |    1.95   |    0.10   |   -0.10    |    0.10    |    0    |   10    |       |
    ------------------------------------------------------------------------------------------
 %% Cell type:code id: tags:
 ``` python
 # Other ways to access the fits results
 pprint(m.np_values())
 pprint(m.np_errors())
 pprint(m.np_merrors()) # The layout returned by Minuit.np_merrors() follows the convention
                       # [abs(delta_down), delta_up] that is used by matplotlib.pyplot.errorbar.
 pprint(m.np_covariance())
 ```
 %% Output
    array([0.99085387, 1.9451477 ])
    array([0.05876843, 0.09907874])
    array([[0.05866313, 0.09929038],
           [0.05888831, 0.09888435]])
    array([[ 0.00345475, -0.00490941],
           [-0.00490941,  0.00981865]])
 %% Cell type:markdown id: tags:
 # Plot contours
 %% Cell type:code id: tags:
 ``` python
 m.draw_mncontour('a','b', nsigma=4, numpoints=100)  # nsigma=4 says: draw four contours from sigma=1 to 4
 ```
 %% Output
    <matplotlib.contour.ContourSet at 0x11e23d690>
 %% Cell type:code id: tags:
 ``` python
 m.draw_profile('a');
 ```
 %% Output
 %% Cell type:code id: tags:
 ``` python
 m.draw_mnprofile('a');
 ```
 %% Output
 %% Cell type:code id: tags:
 ``` python
 px, py = m.profile('a', subtract_min=True)
 plt.plot(px, py);
 ```
 %% Output
 %% Cell type:code id: tags:
 ``` python
 ```