"""
This module implements maximum likelihood-based estimation (MLE) of
Gaussian regression models for finite-dimensional observations made on
infinite-dimensional processes.

The ProcessMLE class supports regression analyses on grouped data,
where the observations within a group are dependent (they are made on
the same underlying process).  One use-case is repeated measures
regression for temporal (longitudinal) data, in which the repeated
measures occur at arbitrary real-valued time points.

The mean structure is specified as a linear model.  The covariance
parameters depend on covariates via a link function.
"""

import numpy as np
import pandas as pd
import patsy
import statsmodels.base.model as base
from statsmodels.regression.linear_model import OLS
import collections
from scipy.optimize import minimize
from statsmodels.iolib import summary2
from statsmodels.tools.numdiff import approx_fprime
import warnings


class ProcessCovariance:
    r"""
    A covariance model for a process indexed by a real parameter.

    An implementation of this class is based on a positive definite
    correlation function h that maps real numbers to the interval [0,
    1], such as the Gaussian (squared exponential) correlation
    function :math:`\exp(-x^2)`.  It also depends on a positive
    scaling function `s` and a positive smoothness function `u`.
    """

    def get_cov(self, time, sc, sm):
        """
        Returns the covariance matrix for given time values.

        Parameters
        ----------
        time : array_like
            The time points for the observations.  If len(time) = p,
            a pxp covariance matrix is returned.
        sc : array_like
            The scaling parameters for the observations.
        sm : array_like
            The smoothness parameters for the observation.  See class
            docstring for details.
        """
        raise NotImplementedError

    def jac(self, time, sc, sm):
        """
        The Jacobian of the covariance with respect to the parameters.

        See get_cov for parameters.

        Returns
        -------
        jsc : list-like
            jsc[i] is the derivative of the covariance matrix
            with respect to the i^th scaling parameter.
        jsm : list-like
            jsm[i] is the derivative of the covariance matrix
            with respect to the i^th smoothness parameter.
        """
        raise NotImplementedError


class GaussianCovariance(ProcessCovariance):
    r"""
    An implementation of ProcessCovariance using the Gaussian kernel.

    This class represents a parametric covariance model for a Gaussian
    process as described in the work of Paciorek et al. cited below.

    Following Paciorek et al [1]_, the covariance between observations with
    index `i` and `j` is given by:

    .. math::

      s[i] \cdot s[j] \cdot h(|time[i] - time[j]| / \sqrt{(u[i] + u[j]) /
      2}) \cdot \frac{u[i]^{1/4}u[j]^{1/4}}{\sqrt{(u[i] + u[j])/2}}

    The ProcessMLE class allows linear models with this covariance
    structure to be fit using maximum likelihood (ML). The mean and
    covariance parameters of the model are fit jointly.

    The mean, scaling, and smoothing parameters can be linked to
    covariates.  The mean parameters are linked linearly, and the
    scaling and smoothing parameters use an log link function to
    preserve positivity.

    The reference of Paciorek et al. below provides more details.
    Note that here we only implement the 1-dimensional version of
    their approach.

    References
    ----------
    .. [1] Paciorek, C. J. and Schervish, M. J. (2006). Spatial modeling using
        a new class of nonstationary covariance functions. Environmetrics,
        17:483–506.
        https://papers.nips.cc/paper/2350-nonstationary-covariance-functions-for-gaussian-process-regression.pdf
    """

    def get_cov(self, time, sc, sm):

        da = np.subtract.outer(time, time)
        ds = np.add.outer(sm, sm) / 2

        qmat = da * da / ds
        cm = np.exp(-qmat / 2) / np.sqrt(ds)
        cm *= np.outer(sm, sm)**0.25
        cm *= np.outer(sc, sc)

        return cm

    def jac(self, time, sc, sm):

        da = np.subtract.outer(time, time)
        ds = np.add.outer(sm, sm) / 2
        sds = np.sqrt(ds)
        daa = da * da
        qmat = daa / ds
        p = len(time)
        eqm = np.exp(-qmat / 2)
        sm4 = np.outer(sm, sm)**0.25
        cmx = eqm * sm4 / sds
        dq0 = -daa / ds**2
        di = np.zeros((p, p))
        fi = np.zeros((p, p))
        scc = np.outer(sc, sc)

        # Derivatives with respect to the smoothing parameters.
        jsm = []
        for i, _ in enumerate(sm):
            di *= 0
            di[i, :] += 0.5
            di[:, i] += 0.5
            dbottom = 0.5 * di / sds
            dtop = -0.5 * eqm * dq0 * di
            b = dtop / sds - eqm * dbottom / ds
            c = eqm / sds
            v = 0.25 * sm**0.25 / sm[i]**0.75
            fi *= 0
            fi[i, :] = v
            fi[:, i] = v
            fi[i, i] = 0.5 / sm[i]**0.5
            b = c * fi + b * sm4
            b *= scc
            jsm.append(b)

        # Derivatives with respect to the scaling parameters.
        jsc = []
        for i in range(0, len(sc)):
            b = np.zeros((p, p))
            b[i, :] = cmx[i, :] * sc
            b[:, i] += cmx[:, i] * sc
            jsc.append(b)

        return jsc, jsm


def _check_args(endog, exog, exog_scale, exog_smooth, exog_noise, time,
                groups):

    v = [
        len(endog),
        exog.shape[0],
        exog_scale.shape[0],
        exog_smooth.shape[0],
        len(time),
        len(groups)
    ]

    if exog_noise is not None:
        v.append(exog_noise.shape[0])

    if min(v) != max(v):
        msg = ("The leading dimensions of all array arguments " +
               "must be equal.")
        raise ValueError(msg)


class ProcessMLE(base.LikelihoodModel):
    """
    Fit a Gaussian mean/variance regression model.

    This class fits a one-dimensional Gaussian process model with
    parametrized mean and covariance structures to grouped data.  For
    each group, there is an independent realization of a latent
    Gaussian process indexed by an observed real-valued time
    variable..  The data consist of the Gaussian process observed at a
    finite number of `time` values.

    The process mean and variance can be lined to covariates.  The
    mean structure is linear in the covariates.  The covariance
    structure is non-stationary, and is defined parametrically through
    'scaling', and 'smoothing' parameters.  The covariance of the
    process between two observations in the same group is a function
    of the distance between the time values of the two observations.
    The scaling and smoothing parameters can be linked to covariates.

    The observed data are modeled as the sum of the Gaussian process
    realization and (optionally) independent white noise.  The standard
    deviation of the white noise can be linked to covariates.

    The data should be provided in 'long form', with a group label to
    indicate which observations belong to the same group.
    Observations in different groups are always independent.

    Parameters
    ----------
    endog : array_like
        The dependent variable.
    exog : array_like
        The design matrix for the mean structure
    exog_scale : array_like
        The design matrix for the scaling structure
    exog_smooth : array_like
        The design matrix for the smoothness structure
    exog_noise : array_like
        The design matrix for the additive white noise. The
        linear predictor is the log of the white noise standard
        deviation.  If None, there is no additive noise (the
        process is observed directly).
    time : array_like (1-dimensional)
        The univariate index values, used to calculate distances
        between observations in the same group, which determines
        their correlations.
    groups : array_like (1-dimensional)
        The group values.
    cov : a ProcessCovariance instance
        Defaults to GaussianCovariance.
    """

    def __init__(self,
                 endog,
                 exog,
                 exog_scale,
                 exog_smooth,
                 exog_noise,
                 time,
                 groups,
                 cov=None,
                 **kwargs):

        super().__init__(
            endog,
            exog,
            exog_scale=exog_scale,
            exog_smooth=exog_smooth,
            exog_noise=exog_noise,
            time=time,
            groups=groups,
            **kwargs)

        self._has_noise = exog_noise is not None

        # Create parameter names
        xnames = []
        if hasattr(exog, "columns"):
            xnames = list(exog.columns)
        else:
            xnames = ["Mean%d" % j for j in range(exog.shape[1])]

        if hasattr(exog_scale, "columns"):
            xnames += list(exog_scale.columns)
        else:
            xnames += ["Scale%d" % j for j in range(exog_scale.shape[1])]

        if hasattr(exog_smooth, "columns"):
            xnames += list(exog_smooth.columns)
        else:
            xnames += ["Smooth%d" % j for j in range(exog_smooth.shape[1])]

        if self._has_noise:
            if hasattr(exog_noise, "columns"):
                # If pandas-like, get the actual column names
                xnames += list(exog_noise.columns)
            else:
                # If numpy-like, create default names
                xnames += ["Noise%d" % j for j in range(exog_noise.shape[1])]

        self.data.param_names = xnames

        if cov is None:
            cov = GaussianCovariance()
        self.cov = cov

        _check_args(endog, exog, exog_scale, exog_smooth, exog_noise,
                    time, groups)

        groups_ix = collections.defaultdict(list)
        for i, g in enumerate(groups):
            groups_ix[g].append(i)
        self._groups_ix = groups_ix

        # Default, can be set in call to fit.
        self.verbose = False

        self.k_exog = self.exog.shape[1]
        self.k_scale = self.exog_scale.shape[1]
        self.k_smooth = self.exog_smooth.shape[1]
        if self._has_noise:
            self.k_noise = self.exog_noise.shape[1]

    def _split_param_names(self):
        xnames = self.data.param_names
        q = 0
        mean_names = xnames[q:q+self.k_exog]
        q += self.k_exog
        scale_names = xnames[q:q+self.k_scale]
        q += self.k_scale
        smooth_names = xnames[q:q+self.k_smooth]

        if self._has_noise:
            q += self.k_noise
            noise_names = xnames[q:q+self.k_noise]
        else:
            noise_names = []

        return mean_names, scale_names, smooth_names, noise_names

    @classmethod
    def from_formula(cls,
                     formula,
                     data,
                     subset=None,
                     drop_cols=None,
                     *args,
                     **kwargs):

        if "scale_formula" in kwargs:
            scale_formula = kwargs["scale_formula"]
        else:
            raise ValueError("scale_formula is a required argument")

        if "smooth_formula" in kwargs:
            smooth_formula = kwargs["smooth_formula"]
        else:
            raise ValueError("smooth_formula is a required argument")

        if "noise_formula" in kwargs:
            noise_formula = kwargs["noise_formula"]
        else:
            noise_formula = None

        if "time" in kwargs:
            time = kwargs["time"]
        else:
            raise ValueError("time is a required argument")

        if "groups" in kwargs:
            groups = kwargs["groups"]
        else:
            raise ValueError("groups is a required argument")

        if subset is not None:
            warnings.warn("'subset' is ignored")

        if drop_cols is not None:
            warnings.warn("'drop_cols' is ignored")

        if isinstance(time, str):
            time = np.asarray(data[time])

        if isinstance(groups, str):
            groups = np.asarray(data[groups])

        exog_scale = patsy.dmatrix(scale_formula, data)
        scale_design_info = exog_scale.design_info
        scale_names = scale_design_info.column_names
        exog_scale = np.asarray(exog_scale)

        exog_smooth = patsy.dmatrix(smooth_formula, data)
        smooth_design_info = exog_smooth.design_info
        smooth_names = smooth_design_info.column_names
        exog_smooth = np.asarray(exog_smooth)

        if noise_formula is not None:
            exog_noise = patsy.dmatrix(noise_formula, data)
            noise_design_info = exog_noise.design_info
            noise_names = noise_design_info.column_names
            exog_noise = np.asarray(exog_noise)
        else:
            exog_noise, noise_design_info, noise_names, exog_noise =\
                None, None, [], None

        mod = super().from_formula(
            formula,
            data=data,
            subset=None,
            exog_scale=exog_scale,
            exog_smooth=exog_smooth,
            exog_noise=exog_noise,
            time=time,
            groups=groups)

        mod.data.scale_design_info = scale_design_info
        mod.data.smooth_design_info = smooth_design_info

        if mod._has_noise:
            mod.data.noise_design_info = noise_design_info

        mod.data.param_names = (mod.exog_names + scale_names +
                                smooth_names + noise_names)

        return mod

    def unpack(self, z):
        """
        Split the packed parameter vector into blocks.
        """

        # Mean parameters
        pm = self.exog.shape[1]
        mnpar = z[0:pm]

        # Standard deviation parameters
        pv = self.exog_scale.shape[1]
        scpar = z[pm:pm + pv]

        # Smoothness parameters
        ps = self.exog_smooth.shape[1]
        smpar = z[pm + pv:pm + pv + ps]

        # Observation white noise standard deviation.
        # Empty if has_noise = False.
        nopar = z[pm + pv + ps:]

        return mnpar, scpar, smpar, nopar

    def _get_start(self):

        # Use OLS to get starting values for mean structure parameters
        model = OLS(self.endog, self.exog)
        result = model.fit()

        m = self.exog_scale.shape[1] + self.exog_smooth.shape[1]

        if self._has_noise:
            m += self.exog_noise.shape[1]

        return np.concatenate((result.params, np.zeros(m)))

    def loglike(self, params):
        """
        Calculate the log-likelihood function for the model.

        Parameters
        ----------
        params : array_like
            The packed parameters for the model.

        Returns
        -------
        The log-likelihood value at the given parameter point.

        Notes
        -----
        The mean, scaling, and smoothing parameters are packed into
        a vector.  Use `unpack` to access the component vectors.
        """

        mnpar, scpar, smpar, nopar = self.unpack(params)

        # Residuals
        resid = self.endog - np.dot(self.exog, mnpar)

        # Scaling parameters
        sc = np.exp(np.dot(self.exog_scale, scpar))

        # Smoothness parameters
        sm = np.exp(np.dot(self.exog_smooth, smpar))

        # White noise standard deviation
        if self._has_noise:
            no = np.exp(np.dot(self.exog_noise, nopar))

        # Get the log-likelihood
        ll = 0.
        for _, ix in self._groups_ix.items():

            # Get the covariance matrix for this person.
            cm = self.cov.get_cov(self.time[ix], sc[ix], sm[ix])

            # The variance of the additive noise, if present.
            if self._has_noise:
                cm.flat[::cm.shape[0] + 1] += no[ix]**2

            re = resid[ix]
            ll -= 0.5 * np.linalg.slogdet(cm)[1]
            ll -= 0.5 * np.dot(re, np.linalg.solve(cm, re))

        if self.verbose:
            print("L=", ll)

        return ll

    def score(self, params):
        """
        Calculate the score function for the model.

        Parameters
        ----------
        params : array_like
            The packed parameters for the model.

        Returns
        -------
        The score vector at the given parameter point.

        Notes
        -----
        The mean, scaling, and smoothing parameters are packed into
        a vector.  Use `unpack` to access the component vectors.
        """

        mnpar, scpar, smpar, nopar = self.unpack(params)
        pm, pv, ps = len(mnpar), len(scpar), len(smpar)

        # Residuals
        resid = self.endog - np.dot(self.exog, mnpar)

        # Scaling
        sc = np.exp(np.dot(self.exog_scale, scpar))

        # Smoothness
        sm = np.exp(np.dot(self.exog_smooth, smpar))

        # White noise standard deviation
        if self._has_noise:
            no = np.exp(np.dot(self.exog_noise, nopar))

        # Get the log-likelihood
        score = np.zeros(len(mnpar) + len(scpar) + len(smpar) + len(nopar))
        for _, ix in self._groups_ix.items():

            sc_i = sc[ix]
            sm_i = sm[ix]
            resid_i = resid[ix]
            time_i = self.time[ix]
            exog_i = self.exog[ix, :]
            exog_scale_i = self.exog_scale[ix, :]
            exog_smooth_i = self.exog_smooth[ix, :]

            # Get the covariance matrix for this person.
            cm = self.cov.get_cov(time_i, sc_i, sm_i)

            if self._has_noise:
                no_i = no[ix]
                exog_noise_i = self.exog_noise[ix, :]
                cm.flat[::cm.shape[0] + 1] += no[ix]**2

            cmi = np.linalg.inv(cm)

            jacv, jacs = self.cov.jac(time_i, sc_i, sm_i)

            # The derivatives for the mean parameters.
            dcr = np.linalg.solve(cm, resid_i)
            score[0:pm] += np.dot(exog_i.T, dcr)

            # The derivatives for the scaling parameters.
            rx = np.outer(resid_i, resid_i)
            qm = np.linalg.solve(cm, rx)
            qm = 0.5 * np.linalg.solve(cm, qm.T)
            scx = sc_i[:, None] * exog_scale_i
            for i, _ in enumerate(ix):
                jq = np.sum(jacv[i] * qm)
                score[pm:pm + pv] += jq * scx[i, :]
                score[pm:pm + pv] -= 0.5 * np.sum(jacv[i] * cmi) * scx[i, :]

            # The derivatives for the smoothness parameters.
            smx = sm_i[:, None] * exog_smooth_i
            for i, _ in enumerate(ix):
                jq = np.sum(jacs[i] * qm)
                score[pm + pv:pm + pv + ps] += jq * smx[i, :]
                score[pm + pv:pm + pv + ps] -= (
                         0.5 * np.sum(jacs[i] * cmi) * smx[i, :])

            # The derivatives with respect to the standard deviation parameters
            if self._has_noise:
                sno = no_i[:, None]**2 * exog_noise_i
                score[pm + pv + ps:] -= np.dot(cmi.flat[::cm.shape[0] + 1],
                                               sno)
                bm = np.dot(cmi, np.dot(rx, cmi))
                score[pm + pv + ps:] += np.dot(bm.flat[::bm.shape[0] + 1], sno)

        if self.verbose:
            print("|G|=", np.sqrt(np.sum(score * score)))

        return score

    def hessian(self, params):

        hess = approx_fprime(params, self.score)
        return hess

    def fit(self, start_params=None, method=None, maxiter=None,
            **kwargs):
        """
        Fit a grouped Gaussian process regression using MLE.

        Parameters
        ----------
        start_params : array_like
            Optional starting values.
        method : str or array of str
            Method or sequence of methods for scipy optimize.
        maxiter : int
            The maximum number of iterations in the optimization.

        Returns
        -------
        An instance of ProcessMLEResults.
        """

        if "verbose" in kwargs:
            self.verbose = kwargs["verbose"]

        minim_opts = {}
        if "minim_opts" in kwargs:
            minim_opts = kwargs["minim_opts"]

        if start_params is None:
            start_params = self._get_start()

        if isinstance(method, str):
            method = [method]
        elif method is None:
            method = ["powell", "bfgs"]

        for j, meth in enumerate(method):

            if meth not in ("powell",):
                def jac(x):
                    return -self.score(x)
            else:
                jac = None

            if maxiter is not None:
                if np.isscalar(maxiter):
                    minim_opts["maxiter"] = maxiter
                else:
                    minim_opts["maxiter"] = maxiter[j % len(maxiter)]

            f = minimize(
                lambda x: -self.loglike(x),
                method=meth,
                x0=start_params,
                jac=jac,
                options=minim_opts)

            if not f.success:
                msg = "Fitting did not converge"
                if jac is not None:
                    msg += ", |gradient|=%.6f" % np.sqrt(np.sum(f.jac**2))
                if j < len(method) - 1:
                    msg += ", trying %s next..." % method[j+1]
                warnings.warn(msg)

            if np.isfinite(f.x).all():
                start_params = f.x

        hess = self.hessian(f.x)
        try:
            cov_params = -np.linalg.inv(hess)
        except Exception:
            cov_params = None

        class rslt:
            pass

        r = rslt()
        r.params = f.x
        r.normalized_cov_params = cov_params
        r.optim_retvals = f
        r.scale = 1

        rslt = ProcessMLEResults(self, r)

        return rslt

    def covariance(self, time, scale_params, smooth_params, scale_data,
                   smooth_data):
        """
        Returns a Gaussian process covariance matrix.

        Parameters
        ----------
        time : array_like
            The time points at which the fitted covariance matrix is
            calculated.
        scale_params : array_like
            The regression parameters for the scaling part
            of the covariance structure.
        smooth_params : array_like
            The regression parameters for the smoothing part
            of the covariance structure.
        scale_data : DataFrame
            The data used to determine the scale parameter,
            must have len(time) rows.
        smooth_data : DataFrame
            The data used to determine the smoothness parameter,
            must have len(time) rows.

        Returns
        -------
        A covariance matrix.

        Notes
        -----
        If the model was fit using formulas, `scale` and `smooth` should
        be Dataframes, containing all variables that were present in the
        respective scaling and smoothing formulas used to fit the model.
        Otherwise, `scale` and `smooth` should contain data arrays whose
        columns align with the fitted scaling and smoothing parameters.

        The covariance is only for the Gaussian process and does not include
        the white noise variance.
        """

        if not hasattr(self.data, "scale_design_info"):
            sca = np.dot(scale_data, scale_params)
            smo = np.dot(smooth_data, smooth_params)
        else:
            sc = patsy.dmatrix(self.data.scale_design_info, scale_data)
            sm = patsy.dmatrix(self.data.smooth_design_info, smooth_data)
            sca = np.exp(np.dot(sc, scale_params))
            smo = np.exp(np.dot(sm, smooth_params))

        return self.cov.get_cov(time, sca, smo)

    def predict(self, params, exog=None, *args, **kwargs):
        """
        Obtain predictions of the mean structure.

        Parameters
        ----------
        params : array_like
            The model parameters, may be truncated to include only mean
            parameters.
        exog : array_like
            The design matrix for the mean structure.  If not provided,
            the model's design matrix is used.
        """

        if exog is None:
            exog = self.exog
        elif hasattr(self.data, "design_info"):
            # Run the provided data through the formula if present
            exog = patsy.dmatrix(self.data.design_info, exog)

        if len(params) > exog.shape[1]:
            params = params[0:exog.shape[1]]

        return np.dot(exog, params)


class ProcessMLEResults(base.GenericLikelihoodModelResults):
    """
    Results class for Gaussian process regression models.
    """

    def __init__(self, model, mlefit):

        super().__init__(
            model, mlefit)

        pa = model.unpack(mlefit.params)

        self.mean_params = pa[0]
        self.scale_params = pa[1]
        self.smooth_params = pa[2]
        self.no_params = pa[3]

        self.df_resid = model.endog.shape[0] - len(mlefit.params)

        self.k_exog = self.model.exog.shape[1]
        self.k_scale = self.model.exog_scale.shape[1]
        self.k_smooth = self.model.exog_smooth.shape[1]

        self._has_noise = model._has_noise
        if model._has_noise:
            self.k_noise = self.model.exog_noise.shape[1]

    def predict(self, exog=None, transform=True, *args, **kwargs):

        if not transform:
            warnings.warn("'transform=False' is ignored in predict")

        if len(args) > 0 or len(kwargs) > 0:
            warnings.warn("extra arguments ignored in 'predict'")

        return self.model.predict(self.params, exog)

    def covariance(self, time, scale, smooth):
        """
        Returns a fitted covariance matrix.

        Parameters
        ----------
        time : array_like
            The time points at which the fitted covariance
            matrix is calculated.
        scale : array_like
            The data used to determine the scale parameter,
            must have len(time) rows.
        smooth : array_like
            The data used to determine the smoothness parameter,
            must have len(time) rows.

        Returns
        -------
        A covariance matrix.

        Notes
        -----
        If the model was fit using formulas, `scale` and `smooth` should
        be Dataframes, containing all variables that were present in the
        respective scaling and smoothing formulas used to fit the model.
        Otherwise, `scale` and `smooth` should be data arrays whose
        columns align with the fitted scaling and smoothing parameters.
        """

        return self.model.covariance(time, self.scale_params,
                                     self.smooth_params, scale, smooth)

    def covariance_group(self, group):

        # Check if the group exists, since _groups_ix is a
        # DefaultDict use len instead of catching a KeyError.
        ix = self.model._groups_ix[group]
        if len(ix) == 0:
            msg = "Group '%s' does not exist" % str(group)
            raise ValueError(msg)

        scale_data = self.model.exog_scale[ix, :]
        smooth_data = self.model.exog_smooth[ix, :]

        _, scale_names, smooth_names, _ = self.model._split_param_names()

        scale_data = pd.DataFrame(scale_data, columns=scale_names)
        smooth_data = pd.DataFrame(smooth_data, columns=smooth_names)
        time = self.model.time[ix]

        return self.model.covariance(time,
                                     self.scale_params,
                                     self.smooth_params,
                                     scale_data,
                                     smooth_data)

    def summary(self, yname=None, xname=None, title=None, alpha=0.05):

        df = pd.DataFrame()

        typ = (["Mean"] * self.k_exog + ["Scale"] * self.k_scale +
               ["Smooth"] * self.k_smooth)
        if self._has_noise:
            typ += ["SD"] * self.k_noise
        df["Type"] = typ

        df["coef"] = self.params

        try:
            df["std err"] = np.sqrt(np.diag(self.cov_params()))
        except Exception:
            df["std err"] = np.nan

        from scipy.stats.distributions import norm
        df["tvalues"] = df.coef / df["std err"]
        df["P>|t|"] = 2 * norm.sf(np.abs(df.tvalues))

        f = norm.ppf(1 - alpha / 2)
        df["[%.3f" % (alpha / 2)] = df.coef - f * df["std err"]
        df["%.3f]" % (1 - alpha / 2)] = df.coef + f * df["std err"]

        df.index = self.model.data.param_names

        summ = summary2.Summary()
        if title is None:
            title = "Gaussian process regression results"
        summ.add_title(title)
        summ.add_df(df)

        return summ
