
    M/Ph(                     P    d Z ddlZddlmZ ddZd Zd Zd	 Zd
 Z	d Z
d Zd ZdS )ai  
Univariate lowess function, like in R.

References
----------
Hastie, Tibshirani, Friedman. (2009) The Elements of Statistical Learning: Data Mining, Inference, and Prediction, Second Edition: Chapter 6.

Cleveland, W.S. (1979) "Robust Locally Weighted Regression and Smoothing Scatterplots". Journal of the American Statistical Association 74 (368): 829-836.
    N)lstsqUUUUUU?   c           	      X   |}|j         dk    rt          d          | j         dk    rt          d          | j        d         |j        d         k    rt          d          |j        d         }t          j        |          }t          ||z            }t          j        |          }t          j        ||                   }	| |         }
t          |	|
||          \  }}t          |          D ]}t          |	|
||||           t          j        |	|g          j        }|df|_        |S )a  
    LOWESS (Locally Weighted Scatterplot Smoothing)

    A lowess function that outs smoothed estimates of endog
    at the given exog values from points (exog, endog)

    Parameters
    ----------
    endog : 1-D numpy array
        The y-values of the observed points
    exog : 1-D numpy array
        The x-values of the observed points
    frac : float
        Between 0 and 1. The fraction of the data used
        when estimating each y-value.
    it : int
        The number of residual-based reweightings
        to perform.

    Returns
    -------
    out: numpy array
        A numpy array with two columns. The first column
        is the sorted x values and the second column the
        associated estimated y-values.

    Notes
    -----
    This lowess function implements the algorithm given in the
    reference below using local linear estimates.

    Suppose the input data has N points. The algorithm works by
    estimating the true ``y_i`` by taking the frac*N closest points
    to ``(x_i,y_i)`` based on their x values and estimating ``y_i``
    using a weighted linear regression. The weight for ``(x_j,y_j)``
    is `_lowess_tricube` function applied to ``|x_i-x_j|``.

    If ``iter > 0``, then further weighted local linear regressions
    are performed, where the weights are the same as above
    times the `_lowess_bisquare` function of the residuals. Each iteration
    takes approximately the same amount of time as the original fit,
    so these iterations are expensive. They are most useful when
    the noise has extremely heavy tails, such as Cauchy noise.
    Noise with less heavy-tails, such as t-distributions with ``df > 2``,
    are less problematic. The weights downgrade the influence of
    points with large residuals. In the extreme case, points whose
    residuals are larger than 6 times the median absolute residual
    are given weight 0.

    Some experimentation is likely required to find a good
    choice of frac and iter for a particular dataset.

    References
    ----------
    Cleveland, W.S. (1979) "Robust Locally Weighted Regression
    and Smoothing Scatterplots". Journal of the American Statistical
    Association 74 (368): 829-836.

    Examples
    --------
    The below allows a comparison between how different the fits from
    `lowess` for different values of frac can be.

    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> lowess = sm.nonparametric.lowess
    >>> x = np.random.uniform(low=-2*np.pi, high=2*np.pi, size=500)
    >>> y = np.sin(x) + np.random.normal(size=len(x))
    >>> z = lowess(y, x)
    >>> w = lowess(y, x, frac=1./3)

    This gives a similar comparison for when it is 0 vs not.

    >>> import scipy.stats as stats
    >>> x = np.random.uniform(low=-2*np.pi, high=2*np.pi, size=500)
    >>> y = np.sin(x) + stats.cauchy.rvs(size=len(x))
    >>> z = lowess(y, x, frac= 1./3, it=0)
    >>> w = lowess(y, x, frac=1./3)
       zexog must be a vectorzendog must be a vectorr   z$exog and endog must have same length   )ndim
ValueErrorshapenpzerosintargsortarray_lowess_initial_fitrange_lowess_robustify_fitT)endogexogfracitxnfittedkindex_arrayx_copyy_copyweightsiouts                 n/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/nonparametric/smoothers_lowess_old.pylowessr$      s1   ` 	AyA~~0111zQ1222{1~##?@@@
1AXa[[FD1HA*T""KXd;'((F;F)&&!Q??OFG2YY - -fff%q!	- 	- 	- 	- (FF#
$
$
&C1CIJ    c                    t          j        ||f| j                  }d|g}t          j        |df          }t          j        |          }t	          |          D ]b}| |         | |d                  z
  }	| |d         dz
           | |         z
  }
t          |	|
          }t          ||ddf         | |d         |d                  | |         |           t          ||ddf                    t          j        ||ddf                   ||ddf<   | |d         |d                  |dddf<   ||ddf         ||d         |d                  z  }t          ||ddf         
                    |d          |z  |d          d         }|d         |d         | |         z  z   ||<   t          | ||dz              d||fS )a  
    The initial weighted local linear regression for lowess.

    Parameters
    ----------
    x_copy : 1-d ndarray
        The x-values/exogenous part of the data being smoothed
    y_copy : 1-d ndarray
        The y-values/ endogenous part of the data being smoothed
   k : int
        The number of data points which affect the linear fit for
        each estimated point
    n : int
        The total number of points

    Returns
    -------
    fitted : 1-d ndarray
        The fitted y-values
    weights : 2-d ndarray
        An n by k array. The contribution to the weights in the
        local linear fit coming from the distances between the
        x-values

   )dtyper   r   r   Nrcond)r   r   r'   onesr   max_lowess_wt_standardize_lowess_tricubesqrtr   reshape_lowess_update_nn)r   r   r   r   r    
nn_indicesXr   r!   
left_widthright_widthwidthy_ibetas                 r#   r   r   |   s   4 h!ufl333GAJ
1AXa[[F1XX 3 3AY
1!66
Z]1_-q	9J,,wqs| &z!}Z]'B C"1Iu	. 	. 	. 	!!!%%%wwqs|,,!!!
1jm34!!!A#aclVJqM*Q-$?@@WQqqqS\))!A..2CrBBB1EGd1gfQi//q	&*ac2222 7?r%   c                 (    || dd<   | |z  } | |z  } dS )a.  
    The initial phase of creating the weights.
    Subtract the current x_i and divide by the width.

    Parameters
    ----------
    weights : ndarray
        The memory where (new_entries - x_copy_i)/width will be placed
    new_entries : ndarray
        The x-values of the k closest points to x[i]
    x_copy_i : float
        x[i], the i'th point in the (sorted) x values
    width : float
        The maximum distance between x[i] and any point in new_entries

    Returns
    -------
    Nothing. The modifications are made to weight in place.
    N )r    new_entriesx_copy_ir6   s       r#   r-   r-      s)    ( GAAAJxGuGGGr%   c                    d|g}t          j        |df          }t          j        |          }|f|_        ||z  }t          j        |          }t          j        |          }	|d|	z  z  }|dk    }
t          |           d||
<   t          |          D ]}||ddf         t          j        ||d         |d                            z  }| |d         |d                  |dddf<   |||d         |d                  z  }|df|_        t          ||z  |d          d         }|d         |d         | |         z  z   ||<   t          | ||dz              dS )ah  
    Additional weighted local linear regressions, performed if
    iter>0. They take into account the sizes of the residuals,
    to eliminate the effect of extreme outliers.

    Parameters
    ----------
    x_copy : 1-d ndarray
        The x-values/exogenous part of the data being smoothed
    y_copy : 1-d ndarray
        The y-values/ endogenous part of the data being smoothed
    fitted : 1-d ndarray
        The fitted y-values from the previous iteration
    weights : 2-d ndarray
        An n by k array. The contribution to the weights in the
        local linear fit coming from the distances between the
        x-values
    k : int
        The number of data points which affect the linear fit for
        each estimated point
    n : int
        The total number of points

   Returns
    -------
    Nothing. The fitted values are modified in place.
    r   r      r   Nr(   r)   )r   r+   copyr   absolutemedian_lowess_bisquarer   r/   r   r1   )r   r   r   r    r   r   r2   r3   residual_weightsstoo_bigr!   total_weightsr7   r8   s                  r#   r   r      s   8 AJ
1AwvT{#344
	"##A1!G%&&& !W 1XX 3 3!!!rw/?
18B1AF 0G (H (H H 
1jm34!!!A#fZ]:a=%@AA e]Q&2666q9Gd1gq	11q	&*ac22223 3r%   c                     	 |d         | j         k     rS| |         | |d                  z
  }| |d                  | |         z
  }||k     r|d         dz   |d<   |d         dz   |d<   ndS dS g)a  
    Update the endpoints of the nearest neighbors to
    the ith point.

    Parameters
    ----------
    x : iterable
        The sorted points of x-values
    cur_nn : list of length 2
        The two current indices between which are the
        k closest points to x[i]. (The actual value of
        k is irrelevant for the algorithm.
    i : int
        The index of the current value in x for which
        the k closest points are desired.

    Returns
    -------
    Nothing. It modifies cur_nn in place.
    Tr   r   N)size)r   cur_nnr!   	left_distnew_right_dists        r#   r1   r1     s    *
!9QV!q|+Ivay\AaD0N	))"1IMq	"1IMq		E
r%   c                     t          j        |           | dd<   t          |            t          j        |           | dd<   | dz  } t          |            dS )a  
    The _tricube function applied to a numpy array.
    The tricube function is (1-abs(t)**3)**3.

    Parameters
    ----------
    t : ndarray
        Array the tricube function is applied to elementwise and
        in-place.

    Returns
    -------
    Nothing
    Nr   )r   r@   _lowess_mycubenegativets    r#   r.   r.   '  sY      ;q>>AaaaD1;q>>AaaaDFA1r%   c                     | | z  }| |z  } dS )z
    Fast matrix cube

    Parameters
    ----------
    t : ndarray
        Array that is cubed, elementwise and in-place

    Returns
    -------
    Nothing
    Nr:   )rP   t2s     r#   rM   rM   >  s     
1BGAAAr%   c                 V    | | z  } t          j        |           | dd<   | dz  } | | z  } dS )a  
    The bisquare function applied to a numpy array.
    The bisquare function is (1-t**2)**2.

    Parameters
    ----------
    t : ndarray
        array bisquare function is applied to, element-wise and in-place.

    Returns
    -------
    Nothing
    Nr   )r   rN   rO   s    r#   rB   rB   P  s8     FA;q>>AaaaDFAFAAAr%   )r   r   )__doc__numpyr   numpy.linalgr   r$   r   r-   r   r1   r.   rM   rB   r:   r%   r#   <module>rW      s              k k k k\4 4 4n  263 63 63r  D  .  $    r%   