
    M/Ph;                         d Z ddlZddlmZ ddlmZ ddlmZ ddl	m
Z
mZ ddlZdZddZ G d	 d
          Z G d d          Z G d d          Z G d dee          ZdS )a_  
Generalized additive models



Requirements for smoothers
--------------------------

smooth(y, weights=xxx) : ? no return ? alias for fit
predict(x=None) : smoothed values, fittedvalues or for new exog
df_fit() : degress of freedom of fit ?


Notes
-----
- using PolySmoother works for AdditiveModel, and GAM with Poisson and Binomial
- testfailure with Gamma, no other families tested
- there is still an indeterminacy in the split up of the constant across
  components (smoothers) and alpha, sum, i.e. constant, looks good.
  - role of offset, that I have not tried to figure out yet

Refactoring
-----------
currently result is attached to model instead of other way around
split up Result in class for AdditiveModel and for GAM,
subclass GLMResults, needs verification that result statistics are appropriate
how much inheritance, double inheritance?
renamings and cleanup
interface to other smoothers, scipy splines

basic unittests as support for refactoring exist, but we should have a test
case for gamma and the others. Advantage of PolySmoother is that we can
benchmark against the parametric GLM results.

    N)families)PolySmoother)GLM)IterationLimitWarningiteration_limit_docFc                 (   t          j        |           }| j        d         }|dk     r|}n t          j        d          t          j        d          z  }t          j        d          t          j        d          z  }t          j        d          t          j        d          z  }t          j        d          t          j        d          z  }|dk     rd|||z
  |dz
  z  dz  z   z  }nA|d	k     rd|||z
  |dz
  z  d
z  z   z  }n&|dk     rd|||z
  |d	z
  z  dz  z   z  }nd|dz
  dz  z   }|t          j        d|dz
  |                              t           j                           }	|d}
n|}
t          |
|                                           }|S )z

    r   i  2      d         g     b@i   g     @i  g     @g      @g?   N   )x)	npsortshapeloglinspaceastypeint32r   copy)r   s_arg_xnnknotsa1a2a3a4knotsorderss               W/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/sandbox/gam.pydefault_smootherr%   <   s    
B	
A 	3wwVBZZ"&))#VC[[26!99$VC[[26!99$VC[[26!99$s77rBw1r624778FFWWrBw1s73D889FFXXrBw1s73E99:FFAI++Fr{1ac6**11"(;;<E }Uaffhh'''A H    c                       e Zd Zd Zd ZdS )Offsetc                 "    || _         || _        d S Nfnoffset)selfr,   r-   s      r$   __init__zOffset.__init__e   s    r&   c                 ,     | j         |i || j        z   S r*   r+   )r.   argskws      r$   __call__zOffset.__call__i   s     tw###dk11r&   N)__name__
__module____qualname__r/   r3    r&   r$   r(   r(   c   s2          2 2 2 2 2r&   r(   c                   2    e Zd Zd Zd Zd Zd Zd Zd ZdS )Resultsc                     |j         \  | _        | _        || _        || _        || _        || _        || _        || _        || _        | 	                    |          | _
        d S r*   )r   nobsk_varsYalpha	smoothersr-   familyexoglinkinversepredictmu)r.   r=   r>   rA   r?   r@   r-   s          r$   r/   zResults.__init__n   s^    !%	4; 
"	))$//r&   c                 ,    |                      |          S )z\expected value ? check new GLM, same as mu for given exog
        maybe remove this
        )rB   r.   rA   s     r$   r3   zResults.__call__}   s     &&t,,,r&   c                 f    | j         j                            |                     |                    S )zBexpected value ? check new GLM, same as mu for given exog
        )r@   linkinversepredictrE   s     r$   rB   zResults.linkinversepredict   s)     {''T(:(:;;;r&   c                 t   |                      |          }|j        d         | j        k    rKddl} |j        dt
                     t          j        |                      |          d          | j        z   S |j        d         | j        k    rt          j        |d          | j        z   S t          d          )z{predict response, sum of smoothed components
        TODO: What's this in the case of GLM, corresponds to X*beta ?
        r   Nz&old orientation, colvars, will go away)axisr   zshape mismatch in predict)
smoothedr   r<   warningswarnFutureWarningr   sumr>   
ValueError)r.   rA   exog_smoothedrM   s       r$   rI   zResults.predict   s     d++q!T[00OOOHMB') ) )6$----A666CCq!T[006-a0004:==8999r&   c                      t          j         fdt          j        d                   D                       j        S )z4get smoothed prediction for each component

        c                     g | ]:}j         |                             d d |f                   j        |         z   ;S r*   )r?   rI   r-   ).0irA   r.   s     r$   
<listcomp>z$Results.smoothed.<locals>.<listcomp>   sW     8 8 8 	 *224!9==AN 8 8 8r&   r   )r   arrayranger   TrE   s   ``r$   rL   zResults.smoothed   sZ     x 8 8 8 8 8 #(
1"6"6	8 8 8 9 9 :;		;r&   c                     |                      |          }|                    d          }|                                | j        z   }||z
  }||fS )Nr   )rL   meanrP   r>   )r.   rA   
componentsmeansconstantcomponents_demeaneds         r$   smoothed_demeanedzResults.smoothed_demeaned   sN    ]]4((
""99;;+(50"H,,r&   N)	r4   r5   r6   r/   r3   rB   rI   rL   ra   r7   r&   r$   r9   r9   l   sn        0 0 0- - -< < <
: : :.; ; ;- - - - -r&   r9   c                   @    e Zd ZdZddZd Zd Zd Zd Zd Z	ddZ
dS )AdditiveModela  additive model with non-parametric, smoothed components

    Parameters
    ----------
    exog : ndarray
    smoothers : None or list of smoother instances
        smoother instances not yet checked
    weights : None or ndarray
    family : None or family instance
        I think only used because of shared results with GAM and subclassing.
        If None, then Gaussian is used.
    Nc                 z   | _         ||| _        n)t          j        | j         j        d                   | _        |p%fdt          j        d                   D             | _        t          j        d                   D ]}d| j        |         _        |t          j	                    | _
        d S || _
        d S )Nr   c                 B    g | ]}t          d d |f                   S r*   )r%   )rU   rV   rA   s     r$   rW   z*AdditiveModel.__init__.<locals>.<listcomp>   s.    &a&a&aq'7QQQqS	'B'B&a&a&ar&   r   
   )rA   weightsr   onesr   rY   r?   dfr   Gaussianr@   )r.   rA   r?   rg   r@   rV   s    `    r$   r/   zAdditiveModel.__init__   s    	"DLL749?1#566DL"a&a&a&a&aERVR\]^R_L`L`&a&a&a tz!}%% 	& 	&A#%DN1  >"+--DKKK DKKKr&   c                 6    d| _         t          j        | _        | S )z3initialize iteration ?, should be removed

        r   )iterr   infdevr.   s    r$   _iter__zAdditiveModel._iter__   s     	6r&   c                 *   | j         }| j         j        }|                    | j                  }t	          j        | j        j        d         t          j                  }|| j        z  	                                | j        	                                z  }t          | j        j        d                   D ]2}| j        |                                         }t	          j        ||z
  |z
  |z                                             }|r!t          ||||           t          d          | j        |                             ||z
  |z   | j                   | j        |                                         }	|	| j        z  	                                 | j        	                                z  | j         j        |<   t$          rt          | j        |         j                   ||	|z
  z  }4| j         j        }t)          ||| j        | j        | j        |          S )a  internal calculation for one fit iteration

        BUG: I think this does not improve, what is supposed to improve
            offset does not seem to be used, neither an old alpha
            The smoothers keep coef/params from previous iteration
        r   znan encounteredrg   )resultsr=   rI   rA   r   zerosr   float64rg   rP   rY   r?   isnananyprintrQ   smoothr-   DEBUGparamsr9   r@   )
r.   _resultsr=   rC   r-   r>   rV   tmpbadtmp2s
             r$   nextzAdditiveModel.next   s    <LNdi(($)/!,bj99T\!&&((4<+;+;+=+==tyq)** 	 	A.#++--C (1u9r>C/004466C 4aC((( !2333N1$$QVc\-1\ % ; ; ;>!$,,..D'+DL'8&=&=&?&?%?$,BRBRBTBT%TDL" 0dnQ'.///$*BB $ q%DNDKPPPr&   c                    | xj         dz  c_         t          r`t          | j         | j        j        j                   t          | j                            | j                  j        | j        j                   | j        j        | j                            | j                  z
  dz  | j        z  	                                }| j         | j
        k    rdS t          j        | j        |z
  |z            | j        k     r	|| _        dS || _        dS )zcondition to continue iteration loop

        Parameters
        ----------
        tol

        Returns
        -------
        cont : bool
            If true, then iteration should be continued.

        r   r
   FT)rl   rz   rx   rs   r=   r   rI   rA   rg   rP   maxiterr   fabsrn   rtol)r.   curdevs     r$   contzAdditiveModel.cont  s     			Q		 	M$)T\^1222$,&&ty1179KLLLLNT\%9%9$)%D%DDqHDLX]]__9t|##57DHv%/0049<<DH5 tr&   c                       j         j        j        d         t          j         fdt           j        j        d                   D                                                       z
  S )zIdegrees of freedom of residuals, ddof is sum of all smoothers df
        r   c                 N    g | ]!}j         |                                         "S r7   )r?   df_fit)rU   rV   r.   s     r$   rW   z*AdditiveModel.df_resid.<locals>.<listcomp>"  s-    2q2q2qRS4>!3D3K3K3M3M2q2q2qr&   r   )rs   r=   r   r   rX   rY   rA   rP   ro   s   `r$   df_residzAdditiveModel.df_resid  s]     |~#A&2q2q2q2qW\]a]f]lmn]oWpWp2q2q2q)r)r)v)v)x)xxxr&   c                     | j         j        |                      | j                  z
  dz                                  |                                 z  S )z1estimate standard deviation of residuals
        r
   )rs   r=   rA   rP   r   ro   s    r$   estimate_scalezAdditiveModel.estimate_scale$  s?     $,,ty"9"99A=BBDDt}}VVr&   ư>   c                    || _         || _        |                                  d}|| j        z                                  | j                                        z  }t          j        | j        j        d         t
          j	                  }t          | j        j        d                   D ]}| j        |                             ||z
  |z
  | j                   | j        |                                         }|| j        z                                  | j                                        z  ||<   ||                                z  }||z  }t          ||| j        | j        | j        |          | _        |                                 r-|                                 | _        |                                 -| j        | j        k    rt)          j        t,          t.                     | j        S )zwfit the model to a given endogenous variable Y

        This needs to change for consistency with statsmodels

        r   r   rr   )r   r   rp   rg   rP   r   rt   rA   r   ru   rY   r?   ry   rI   r9   r@   rs   r   r   rl   rM   rN   r   r   )	r.   r=   r   r   rC   r>   r-   rV   r}   s	            r$   fitzAdditiveModel.fit*  s    	T\!&&((4<+;+;+=+==$)/!,bj99tyq)** 	 	AN1$$QY^-1\ % ; ; ;.#++--Ct|+0022T\5E5E5G5GGF1I37799C#IBBq%DNDKQWXXiikk 	'99;;DL iikk 	' 9$$M-/DEEE|r&   )NNNr   r   )r4   r5   r6   __doc__r/   rp   r   r   r   r   r   r7   r&   r$   rc   rc      s         ! ! ! !&  $Q $Q $QL  :y y y
W W W     r&   rc   c                   J    e Zd Zd ej                    fdZd ZddZd	dZdS )
ModelNc                     t                               | |||           t          j        | |||           | j        |u sJ d S )Nr?   r@   )r@   )rc   r/   r   r@   )r.   endogrA   r?   r@   s        r$   r/   zModel.__init__[  sS     	tTYvNNNT5$v6666{f$$$$$$r&   c                    | j         }|j        }t          j        | j                                                  rt          d           | j        j        	                    |
                    | j                            |_        | j                            |j                  }t          j        |                                          r|| _        t          d           || _        t          rVt          dt          j        | j        j                            |j                                                                       |
                    | j                  | j        j                            |j                  ||j        z
  z  z   }t!          | j        | j        | j        | j                  }|                    |          }| j                            ||
                    | j                  g           ||_        | j        j        	                    |
                    | j                            |_        | xj        dz  c_        || _         |S )Nnanweights1nanweights2zderiv isnan)r?   rg   r@   r   )rs   r=   r   rv   rg   allrx   r@   rG   rH   rI   rA   rC   rz   derivrw   rc   r?   r   historyappendrl   )r.   r|   r=   rg   Zms         r$   r   z
Model.nextb  s   <J8DL!!%%'' 	!-   k&..x/?/?	/J/JKK +%%hk228G  "" 	!"DL-    	V-$+*:*@*@*M*M!N!N!R!R!T!TUUU TY''{%%hk22a(+oFG $)t~"&,t{D D D 5588Q 0 0 ; ;<===
k&..x/?/?	/J/JKK		Q		r&   c                     || j         }|| j        j        z
  }t          j        |d          | j                            | j        j                  z                                  | j        z  S )z9
        Return Pearson's X^2 estimate of scale.
        Nr
   )	r=   rs   rC   r   powerr@   variancerP   r   )r.   r=   resids      r$   r   zModel.estimate_scale  sa    
 9ADLO#""T[%9%9$,/%J%JJOOQQm$ 	$r&   r   r   c                    || _         || _        t          j        |t          j                  | _        g | _        |                                  | j                                        }| j	        
                    |          }| j	                            |          | j	        j                            |          ||z
  z  z   }t          | j        | j        | j	                  }|                    |          | _        | j	        j                            | j                            | j                            | j        _        || j        _        |                                 rR|                                 | _        |                                 x| _        | j        _        |                                 R| j        | j        k    rdd l} |j        t8          t:                     | j        S )Nr   r   )r   r   r   asarrayru   r=   r   rp   r\   r@   starting_murG   r   rc   rA   r?   r   rs   rH   rI   rC   r   r   r   scalerl   rM   rN   r   r   )	r.   r=   r   r   r>   mu0r   r   rM   s	            r$   r   z	Model.fit  s   	Arz** 	 k%%a((KU##dk&6&<&<U&C&Cq3w&OO$)t~dkRRRuuQxx+*224<3G3G	3R3RSSiikk 	D99;;DL.2.A.A.C.CCDJ+ iikk 	D 9$$OOOHM-/DEEE|r&   r*   r   )	r4   r5   r6   r   rj   r/   r   r   r   r7   r&   r$   r   r   K  ss          /3;L8;L;N;N % % % %" " "H	$ 	$ 	$ 	$     r&   r   r*   )r   numpyr   statsmodels.genmodr   +statsmodels.sandbox.nonparametric.smoothersr   +statsmodels.genmod.generalized_linear_modelr   statsmodels.tools.sm_exceptionsr   r   rM   rz   r%   r(   r9   rc   r   r7   r&   r$   <module>r      sY  " "`     ' ' ' ' ' ' D D D D D D ; ; ; ; ; ; V V V V V V V V % % % %N2 2 2 2 2 2 2 2E- E- E- E- E- E- E- E-NV V V V V V V Vpf f f f fC f f f f fr&   