
    M/Ph                     R    d Z ddlZddlZddlmZ  G d d          Zd Zd Z	d Z
dS )	a  
The RegressionFDR class implements the 'Knockoff' approach for
controlling false discovery rates (FDR) in regression analysis.

The knockoff approach does not require standard errors.  Thus one
application is to provide inference for parameter estimates that are
not smooth functions of the data.  For example, the knockoff approach
can be used to do inference for parameter estimates obtained from the
LASSO, of from stepwise variable selection.

The knockoff approach controls FDR for parameter estimates that may be
dependent, such as coefficient estimates in a multiple regression
model.

The knockoff approach is applicable whenever the test statistic can be
computed entirely from x'y and x'x, where x is the design matrix and y
is the vector of responses.

Reference
---------
Rina Foygel Barber, Emmanuel Candes (2015).  Controlling the False
Discovery Rate via Knockoffs.  Annals of Statistics 43:5.
https://candes.su.domains/publications/downloads/FDR_regression.pdf
    N)summary2c                   &    e Zd ZdZddZd Zd ZdS )RegressionFDRa  
    Control FDR in a regression procedure.

    Parameters
    ----------
    endog : array_like
        The dependent variable of the regression
    exog : array_like
        The independent variables of the regression
    regeffects : RegressionEffects instance
        An instance of a RegressionEffects class that can compute
        effect sizes for the regression coefficients.
    method : str
        The approach used to assess and control FDR, currently
        must be 'knockoff'.

    Returns
    -------
    Returns an instance of the RegressionFDR class.  The `fdr` attribute
    holds the estimated false discovery rates.

    Notes
    -----
    This class Implements the knockoff method of Barber and Candes.
    This is an approach for controlling the FDR of a variety of
    regression estimation procedures, including correlation
    coefficients, OLS regression, OLS with forward selection, and
    LASSO regression.

    For other approaches to FDR control in regression, see the
    statsmodels.stats.multitest module.  Methods provided in that
    module use Z-scores or p-values, and therefore require standard
    errors for the coefficient estimates to be available.

    The default method for constructing the augmented design matrix is
    the 'equivariant' approach, set `design_method='sdp'` to use an
    alternative approach involving semidefinite programming.  See
    Barber and Candes for more information about both approaches.  The
    sdp approach requires that the cvxopt package be installed.
    knockoffc                 :   t          |d          r|j        | _        n)d t          |j        d                   D             | _        t          j        |          }t          j        |          }d|vrd|d<   |j        \  }}|d         dk    rt          |          \  }}	}
n|d         dk    rt          |          \  }}	}
|t          j	        |          z
  }|| _
        t          j        ||	fd          | _        || _        |	| _        |                    |           | _        t          j        | j        dd	          \  }}}t          j        |          }t%          | j                  |z
  |z   }d||dk     <   t          j        || d
          dz
  }||         }d||dk     <   d|z   |z  }||z  }||         | _        ||         | _        || _        || _        t1          j        | j                  }| j        |d<   | j        |d<   | j        |d<   || _        d S )Ncolumnsc                     g | ]}d |z  S )zx%d ).0js     [/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/stats/_knockoff.py
<listcomp>z*RegressionFDR.__init__.<locals>.<listcomp>O   s    CCC519CCC       design_methodequisdpaxisT)return_inversereturn_countsright)sider   )indexStatzFDR+FDR)hasattrr   xnamesrangeshapenpasarray_design_knockoff_equi_design_knockoff_sdpmeanendogconcatenateexogexog1exog2statsuniquecumsumlensearchsortedfdrfdrp_ufdr_unqpd	DataFramefdr_df)selfr&   r(   
regeffectsmethodkwargsnobsnvarr)   r*   _unqinvcntccdenomiinumerr1   r0   dfs                        r   __init__zRegressionFDR.__init__I   s.    4## 	D,DKKCCeDJqM.B.BCCCDKz$
5!!&((&,F?#Z
d/"f,,3D99OE5!!O$--2488OE5!&
NE5>:::	

%%d++
	$*T046 6 6S# Ys^^DJ"$s*eai _S3$W55592b1f E	U" ems8I	
	\,,,Z6
Y6
H5	r   c                     t          j        | j                  |k    r| j        | j        |k             d         S t           j        S )zI
        Returns the threshold statistic for a given target FDR.
        r   )r!   minr2   r3   inf)r7   tfdrs     r   	thresholdzRegressionFDR.threshold   s;    
 6$*%%9TZ4/0336Mr   c                     t          j                    }|                    d           |                    | j                   |S )NzRegression FDR results)r   Summary	add_titleadd_dfr6   )r7   summs     r   summaryzRegressionFDR.summary   s<    !!/000DK   r   N)r   )__name__
__module____qualname____doc__rF   rK   rQ   r
   r   r   r   r      sR        ' 'R9 9 9 9v      r   r   c                    	 ddl m}m} n# t          $ r t	          d          w xY w| j        \  }}t          j        | dz  d          }t          j        |          }| |z  } t          j	        | j
        |           } |t          j        |                     }t          j        t          j        |          t          j        |          f          } ||          }t          j        t          j        |           t          j        |          fd          }	 ||	          }	d|z  }
 ||
          }
t          j        |          \  }}t          j        ||z  |f          }d|||z  |z   |f<    ||          }d|j        d<   |                    ||	||g|
g          }t          j        |d	                                                   }t          j	        | j
        |           }t)          | ||          }| ||fS )
zw
    Use semidefinite programming to construct a knockoff design
    matrix.

    Requires cvxopt to be installed.
    r   )solversmatrixz3SDP knockoff designs require installation of cvxopt   r   r   Fshow_progressx)cvxoptrW   rX   ImportError
ValueErrorr    r!   sumsqrtdotTonesr'   zeroseyediag_indicesoptionsr   r"   ravel
_get_knmat)r(   rW   rX   r;   r<   xnmSigmach0G0h1ir   G1solslxcovexogns                     r   r$   r$      s   P********* P P PNOOOP JD$ &q!

C
'#,,C#:DF464  E~A	$7	8	8B	B	"&,,t5A	>	>	>B	B	
UB	B?4  DAq	49d#	$	$BBqvz1}	B',GOO$
++aR"t
,
,C	CH			#	#	%	%B6$&$DtT2&&E?s    %c                    | j         \  }}|d|z  k     rd}t          |          t          j        | dz  d          }t          j        |          }| |z  } t          j        | j        |           }t          j                            |          \  }}t          j	        |          }t          d|z  d          }	|	t          j
        |          z  }	t          | ||	          }
| |
|	fS )a  
    Construct an equivariant design matrix for knockoff analysis.

    Follows the 'equi-correlated knockoff approach of equation 2.4 in
    Barber and Candes.

    Constructs a pair of design matrices exogs, exogn such that exogs
    is a scaled/centered version of the input matrix exog, exogn is
    another matrix of the same shape with cov(exogn) = cov(exogs), and
    the covariances between corresponding columns of exogn and exogs
    are as small as possible.
    rY   z6The equivariant knockoff can ony be used when n >= 2*pr   r   )r    r^   r!   r_   r`   ra   rb   linalgeigrH   rc   ri   )r(   r;   r<   msgrj   rt   evr=   evminrs   ru   s              r   r#   r#      s     JD$af}}Foo &q!

C
'#,,C#:D6$&$DIMM$EBF2JJE	QuWaB	bgdmm	BtT2&&E?r   c                 6   | j         \  }}t          j                            |          }|t          j        ||           z  }t          j        |          \  }}|||fxx         d|z  z  cc<   t          j                            ||f          }t          j                            |           \  }	}
|t          j	        |	t          j	        |	j
        |                    z  }t          j                            |          \  }}
t          j                            |d          \  }}}
|t          j        |          z  }|j
        }|d d d f         t          j                            || j
                  z  j
        }| |z
  t          j	        ||          z   }|S )NrY   )sizer   )r    r!   rw   r?   outerrf   randomnormalqrra   rb   svdr`   solve)r(   rt   rs   r;   r<   ashrp   r   umatur=   ashrxcexru   s                  r   ri   ri      sX    JD$
)--

CBHRC?4  DAq1IIIRIII9$..D9<<DAqBF1bfQS$''(((Dill4  GD!)--Q''KD"aBGBKKD6D
QQQW+	df55
5	8B2ItT***ELr   )rU   numpyr!   pandasr4   statsmodels.iolibr   r   r$   r#   ri   r
   r   r   <module>r      s    2         & & & & & &u u u u u u u up+ + +\" " "J    r   