
    hMhOr                        d Z ddlmZmZmZ ddlZddlZddlZ	ddl
Z
ddlmZ ddlmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZ dedededefdZdedededeeef         fdZ dedededefdZ!dedededefdZ"dedededeeef         fdZ#dedededefdZ$	 d?dej%        de&deej%        ej%        f         fdZ'd?dede&defdZ(dedefdZ)d@dededefdZ*d ej%        d!ej%        d"ej%        deej%        ej%        f         fd#Z+	 d?d$ej%        de&deej%        ej%        f         fd%Z,	 	 	 	 	 dAd'e	j-        d(e.de&d)ed*e/d+e/d,e/de	j-        fd-Z0	 	 	 	 	 	 	 	 	 	 dBd0e	j-        d1ee1         de&d)ed2e/d*e/d+e/d,e/d3e/d4e/fd5Z2	 	 	 	 	 	 	 dCd'e	j-        d7ee1e3f         fd8Z4	 	 	 	 	 	 	 	 	 	 	 dDd0e	j-        d1ee1         de&d)ed2e/d7ee1e3f         d*e/d+e/d,e/d3e/d4e/fd9Z5	 	 	 	 	 	 	 	 	 dEdeej%        e1e	j6        f         d:eej%        e1e	j6        f         d;e1d<eee1ej%        e.f         d2e/d)ede&d*e/d+e/d,e/d4e/de	j-        fd=Z7	 	 	 	 dFdeej%        e1e	j6        f         d:eej%        e1e	j6        f         de&d*e/d+e/d,e/de	j-        fd>Z8dS )Ga  Project: PhiK - correlation analyzer library

Created: 2018/09/05

Description:
    Functions for calculating the statistical significance of outliers in a contingency table.

Authors:
    KPMG Advanced Analytics & Big Data team, Amstelveen, The Netherlands

Redistribution and use in source and binary forms, with or without
modification, are permitted according to the terms listed in the file
LICENSE.
    )TupleUnionOptionalN)stats)betainc)definitions   )bin_datahist2d_from_rebinned_df)log_incompbeta)z_from_logp)dq_check_nunique_values)array_like_to_dataframeguess_interval_colsnobsnexpnexperrreturnc                     | dk    rdS |dk    r4|dk    r|n|}|||z  z  }||z  dz   }dd|z   z  }t          | ||          }n#t          j                            | dz
  |          }|S )a  
    Calculate p-value for nobs observations given the expected value and its
    uncertainty using the Linnemann method.

    If the uncertainty
    on the expected value is known the Linnemann method is used. Otherwise the Poisson distribution is
    used to estimate the p-value.

    Measures of Significance in HEP and Astrophysics
    Authors: J. T. Linnemann
    http://arxiv.org/abs/physics/0312059

    Code inspired by:
    https://root.cern.ch/doc/master/NumberCountingUtils_8cxx_source.html#l00086

    Three fixes are added for:

      * nobs = 0, when - by construction - p should be 1.
      * uncertainty of zero, for which Linnemann's function does not work, but one can simply revert to regular Poisson.
      * when nexp=0, betainc always returns 1. Here we set nexp = nexperr.

    :param int nobs: observed count
    :param float nexp: expected number
    :param float nexperr: uncertainty on the expected number
    :returns: p-value
    :rtype: float
    r   r	   )r   r   poissonsf)r   r   r   nexpalttaubxps           M/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/phik/outliers.pypoisson_obs_pr   !   s    8 qyyq{{(($$7*+cMASMD!QMTAXt,,H    c                 b   | dk    rdt           j         fS |dk    r4|dk    r|n|}|||z  z  }||z  dz   }dd|z   z  }t          | ||          }n_t          j                            | dz
  |          }t          j                            | dz
  |          }	|t          j        d|	z
            f}|S )a  
    Calculate logarithm of p-value for nobs observations given the expected value and its
    uncertainty using the Linnemann method.

    If the uncertainty
    on the expected value is known the Linnemann method is used. Otherwise the Poisson distribution is
    used to estimate the p-value.

    Measures of Significance in HEP and Astrophysics
    Authors: J. T. Linnemann
    http://arxiv.org/abs/physics/0312059

    Code inspired by:
    https://root.cern.ch/doc/master/NumberCountingUtils_8cxx_source.html#l00086

    Three fixes are added for:

      * nobs = 0, when - by construction - p should be 1.
      * uncertainty of zero, for which Linnemann's function does not work, but one can simply revert to regular Poisson.
      * when nexp=0, betainc always returns 1. Here we set nexp = nexperr.

    :param int nobs: observed count
    :param float nexp: expected number
    :param float nexperr: uncertainty on the expected number
    :returns: tuple containing pvalue and 1 - pvalue
    :rtype: tuple
    r   r	   )npinfr   r   r   logsfr   log)
r   r   r   r   r   r   r   tlogplogpr   s
             r   log_poisson_obs_pr'   L   s    8 qyy26'z{{(($$7*+cMASMtQ**}""4!8T22MTAXt,,rva!e}}%Lr   c                    t          | ||          }|dk    s|dk    rIt          | ||          }|dk    r|d         }t          |          }n:|d         }t          |d          }n t          j                            |           }|S )a2  
    Calculate the Z-value for measuring nobs observations given the expected value.

    The Z-value express the number
    of sigmas the observed value deviates from the expected value, and is based on the p-value calculation.
    If the uncertainty on the expected value is known the Linnemann method is used. Otherwise the Poisson distribution is used to estimate the p-value.

    :param int nobs: observed count
    :param float nexp: expected number
    :param float nexperr: uncertainty on the expected number
    :returns: Z-value
    :rtype: float
    r   r	   T	flip_sign)r   r'   r   r   normppfr   r   r   p_valuer%   r&   z_valuelog1mps           r   poisson_obs_zr1   z   s     D$00G !||w!||!$g66a<<8D!$''GG1XF!&D999GG :>>'***Nr   c                 j    t          | ||          }t          | dz   ||          }d||z
  z  }||z  }|S )a.  
    Calculate the p-value for measuring nobs observations given the expected value.

    The Lancaster mid-P correction is applied to take into account the effects of discrete statistics.
    If the uncertainty on the expected value is known the Linnemann method is used for the p-value calculation.
    Otherwise the Poisson distribution is used to estimate the p-value.

    :param int nobs: observed count
    :param float nexp: expected number
    :param float nexperr: uncertainty on the expected number
    :returns: mid p-value
    :rtype: float
    r	         ?)r   )r   r   r   r   pplus1mid_ps         r   poisson_obs_mid_pr6      sF     	dD'**A4!8T733F1v:EJAHr   c                    t          | ||          }t          | dz   ||          }|d         }|d         }t          j        d          |z   t          j        dt          j        ||z
            z             z   }|d         }|d         }	t          j        d          |	z   t          j        dt          j        ||	z
            z             z   }
||
fS )aP  
    Calculate the logarithm of the p-value for measuring nobs observations given the expected value.

    The Lancaster mid-P correction is
    applied to take into account the effects of discrete statistics. If the uncertainty on the expected value is known the
    Linnemann method is used for the p-value calculation. Otherwise the Poisson distribution is used to estimate the p-value.

    :param int nobs: observed count
    :param float nexp: expected number
    :param float nexperr: uncertainty on the expected number
    :returns: tuple of log(p) and log(1-p)
    :rtype: tuple
    r	   r   r3   )r'   r!   r$   exp)r   r   r   r%   tlogpp1lplp1logmidplqlq1logmidqs              r   log_poisson_obs_mid_pr@      s      dD'22Eq$88G 
qB
!*CfSkkBBF38,<,<(<!=!==G
 
qB
!*CfSkkC"&RVBH-=-=)=">">>GGr   c                    t          | ||          }|dk    s|dk    rIt          | ||          }|dk    r|d         }t          |          }n:|d         }t          |d          }n t          j                            |           }|S )a  Calculate the Z-value for measuring nobs observations given the expected value.

    The Z-value express the number
    of sigmas the observed value deviates from the expected value, and is based on the p-value calculation.
    The Lancaster midP correction is applied to take into account the effects of low statistics. If the uncertainty on the
    expected value is known the Linnemann method is used for the p-value calculation. Otherwise the Poisson distribution is
    used to estimate the p-value.

    :param int nobs: observed count
    :param float nexp: expected number
    :param float nexperr: uncertainty on the expected number
    :returns: Z-value
    :rtype: tuple
    r   r	   Tr)   )r6   r@   r   r   r+   r,   r-   s           r   poisson_obs_mid_zrB      s      dG44G !||w!||%dD'::a<<8D!$''GG1XF!&D999GG :>>'***Nr   r   values	CI_methodc           
      ~   t          j        | j                  }t          j        | j                  }t          | j        d                   D ]l}t          | j        d                   D ]M}| |         |         }| |                                         |z
  }| dd|f                                         |z
  }|                                 |z
  |z
  |z
  }	|	dk    r||z  |	z  ||         |<   t          ||          }
t          ||          }t          |	|          }t          j        t          |
|z  |	z  d          t          ||z  |	z  d          z   t          |||         |         z  |	z  d          z             ||         |<   #t           j        ||         |<   t           j        ||         |<   On||fS )a  
    Calculation of expected frequencies, based on the ABCD-method, i.e. independent frequency estimates.

    :param values: The contingency table. The table contains the observed number of occurrences in each category.
    :param string CI_method: method to be used for uncertainty calculation. poisson: normal poisson error.    exact_poisson: error calculated from the asymmetric exact poisson interval
    :returns exp, experr: expected frequencies, error on the expected frequencies
    r   r	   NrD      )	r!   zerosshaperangesumget_uncertaintysqrtpownan)rC   rD   r8   experrijAobsBCDsigmaBsigmaCsigmaDs                r   #get_independent_frequency_estimatesrZ      s    (6<
 
 CXfl##F 6<?## & &v|A'' 	& 	&A!9Q<Dq	$&Aqqq!t  ""T)A

q 1$t+A1uuEAIAq	(i@@@(i@@@(i@@@!w
Q**&1*q.!,,-&3q6!9,q0!445   q	! FAq	!vq	!'	&* ;r   r   c                     |dk    rt          |           }n8|dk    rt          |           }n"t          d                    |                    |S )aj  
    Calculate the uncertainty on a random number x taken from the poisson distribution

    The uncertainty on the x is calculated using either the standard poisson error (poisson) or using the asymmetric
    exact poisson interval (exact_poisson).
    https://www.ncbi.nlm.nih.gov/pubmed/2296988 #FIXME: check ref

    :param float x: value, must be equal or greater than zero
    :param string CI_method: method to be used for uncertainty calculation. poisson: normal poisson error.    exact_poisson: error calculated from the asymmetric exact poisson interval
    :return x_err: the uncertainty on x (1 sigma)
    exact_poissonr   zCI method {} not valid)get_exact_poisson_uncertaintyget_poisson_uncertaintyNotImplementedErrorformat)r   rD   xerrs      r   rL   rL      sZ     O##,Q//	i		&q))!":"A"A)"L"LMMMKr   c                 :    | dk    rt          j        |           ndS )z
    Calculate the uncertainty on x using standard poisson error. In case x=0 the error=1 is assigned.

    :param float x: value
    :return x_err: the uncertainty on x (1 sigma)
    :rtype: float
    r	   g      ?)r!   rM   )r   s    r   r^   r^   8  s     a271:::S(r   nsigmasc                 T   t           j                            d|z  dd          }t           j                            d|z  dd          }| dk    r&t           j                            |d| z            dz  nd}t           j                            |d| dz   z            dz  }||z
  dz  S )ab  
    Calculate the uncertainty on x using an exact poisson confidence interval. The width of the confidence interval can
    be specified using the number of sigmas. The default number of sigmas is set to 1, resulting in an error that is
    approximated by the standard poisson error sqrt(x).

    Exact poisson uncertainty is described here:
    https://ms.mcmaster.ca/peter/s743/poissonalpha.html
    https://www.statsdirect.com/help/rates/poisson_rate_ci.htm
    https://www.ncbi.nlm.nih.gov/pubmed/2296988

    :param float x: value
    :return x_err: the uncertainty on x (1 sigma)
    :rtype: float
    r   r	   )locscalerG   )r   r+   cdfchi2r,   )r   rc   plpulbubs         r   r]   r]   C  s    " 
W!1	5	5B	G!	4	4B*+q&&AE	"	"Q	&	&aB	AQK	(	(1	,B Gq=r   obsr8   rP   c                    t          j        | j                  }t          j        | j                  }t          | j        d                   D ]}t          | j        d                   D ]~}t	          | |         |         ||         |         ||         |                   ||         |<   t          | |         |         ||         |         ||         |                   ||         |<   ||fS )a  
    Evaluation of significance of observation

    Evaluation of the significance of the difference between the observed number of occurrences and the expected number of
    occurrences, taking into account the uncertainty on the expected number of occurrences. When the uncertainty is
    not zero, the Linnemann method is used to calculate the p-values.

    :param obs: observed numbers
    :param exp: expected numbers
    :param experr: uncertainty on the expected numbers
    :returns: pvalues, zvalues
    r   r	   )r!   rH   rI   rJ   r6   rB   )rn   r8   rP   pvalueszvaluesrQ   rR   s          r   get_outlier_significancesrr   ^  s      hsy!!Ghsy!!G39Q<   R Rsy|$$ 	R 	RA-c!fQiQF1IaLQQGAJqM-c!fQiQF1IaLQQGAJqMM	R Gr   datac                 Z    t          | |          \  }}t          | ||          \  }}||fS )a|  
    Calculate the significance matrix of excesses or deficits in a contingency table

    :param data: numpy array contingency table
    :param string CI_method: method to be used for uncertainty calculation. poisson: normal poisson error.    exact_poisson: error calculated from the asymmetric exact poisson interval
    :return: p-value matrix, outlier significance matrix
    rF   )rZ   rr   )rs   rD   r8   rP   rp   rq   s         r   'outlier_significance_matrix_from_hist2dru   x  s;     6diPPPKC0sFCCGWGr   Tdata_binnedbinning_dict	ndecimalsdropnadrop_underflowdrop_overflowc           
         | j         \  }}t          | |||          }	d|	j        v s	d|	j        v rLt          j        d                    ||	j        d         ||	j        d                              t          j        S |df|dffD ]~\  }
                                v ra| |          	                    t          j        t          j        t          j        g                                                                                             j        }t#          t%          |          t%          t'          |	|
                    z
            }g |D ]9}                    t          j        ||k              d         d                    :fdt-          t/                                       D             }|t#          t'          |	|
          t/          |          d                   z  }t1          |	|
|           t3          |	j        |          \  }}t7          j        ||	j        |	j         	          }|S )
a,  
    Calculate the significance matrix of excesses or deficits

    :param data_binned: input data. DataFrame must contain exactly two columns
    :param dict binning_dict: dictionary with bin edges for each binned interval variable. When no bin_edges are    provided values are used as bin label. Otherwise, bin labels are constructed based on provided bin edge information.
    :param string CI_method: method to be used for uncertainty calculation. poisson: normal poisson error.     exact_poisson: error calculated from the asymmetric exact poisson interval
    :param ndecimals: number of decimals to use in labels of binned interval variables to specify bin edges (default=1)
    :param bool dropna: remove NaN values with True
    :param bool drop_underflow: do not take into account records in underflow bin when True (relevant when binning    a numeric variable)
    :param bool drop_overflow: do not take into account records in overflow bin when True (relevant when binning    a numeric variable)
    :return: outlier significance matrix (pd.DataFrame)
    r	   r   zdToo few unique values for variable {0:s} ({1:d}) or {2:s} ({3:d}) to calculate outlier significancesindexcolumnsc                     g | ]A}|vd                               |         d                  |         d                   BS )z{1:.{0}f}_{2:.{0}f}r   r	   )r`   ).0rQ   rw   cimissingrx   s     r   
<listcomp>z@outlier_significance_matrix_from_rebinned_df.<locals>.<listcomp>  sa        H}}	 &,,|Aq1!4l1oa6H6K  %}}r   NrF   )r}   r~   )r~   r   rI   warningswarnr`   r!   rO   keysisindefsUFOFNaNvalue_counts
sort_indexr}   listsetgetattrappendwhererJ   lensetattrru   rC   pd	DataFrame)rv   rw   rD   rx   ry   rz   r{   c0c1df_datahista	orig_valsmissingvvalsrp   rq   outlier_overviewr   r   s    ` `              @@r   ,outlier_significance_matrix_from_rebinned_dfr     s]   4  FB)V^] K 	Kk&7!7!7"F2{'8';RARSTAUVV	
 	
 	
 vgY0 * *1!!#### [^00$'47DH1MNNNOPQR	  3y>>CQ0G0G,H,HHIIGH @ @a 8 8 ;A >????       s<?3344	  D Da00T=>>>DKD)))>i  GW |{(+2E   r   
   Fdfinterval_colsquantileretbinsverbosec           	         t          | j                  dk    rt          d          |t          | |
          }t	          | ||          \  }}t          ||d||          \  }}t          |||||||          }|	r||fS |S )a  
    Calculate the significance matrix of excesses or deficits

    :param df: input data. DataFrame must contain exactly two columns
    :param interval_cols: columns with interval variables which need to be binned
    :param string CI_method: method to be used for uncertainty calculation. poisson: normal poisson error.    exact_poisson: error calculated from the asymmetric exact poisson interval
    :param bins: number of bins, or a list of bin edges (same for all columns), or a dictionary where per column the bins are specified. (default=10)    E.g.: bins = {'mileage':5, 'driver_age':[18,25,35,45,55,65,125]}
    :param ndecimals: number of decimals to use in labels of binned interval variables to specify bin edges (default=1)
    :param bool quantile: when the number of bins is specified, use uniform binning (False) or quantile binning (True)
    :param bool dropna: remove NaN values with True
    :param bool drop_underflow: do not take into account records in underflow bin when True (relevant when binning    a numeric variable)
    :param bool drop_overflow: do not take into account records in overflow bin when True (relevant when binning    a numeric variable)
    :param bool retbins: if true, function also returns dict with bin_edges of rebinned variables.
    :param bool verbose: if False, do not print all interval columns that are guessed
    :return: outlier significance matrix (pd.DataFrame)
    rG   z"df should contain only two columnsNry   Tr   binsr   rD   rx   ry   rz   r{   )r   r~   
ValueErrorr   r   r
   r   )r   r   rD   rx   r   r   ry   rz   r{   r   r   df_cleaninterval_cols_cleanrv   rw   	os_matrixs                   r   outlier_significance_matrixr     s    D 2:!=>>>+B88$;
M&% % %!H! !)%t$! ! !K =%#  I  ',&&r    combinationsc           
      
   |i }|st          j        | j        d          }g }t          |          D ]P\  }	\  }
}t	          | |
|g                                         ||||||          }|                    |
||f           Q|S )ah  
    Calculate the significance matrix of excesses or deficits for all possible combinations of variables, or for
    those combinations specified using combinations. This functions could also be used instead of
    outlier_significance_matrices in case all variables are either categorical or ordinal, so no binning is required.

    :param data_binned: input data. Interval variables need to be binned. DataFrame must contain exactly two columns
    :param dict binning_dict: dictionary with bin edges for each binned interval variable. When no bin_edges are    provided values are used as bin label. Otherwise, bin labels are constructed based on provided bin edge information.
    :param string CI_method: method to be used for uncertainty calculation. poisson: normal poisson error.    exact_poisson: error calculated from the asymmetric exact poisson interval
    :param ndecimals: number of decimals to use in labels of binned interval variables to specify bin edges (default=1)
    :param combinations: in case you do not want to calculate an outlier significance matrix for all permutations of    the available variables, you can specify a list of the required permutations here, in the format    [(var1, var2), (var2, var4), etc]
    :param bool dropna: remove NaN values with True
    :param bool drop_underflow: do not take into account records in underflow bin when True (relevant when binninga numeric variable)
    :param bool drop_overflow: do not take into account records in overflow bin when True (relevant when binning    a numeric variable)
    :return: dictionary with outlier significance matrices (pd.DataFrame)
    NrG   r   )	itertoolsr   r~   	enumerater   copyr   )rv   rw   rD   rx   r   ry   rz   r{   outliers_overviewrQ   r   r   zvalues_overviews                r   .outlier_significance_matrices_from_rebinned_dfr     s    >   F -k.A1EE .. 
= 
=8BGR!&&(()'
 
 
 	  "b*:!;<<<<r   c           
          |t          | |          }t          | ||          \  }}t          ||d||          \  }}t          ||||||||	          }d |D             }|
r||fS |S )a  
    Calculate the significance matrix of excesses or deficits for all possible combinations of variables, or for
    those combinations specified using combinations

    :param df: input data
    :param interval_cols: columns with interval variables which need to be binned
    :param string CI_method: method to be used for uncertainty calculation. poisson: normal poisson error.     exact_poisson: error calculated from the asymmetric exact poisson interval
    :param ndecimals: number of decimals to use in labels of binned interval variables to specify bin edges (default=1)
    :param bins: number of bins, or a list of bin edges (same for all columns), or a dictionary where per column the bins are specified. (default=10)    E.g.: bins = {'mileage':5, 'driver_age':[18,25,35,45,55,65,125]}
    :param bool quantile: when the number of bins is specified, use uniform binning (False) or quantile binning (True)
    :param combinations: in case you do not want to calculate an outlier significance matrix for all permutations of    the available variables, you can specify a list of the required permutations here, in the format    [(var1, var2), (var2, var4), etc]
    :param bool dropna: remove NaN values with True
    :param bool drop_underflow: do not take into account records in underflow bin when True (relevant when binning    a numeric variable)
    :param bool drop_overflow: do not take into account records in overflow bin when True (relevant when binning    a numeric variable)
    :param bool retbins: if true, function also returns dict with bin_edges of rebinned variables.
    :param bool verbose: if False, do not print all interval columns that are guessed
    :return: dictionary with outlier significance matrices (pd.DataFrame)
    Nr   Tr   )r   ry   rz   r{   c                 F    i | ]\  }}}d                      ||g          |S ):)join)r   r   r   r   s       r   
<dictcomp>z1outlier_significance_matrices.<locals>.<dictcomp>  s0    FFFYRQ388RH%%qFFFr   )r   r   r
   r   )r   r   rD   rx   r   r   r   ry   rz   r{   r   r   r   r   rv   rw   os_matricess                    r   outlier_significance_matricesr   H  s    N +B88$;
M&% % %!H! !)%t$! ! !K A!%#	 	 	K GF+FFFK )L((r   ynum_varsr   c                 x    t          | |          }|t          ||
          }t          |||||||||	|

  
        S )a   
    Calculate the significance matrix of excesses or deficits of input x and input y. x and y can contain interval,     ordinal or categorical data. Use the num_vars variable to indicate whether x and/or y contain interval data.

    :param list x: array-like input
    :param list y: array-like input
    :param list num_vars: list of variables which are numeric and need to be binned, either ['x'],['y'],or['x','y']
    :param bins: number of bins, or a list of bin edges (same for all columns), or a dictionary where per column the bins are specified. (default=10)    E.g.: bins = {'mileage':5, 'driver_age':[18,25,35,45,55,65,125]}
    :param bool quantile: when the number of bins is specified, use uniform binning (False) or quantile binning (True)
    :param ndecimals: number of decimals to use in labels of binned interval variables to specify bin edges (default=1)
    :param string CI_method: method to be used for uncertainty calculation. poisson: normal poisson error.     exact_poisson: error calculated from the asymmetric exact poisson interval
    :param bool dropna: remove NaN values with True
    :param bool drop_underflow: do not take into account records in underflow bin when True (relevant when binning a     numeric variable)
    :param bool drop_overflow: do not take into account records in overflow bin when True     (relevant when binning a numeric variable)
    :param bool verbose: if False, do not print all interval columns that are guessed
    :return: outlier significance matrix (pd.DataFrame)
    N)	r   r   r   rx   rD   ry   rz   r{   r   )r   r   r   )r   r   r   r   r   rx   rD   ry   rz   r{   r   r   s               r   outlier_significance_from_arrayr     s^    F 
!A	&	&B&r733&
%#   r   c                 J    t          | |          }t          |||||          S )a$  
    Calculate the significance matrix of excesses or deficits of input x and input y. x and y can contain binned
    interval, ordinal or categorical data.

    :param list x: array-like input
    :param list y: array-like input
    :param string CI_method: method to be used for uncertainty calculation. poisson: normal poisson error.     exact_poisson: error calculated from the asymmetric exact poisson interval
    :param bool dropna: remove NaN values with True
    :param bool drop_underflow: do not take into account records in underflow bin when True (relevant when binning     a numeric variable)
    :param bool drop_overflow: do not take into account records in overflow bin when True (relevant when binning     a numeric variable)
    :return: outlier significance matrix (pd.DataFrame)
    )rD   ry   rz   r{   )r   r   )r   r   rD   ry   rz   r{   r   s          r   &outlier_significance_from_binned_arrayr     s9    2 
!A	&	&B&
%#   r   )r   )r	   )r   r	   TTT)
Nr   r	   r   FTTTFT)Nr   r	   r   TTT)Nr   r	   r   Fr   TTTFT)	Nr   Fr	   r   TTTT)r   TTT)9__doc__typingr   r   r   r   numpyr!   pandasr   r   scipyr   scipy.specialr   phikr   r   binningr
   r   r   
statisticsr   data_qualityr   utilsr   r   intfloatr   r'   r1   r6   r@   rB   ndarraystrrZ   rL   r^   r]   rr   ru   r   dictboolr   r   r   tupler   r   Seriesr   r   r   r   r   <module>r      s4    * ) ) ) ) ) ) ) ) )                    ! ! ! ! ! ! $ $ $ $ $ $ 6 6 6 6 6 6 6 6 # # # # # # # # # # # # 1 1 1 1 1 1 ? ? ? ? ? ? ? ?( (5 (5 (U ( ( ( (V+C +u +u +ue|AT + + + +\ 5 5 U    BC u u     , 
  %* 
5%<       FC u u     F *3' 'J'#&'
2:rz!"' ' ' 'T u  U    0)u ) ) ) ) ) U U 5    6	*.0j
2:rz!"   6 (1 
*!$
2:rz!"   , E EEE E 	E
 E E E \E E E ET %)	< <
<D>< < 	< < < < < < < < < <B ')3 33
 e$3 3 3 3p %)	')B B
BD>B B 	B B e$B B B B B B B B BP /13 3RZry()3RZry()3 3 T2:t+
,	3
 3 3 3 3 3 3 3 \3 3 3 3r ! !RZry()!RZry()! ! 	!
 ! ! \! ! ! ! ! !r   