
    hMh              	          d Z ddlmZ ddlZddlmZ dej        dej        fdZ	 ddej        d	e	de
fd
Z	 ddej        dej        dede
fdZdeeej        f         de
fdZdej        defdZdej        defdZdde
dede
fdZdS )a  Project: PhiK - correlation coefficient package

Created: 2018/09/05

Description:
    Statistics helper functions, for the calculation of phik and significance
    of a contingency table.

Authors:
    KPMG Advanced Analytics & Big Data team, Amstelveen, The Netherlands

Redistribution and use in source and binary forms, with or without
modification, are permitted according to the terms listed in the file
LICENSE.
    )UnionN)statsvalsreturnc                 @    t           j                            |           S )a0  
    Calculation of dependent expected frequencies.

    Calculation is based on the marginal sums of the table, i.e. dependent frequency estimates.
    :param vals: The contingency table. The table contains the observed number of occurrences in each category

    :returns exp: expected frequencies
    )r   contingencyexpected_freq)r   s    O/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/phik/statistics.py!get_dependent_frequency_estimatesr      s     **4000    log-likelihoodlambda_c                     | dd         }|t          j        |dk    d                    }|j        t          j        |j        dk    d                    j        }t          j        ||          \  }}}}|S )ag  
    Chi-square test of independence of variables in a contingency table.

    The expected frequencies are based on the
    marginal sums of the table, i.e. dependent frequency estimates.

    :param vals: The contingency table. The table contains the observed number of occurrences in each category
    :returns test_statistic: the test statistic value
    Nr      )axis)r   )npallTr   chi2_contingency)r   r   valuestest_statistic_s        r
   ,get_chi2_using_dependent_frequency_estimatesr   $   s     !!!WF RVFaKa00001FXrvfh!m!444457F $4VWMMMNAq!r   Tobservedexpected	normalizec                    t          j        |           } t          j        | dk               rt          d          | j        dk    rt          d          |t          |           }t          j        |          }|r,|t          j        |           t          j        |          z  z  }t          j        |                     t           j	                  |z
  dz  |t          j
        |          |dk              }t          j        |          S )a  Calculate pearson chi square between observed and expected 2d contingency matrix

    :param observed: The observed contingency table. The table contains the observed number of occurrences in each cell.
    :param expected: The expected contingency table. The table contains the expected number of occurrences in each cell.
    :param bool normalize: normalize expected frequencies, default is True.
    :return: the pearson chi2 value
    r   z.All values in `observed` must be non-negative.zNo data; `observed` has size 0.N   )outwhere)r   asarrayany
ValueErrorsizer   sumdivideastypefloat64
zeros_like)r   r   r   termss       r
   get_pearson_chi_squarer+   >   s     z(##H	vhl KIJJJ}:;;;4X>>z(##H  Drvh//"&2B2BBCI		$	$x	/A5M(##!m	  E 6%==r   
chi2valuesc                 *    t          j        |           S )a!  
    Estimation of the effective number of degrees of freedom.

    A good approximation of endof is the average value. Alternatively
    a fit to the chi2 distribution can be make. Both values are returned.

    :param list chi2values: list of chi2 values
    :returns: endof0, endof
    )r   mean)r,   s    r
   estimate_ndofr/   _   s     7:r   c                     t           j                            |           }|j        t	          j        |j                  z
  |j        z   dz
  |dk                                    z
  }|dk     rd}|S )a  
    Simple estimation of the effective number of degrees of freedom.

    This equals the nominal calculation for ndof minus the number of empty bins in the
    expected contingency table.

    :param observed: numpy array of observed cell counts
    :returns: endof
    r   r   )r   r   r	   r$   r   r%   shapendim)r   r   endofs      r
   estimate_simple_ndofr4   m   s{      ..x88H
&
 
 	!
-	 	 q=



		  
 qyyLr   c                 Z    | j         t          j        | j                  z
  | j        z   dz
  S )a  
    Simple estimation of the effective number of degrees of freedom.

    This equals the nominal calculation for ndof minus the number of empty bins in the
    expected contingency table.

    :param observed: numpy array of observed cell counts
    :returns: theoretical ndof
    r   )r$   r   r%   r1   r2   )r   s    r
   theoretical_ndofr6      s)     =26(.111HMAAEEr   Flogp	flip_signc                    | t           j         k    r|st           j        nt           j         S t          j        |           }|dk    rTdt          j        dt           j        z            z  d| z  z
  }t          j        |t          j        |          z
            }n t          j                            |           }|r|dz  }|S )a)  
    Convert logarithm of p-value into one-sided Z-value

    :param float logp: logarithm of p-value, should not be greater than 0
    :param bool flip_sign: flip sign of Z-value, e.g. use for input log(1-p). Default is false.
    :returns: statistical significance Z-value
    :rtype: float
    r   g       r   g       @g      )	r   infexplogpisqrtr   normppf)r7   r8   p_valueuz_values        r
   z_from_logprD      s     w&3rvvRVG3fTllG !|| 26!be)$$$sTz1'!bfQii-((:>>'*** 4Nr   )r   )NT)F)__doc__typingr   numpyr   scipyr   ndarrayr   strfloatr   boolr+   listr/   intr4   r6   rD    r   r
   <module>rP      s                   1BJ 12: 1 1 1 1 &6 
*"
   6 JN j$&JBF
   BeD"*$45 %    2: #    4Frz Fc F F F F e        r   