
    _MhoS                         d dl Z d dlZd dlmZmZmZmZ d dlm	Z	 d dl
mZ d dlmZ ddlmZ ddlmZ d	gZ G d
 d          ZddZd Z e	dg dg           ZeddddfdZd Zd Zd Zd ZdS )    N)check_random_state
MapWrapperrng_integers_contains_nan)_make_tuple_bunchcdist)_measurements   )_local_correlations)distributionsmultiscale_graphcorrc                       e Zd ZdZd Zd ZdS )
_ParallelPz.Helper function to calculate parallel p-value.c                 0    || _         || _        || _        d S Nxyrandom_states)selfr   r   r   s       P/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/scipy/stats/_mgc.py__init__z_ParallelP.__init__   s    *    c                     | j         |                             | j        j        d                   }| j        |         d d |f         }t	          | j        |          d         }|S )Nr   )r   permutationr   shape	_mgc_statr   )r   indexorderpermy	perm_stats        r   __call__z_ParallelP.__call__   s[    "5)55dfl1oFFuaaah' dfe,,Q/	r   N)__name__
__module____qualname____doc__r   r#    r   r   r   r      s8        88+ + +
    r   r     c                    t                    fdt          |          D             }t          | ||          }t          |          5 }t	          j        t           ||t          |                                        }	ddd           n# 1 swxY w Y   d|	|k                                    z   d|z   z  }
|
|	fS )a  Helper function that calculates the p-value. See below for uses.

    Parameters
    ----------
    x, y : ndarray
        `x` and `y` have shapes ``(n, p)`` and ``(n, q)``.
    stat : float
        The sample test statistic.
    reps : int, optional
        The number of replications used to estimate the null when using the
        permutation test. The default is 1000 replications.
    workers : int or map-like callable, optional
        If `workers` is an int the population is subdivided into `workers`
        sections and evaluated in parallel (uses
        `multiprocessing.Pool <multiprocessing>`). Supply `-1` to use all cores
        available to the Process. Alternatively supply a map-like callable,
        such as `multiprocessing.Pool.map` for evaluating the population in
        parallel. This evaluation is carried out as `workers(func, iterable)`.
        Requires that `func` be pickleable.
    random_state : {None, int, `numpy.random.Generator`,
                    `numpy.random.RandomState`}, optional

        If `seed` is None (or `np.random`), the `numpy.random.RandomState`
        singleton is used.
        If `seed` is an int, a new ``RandomState`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``Generator`` or ``RandomState`` instance then
        that instance is used.

    Returns
    -------
    pvalue : float
        The sample test p-value.
    null_dist : list
        The approximated null distribution.

    c           
          g | ]<}t           j                            t          d dt           j                            =S )l           )sizedtype)nprandomRandomStater   uint32).0_random_states     r   
<listcomp>z_perm_test.<locals>.<listcomp>L   s]     E E E34 Y**<g29,. ,. ,. / / E E Er   r   Nr   )r   ranger   r   r0   arraylistsum)r   r   statrepsworkersr6   r   	parallelp
mapwrapper	null_distpvalues        `     r   
_perm_testrC   #   s(   P &l33LE E E E8=dE E EM Q!=AAAI	G		 G
HT**Yd"D"DEEFF	G G G G G G G G G G G G G G G 9$))+++D9F9s   9BBBc                 "    t          | |           S r   r   )r   s    r   _euclidean_distrE   Z   s    A;;r   	MGCResult)	statisticrB   mgc_dictFc                    t          | t          j                  rt          |t          j                  st          d          | j        dk    r| ddt          j        f         } n"| j        dk    rt          d| j                   |j        dk    r|ddt          j        f         }n"|j        dk    rt          d|j                   | j        \  }}|j        \  }	}
t          | d           t          |d           t          j        t          j	        |                     d	k    s*t          j        t          j	        |                    d	k    rt          d
          ||	k    r||
k    rd}nt          d          |dk     s|	dk     rt          d          | 
                    t          j                  } |
                    t          j                  }t          |          s|t          d          t          |t                    r|d	k     rt          d          |dk     rd}t          j        |t           d           |r$|t          d          t#          | |          \  } }| ||           }  ||          }t%          | |          \  }}|d         }|d         }t'          | |||||          \  }}|||d}t)          |||          }||_        |S )a#  Computes the Multiscale Graph Correlation (MGC) test statistic.

    Specifically, for each point, MGC finds the :math:`k`-nearest neighbors for
    one property (e.g. cloud density), and the :math:`l`-nearest neighbors for
    the other property (e.g. grass wetness) [1]_. This pair :math:`(k, l)` is
    called the "scale". A priori, however, it is not know which scales will be
    most informative. So, MGC computes all distance pairs, and then efficiently
    computes the distance correlations for all scales. The local correlations
    illustrate which scales are relatively informative about the relationship.
    The key, therefore, to successfully discover and decipher relationships
    between disparate data modalities is to adaptively determine which scales
    are the most informative, and the geometric implication for the most
    informative scales. Doing so not only provides an estimate of whether the
    modalities are related, but also provides insight into how the
    determination was made. This is especially important in high-dimensional
    data, where simple visualizations do not reveal relationships to the
    unaided human eye. Characterizations of this implementation in particular
    have been derived from and benchmarked within in [2]_.

    Parameters
    ----------
    x, y : ndarray
        If ``x`` and ``y`` have shapes ``(n, p)`` and ``(n, q)`` where `n` is
        the number of samples and `p` and `q` are the number of dimensions,
        then the MGC independence test will be run.  Alternatively, ``x`` and
        ``y`` can have shapes ``(n, n)`` if they are distance or similarity
        matrices, and ``compute_distance`` must be sent to ``None``. If ``x``
        and ``y`` have shapes ``(n, p)`` and ``(m, p)``, an unpaired
        two-sample MGC test will be run.
    compute_distance : callable, optional
        A function that computes the distance or similarity among the samples
        within each data matrix. Set to ``None`` if ``x`` and ``y`` are
        already distance matrices. The default uses the euclidean norm metric.
        If you are calling a custom function, either create the distance
        matrix before-hand or create a function of the form
        ``compute_distance(x)`` where `x` is the data matrix for which
        pairwise distances are calculated.
    reps : int, optional
        The number of replications used to estimate the null when using the
        permutation test. The default is ``1000``.
    workers : int or map-like callable, optional
        If ``workers`` is an int the population is subdivided into ``workers``
        sections and evaluated in parallel (uses ``multiprocessing.Pool
        <multiprocessing>``). Supply ``-1`` to use all cores available to the
        Process. Alternatively supply a map-like callable, such as
        ``multiprocessing.Pool.map`` for evaluating the p-value in parallel.
        This evaluation is carried out as ``workers(func, iterable)``.
        Requires that `func` be pickleable. The default is ``1``.
    is_twosamp : bool, optional
        If `True`, a two sample test will be run. If ``x`` and ``y`` have
        shapes ``(n, p)`` and ``(m, p)``, this optional will be overridden and
        set to ``True``. Set to ``True`` if ``x`` and ``y`` both have shapes
        ``(n, p)`` and a two sample test is desired. The default is ``False``.
        Note that this will not run if inputs are distance matrices.
    random_state : {None, int, `numpy.random.Generator`,
                    `numpy.random.RandomState`}, optional

        If `seed` is None (or `np.random`), the `numpy.random.RandomState`
        singleton is used.
        If `seed` is an int, a new ``RandomState`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``Generator`` or ``RandomState`` instance then
        that instance is used.

    Returns
    -------
    res : MGCResult
        An object containing attributes:

        statistic : float
            The sample MGC test statistic within ``[-1, 1]``.
        pvalue : float
            The p-value obtained via permutation.
        mgc_dict : dict
            Contains additional useful results:

                - mgc_map : ndarray
                    A 2D representation of the latent geometry of the
                    relationship.
                - opt_scale : (int, int)
                    The estimated optimal scale as a ``(x, y)`` pair.
                - null_dist : list
                    The null distribution derived from the permuted matrices.

    See Also
    --------
    pearsonr : Pearson correlation coefficient and p-value for testing
               non-correlation.
    kendalltau : Calculates Kendall's tau.
    spearmanr : Calculates a Spearman rank-order correlation coefficient.

    Notes
    -----
    A description of the process of MGC and applications on neuroscience data
    can be found in [1]_. It is performed using the following steps:

    #. Two distance matrices :math:`D^X` and :math:`D^Y` are computed and
       modified to be mean zero columnwise. This results in two
       :math:`n \times n` distance matrices :math:`A` and :math:`B` (the
       centering and unbiased modification) [3]_.

    #. For all values :math:`k` and :math:`l` from :math:`1, ..., n`,

       * The :math:`k`-nearest neighbor and :math:`l`-nearest neighbor graphs
         are calculated for each property. Here, :math:`G_k (i, j)` indicates
         the :math:`k`-smallest values of the :math:`i`-th row of :math:`A`
         and :math:`H_l (i, j)` indicates the :math:`l` smallested values of
         the :math:`i`-th row of :math:`B`

       * Let :math:`\circ` denotes the entry-wise matrix product, then local
         correlations are summed and normalized using the following statistic:

    .. math::

        c^{kl} = \frac{\sum_{ij} A G_k B H_l}
                      {\sqrt{\sum_{ij} A^2 G_k \times \sum_{ij} B^2 H_l}}

    #. The MGC test statistic is the smoothed optimal local correlation of
       :math:`\{ c^{kl} \}`. Denote the smoothing operation as :math:`R(\cdot)`
       (which essentially set all isolated large correlations) as 0 and
       connected large correlations the same as before, see [3]_.) MGC is,

    .. math::

        MGC_n (x, y) = \max_{(k, l)} R \left(c^{kl} \left( x_n, y_n \right)
                                                    \right)

    The test statistic returns a value between :math:`(-1, 1)` since it is
    normalized.

    The p-value returned is calculated using a permutation test. This process
    is completed by first randomly permuting :math:`y` to estimate the null
    distribution and then calculating the probability of observing a test
    statistic, under the null, at least as extreme as the observed test
    statistic.

    MGC requires at least 5 samples to run with reliable results. It can also
    handle high-dimensional data sets.
    In addition, by manipulating the input data matrices, the two-sample
    testing problem can be reduced to the independence testing problem [4]_.
    Given sample data :math:`U` and :math:`V` of sizes :math:`p \times n`
    :math:`p \times m`, data matrix :math:`X` and :math:`Y` can be created as
    follows:

    .. math::

        X = [U | V] \in \mathcal{R}^{p \times (n + m)}
        Y = [0_{1 \times n} | 1_{1 \times m}] \in \mathcal{R}^{(n + m)}

    Then, the MGC statistic can be calculated as normal. This methodology can
    be extended to similar tests such as distance correlation [4]_.

    .. versionadded:: 1.4.0

    References
    ----------
    .. [1] Vogelstein, J. T., Bridgeford, E. W., Wang, Q., Priebe, C. E.,
           Maggioni, M., & Shen, C. (2019). Discovering and deciphering
           relationships across disparate data modalities. ELife.
    .. [2] Panda, S., Palaniappan, S., Xiong, J., Swaminathan, A.,
           Ramachandran, S., Bridgeford, E. W., ... Vogelstein, J. T. (2019).
           mgcpy: A Comprehensive High Dimensional Independence Testing Python
           Package. :arXiv:`1907.02088`
    .. [3] Shen, C., Priebe, C.E., & Vogelstein, J. T. (2019). From distance
           correlation to multiscale graph correlation. Journal of the American
           Statistical Association.
    .. [4] Shen, C. & Vogelstein, J. T. (2018). The Exact Equivalence of
           Distance and Kernel Methods for Hypothesis Testing.
           :arXiv:`1806.05514`

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.stats import multiscale_graphcorr
    >>> x = np.arange(100)
    >>> y = x
    >>> res = multiscale_graphcorr(x, y)
    >>> res.statistic, res.pvalue
    (1.0, 0.001)

    To run an unpaired two-sample test,

    >>> x = np.arange(100)
    >>> y = np.arange(79)
    >>> res = multiscale_graphcorr(x, y)
    >>> res.statistic, res.pvalue  # doctest: +SKIP
    (0.033258146255703246, 0.023)

    or, if shape of the inputs are the same,

    >>> x = np.arange(100)
    >>> y = x
    >>> res = multiscale_graphcorr(x, y, is_twosamp=True)
    >>> res.statistic, res.pvalue  # doctest: +SKIP
    (-0.008021809890200488, 1.0)

    zx and y must be ndarraysr   N   z&Expected a 2-D array `x`, found shape z&Expected a 2-D array `y`, found shape raise)
nan_policyr   zInputs contain infinitiesTzZShape mismatch, x and y must have shape [n, p] and [n, q] or have shape [n, p] and [m, p].   z;MGC requires at least 5 samples to give reasonable results.z$Compute_distance must be a function.z1Number of reps must be an integer greater than 0.r)   zThe number of replications is low (under 1000), and p-value calculations may be unreliable. Use the p-value result, with caution!)
stacklevelz*Cannot run if inputs are distance matricesstat_mgc_map	opt_scale)r=   r>   r6   )mgc_maprP   rA   )
isinstancer0   ndarray
ValueErrorndimnewaxisr   r   r;   isinfastypefloat64callableintwarningswarnRuntimeWarning_two_sample_transformr   rC   rF   r<   )r   r   compute_distancer=   r>   
is_twosampr6   nxpxnypymsgr<   	stat_dictrO   rP   rB   rA   rH   ress                       r   r   r   b   s-   N a$$ 5Jq"*,E,E 53444 	v{{aaam	
1K!'KKLLLv{{aaam	
1K!'KKLLLWFBWFB !((((!(((( 
vbhqkkQ"&!"5"5"9"94555	Rxx88JJ K L L L 
Avva $ % % 	% 	
A	A $%% A*:*F?@@@ dC   9D1HHLMMM	 	c>a8888 +#IJJJ$Q**1#QQ  1ooOD)^,L+&I #1aD'0<> > >FI (&&( (H
 D&(
+
+CCHJr   c                    t          | |d          }|j        \  }}|dk    s|dk    r||dz
           |dz
           }||z  }n5t          |           dz
  }t          ||          }t	          ||          \  }}||d}	||	fS )a  Helper function that calculates the MGC stat. See above for use.

    Parameters
    ----------
    distx, disty : ndarray
        `distx` and `disty` have shapes ``(n, p)`` and ``(n, q)`` or
        ``(n, n)`` and ``(n, n)``
        if distance matrices.

    Returns
    -------
    stat : float
        The sample MGC test statistic within ``[-1, 1]``.
    stat_dict : dict
        Contains additional useful additional returns containing the following
        keys:

            - stat_mgc_map : ndarray
                MGC-map of the statistics.
            - opt_scale : (float, float)
                The estimated optimal scale as a ``(x, y)`` pair.

    mgc)global_corrr   )rO   rP   )r   r   len_threshold_mgc_map_smooth_mgc_map)
distxdistyrO   nmr<   rP   	samp_sizesig_connectrg   s
             r   r   r   }  s    2 'ueGGGLDAqAvva AE"1q5)E		JJN	 )yAA *+|DDi!-') )I ?r   c                    | j         \  }}dd|z  z
  }||dz
  z  dz  dz
  }t          j                            |||          dz  dz
  }t	          || |dz
           |dz
                     }| |k    }t          j        |          dk    rVt          j        |          \  }}t          j	        |d	          \  }}t          j
        |dd
                   dz   }	||	k    }nt          j        dgg          }|S )at  
    Finds a connected region of significance in the MGC-map by thresholding.

    Parameters
    ----------
    stat_mgc_map : ndarray
        All local correlations within ``[-1,1]``.
    samp_size : int
        The sample size of original data.

    Returns
    -------
    sig_connect : ndarray
        A binary matrix with 1's indicating the significant region.

    r   {Gz?   r-   g      ?rJ   r   T)return_countsNF)r   r   betappfmaxr0   r;   r
   labeluniqueargmaxr9   )
rO   rs   rr   rq   per_sig	thresholdrt   r5   label_counts	max_labels
             r   rm   rm     s   " DAq
 4)#$GY]+A-3I"&&w	9EEIAMI I|AE21q59::I *K	vkQ&,[99Q)KtDDD< Il122.//!3	!Y.hy))r   c                 &   |j         \  }}||dz
           |dz
           }||g}t          j                            |           dk    rt          j        |           t          j        dt          ||          z            t          ||          z  k    r}t          ||                    }t          j        ||k    | z            }||k    rG|}|\  }}	||z  |	z   }
t          j        |
          |z  }t          j        |
          |z  }	|dz   |	dz   g}||fS )aR  Finds the smoothed maximal within the significant region R.

    If area of R is too small it returns the last local correlation. Otherwise,
    returns the maximum within significant_connected_region.

    Parameters
    ----------
    sig_connect : ndarray
        A binary matrix with 1's indicating the significant region.
    stat_mgc_map : ndarray
        All local correlations within ``[-1, 1]``.

    Returns
    -------
    stat : float
        The sample MGC statistic within ``[-1, 1]``.
    opt_scale: (float, float)
        The estimated optimal scale as an ``(x, y)`` pair.

    r   r   rv   )	r   r0   linalgnormr;   ceilr{   minwhere)rt   rO   rr   rq   r<   rP   max_corrmax_corr_indexklone_d_indicess              r   rn   rn     s   * DAq Aq1u%DAI	y~~k""a'' 6+"'$Q*:";";c!Qii"GGG<455H  X|x'?;&NOON4%1 !A	F=))Q.F=))A-qS!A#J	?r   c                    | j         d         }|j         d         }t          j        | |gd          }t          j        t          j        |          t          j        |          gd                              dd          }||fS )a  Helper function that concatenates x and y for two sample MGC stat.

    See above for use.

    Parameters
    ----------
    u, v : ndarray
        `u` and `v` have shapes ``(n, p)`` and ``(m, p)``.

    Returns
    -------
    x : ndarray
        Concatenate `u` and `v` along the ``axis = 0``. `x` thus has shape
        ``(2n, p)``.
    y : ndarray
        Label matrix for `x` where 0 refers to samples that comes from `u` and
        1 refers to samples that come from `v`. `y` thus has shape ``(2n, 1)``.

    r   )axisr*   r   )r   r0   concatenatezerosonesreshape)uvrb   rd   r   r   s         r   r_   r_     st    ( 
B	
B
1vA&&&A
bgbkk2;;;CCBJJAa4Kr   )r)   r*   N)r\   numpyr0   scipy._lib._utilr   r   r   r   scipy._lib._bunchr   scipy.spatial.distancer	   scipy.ndimager
   _statsr    r   __all__r   rC   rE   rF   r   r   rm   rn   r_   r(   r   r   <module>r      s        X X X X X X X X X X X X / / / / / / ( ( ( ( ( ( ' ' ' ' ' ' ' ' ' ' ' '      !
"
       $4 4 4 4n   kAAA2G G	 1@d!"u4X X X Xv. . .b* * *Z0 0 0f    r   