
    0Ph<.                         d dl mZ d dlmZ d dlmZ d dlZddlm	Z	m
Z
mZmZmZ ddlmZ ddd	d
ZddZ G d de          Zd Z G d de          Zd Zd ZdddZddZ G d de          Zd ZdS )    )Counter)suppress)
NamedTupleN   )_isin_searchsorted
_setdiff1ddeviceget_namespaceis_scalar_nanFreturn_inversereturn_countsc                j    | j         t          k    rt          | ||          S t          | ||          S )a  Helper function to find unique values with support for python objects.

    Uses pure python method for object dtype, and numpy method for
    all other dtypes.

    Parameters
    ----------
    values : ndarray
        Values to check for unknowns.

    return_inverse : bool, default=False
        If True, also return the indices of the unique values.

    return_counts : bool, default=False
        If True, also return the number of times each unique item appears in
        values.

    Returns
    -------
    unique : ndarray
        The sorted unique values.

    unique_inverse : ndarray
        The indices to reconstruct the original array from the unique array.
        Only provided if `return_inverse` is True.

    unique_counts : ndarray
        The number of times each of the unique values comes up in the original
        array. Only provided if `return_counts` is True.
    r   )dtypeobject_unique_python
_unique_np)valuesr   r   s      U/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/sklearn/utils/_encode.py_uniquer      sO    > |v>
 
 
 	
 ~]       c                 X   t          |           \  }}d\  }}|r|r|                    |           \  }}}}nK|r|                    |           \  }}n0|r|                    |           \  }}n|                    |           }|j        rst          |d                   r^t          ||j        |          }|d|dz            }|r	||||k    <   |r-|	                    ||d                   ||<   |d|dz            }|f}	|r|	|fz  }	|r|	|fz  }	t          |	          dk    r|	d         n|	S )zHelper function to find unique values for numpy arrays that correctly
    accounts for nans. See `_unique` documentation for details.)NNxpNr   r   )r   
unique_allunique_inverseunique_countsunique_valuessizer   r   nansumlen)
r   r   r   r   _inversecountsuniquesnan_idxrets
             r   r   r   =   s    &!!EB OGV +- +&(mmF&;&;#GVV	 +,,V44	 +**622""6** | +gbk22 +B777-GaK-( 	1)0GGg%& 	+ ffVGHH%566F7OMgkM*F*C z yXX]]3q66+r   c                   .    e Zd ZU dZeed<   eed<   d ZdS )MissingValuesz'Data class for missing data informationr#   nonec                     g }| j         r|                    d           | j        r|                    t          j                   |S )z3Convert tuple to a list where None is always first.N)r.   appendr#   np)selfoutputs     r   to_listzMissingValues.to_listj   sF    9 	 MM$8 	"MM"&!!!r   N)__name__
__module____qualname____doc__bool__annotations__r4    r   r   r-   r-   d   s=         11	III
JJJ    r   r-   c                     d | D             }|s| t          dd          fS d|v r7t          |          dk    rt          dd          }n#t          dd          }nt          dd          }| |z
  }||fS )a.  Extract missing values from `values`.

    Parameters
    ----------
    values: set
        Set of values to extract missing from.

    Returns
    -------
    output: set
        Set with missing values extracted.

    missing_values: MissingValues
        Object with missing value information.
    c                 4    h | ]}|t          |          |S Nr   ).0values     r   	<setcomp>z#_extract_missing.<locals>.<setcomp>   s,       U]mE6J6J]]]]r   F)r#   r.   Nr   T)r-   r%   )r   missing_values_setoutput_missing_valuesr3   s       r   _extract_missingrD   t   s      !    <}U;;;;;!!!!""a''$1e$$G$G$G!! %2d$F$F$F!! -$U C C C ((F(((r   c                   (     e Zd ZdZ fdZd Z xZS )_nandictz!Dictionary with support for nans.c                     t                                          |           |                                D ]\  }}t          |          r
|| _         d S d S r>   )super__init__itemsr   	nan_value)r2   mappingkeyr@   	__class__s       r   rI   z_nandict.__init__   sc    !!!!--// 	 	JCS!! !&	 	r   c                 l    t          | d          rt          |          r| j        S t          |          )NrK   )hasattrr   rK   KeyErrorr2   rM   s     r   __missing__z_nandict.__missing__   7    4%% 	"-*<*< 	">!smmr   )r5   r6   r7   r8   rI   rS   __classcell__rN   s   @r   rF   rF      sM        ++          r   rF   c                     t          | |          \  }}t          d t          |          D                       |                    fd| D             t	          |                     S )z,Map values based on its position in uniques.c                     i | ]\  }}||	S r;   r;   )r?   ivals      r   
<dictcomp>z#_map_to_integer.<locals>.<dictcomp>   s    >>>Cc1>>>r   c                      g | ]
}|         S r;   r;   )r?   vtables     r   
<listcomp>z#_map_to_integer.<locals>.<listcomp>   s    000AuQx000r   )r
   )r   rF   	enumerateasarrayr
   )r   r)   r   r&   r^   s       @r   _map_to_integerrb      sk    &'**EB>>9W+=+=>>>??E::0000000:HHHr   c                "   	 t          |           }t          |          \  }}t          |          }|                    |                                           t          j        || j                  }nP# t          $ rC t          d t          d | D                       D                       }t          d|           w xY w|f}|r|t          | |          fz  }|r|t          | |          fz  }t          |          dk    r|d         n|S )Nr   c              3   $   K   | ]}|j         V  d S r>   )r7   )r?   ts     r   	<genexpr>z!_unique_python.<locals>.<genexpr>   s$      LL!q~LLLLLLr   c              3   4   K   | ]}t          |          V  d S r>   )type)r?   r]   s     r   rg   z!_unique_python.<locals>.<genexpr>   s(      2K2Kq4772K2K2K2K2K2Kr   zPEncoders require their input argument must be uniformly strings or numbers. Got r   r   )setrD   sortedextendr4   r1   arrayr   	TypeErrorrb   _get_countsr%   )r   r   r   uniques_setmissing_valuesr)   typesr+   s           r   r   r      s<   
&kk&6{&C&C#^%%~--//000(7&,777 
 
 
LLs2K2KF2K2K2K/K/KLLLLL/',/ /
 
 	

 *C 30022 /FG,,..XX]]3q66+s   A2A5 5ACT)check_unknownc                l   t          | |          \  }}|                    | j        d          sB	 t          | |          S # t          $ r$}t          dt          |                     d}~ww xY w|r1t          | |          }|rt          dt          |                     t          || |          S )a  Helper function to encode values into [0, n_uniques - 1].

    Uses pure python method for object dtype, and numpy method for
    all other dtypes.
    The numpy method has the limitation that the `uniques` need to
    be sorted. Importantly, this is not checked but assumed to already be
    the case. The calling method needs to ensure this for all non-object
    values.

    Parameters
    ----------
    values : ndarray
        Values to encode.
    uniques : ndarray
        The unique values in `values`. If the dtype is not object, then
        `uniques` needs to be sorted.
    check_unknown : bool, default=True
        If True, check for values in `values` that are not in `unique`
        and raise an error. This is ignored for object dtype, and treated as
        True in this case. This parameter is useful for
        _BaseEncoder._transform() to avoid calling _check_unknown()
        twice.

    Returns
    -------
    encoded : ndarray
        Encoded values
    numericz%y contains previously unseen labels: Nr   )	r   isdtyper   rb   rQ   
ValueErrorstr_check_unknownr   )r   r)   rs   r   r&   ediffs          r   _encoder|      s    : &'**EB::flI.. 
5	O"67333 	O 	O 	OMSVVMMNNN	O  	V!&'22D V !TT!T!TUUUWf4444s   A   
A.
A))A.c                 x   t          | |          \  }}d}|                    | j        d          st          |           }t	          |          \  }}t          |          t	                    \  |z
  }|j        oj         }	|j        oj         }
fd|rQ|s|	s|
r"|                    fd| D                       }n)|                    t          |           |j
                  }t          |          }|
r|                    d           |	r|                    t          j                   n|                    |           }t          |||d          }|rB|j        rt#          | ||          }n)|                    t          |           |j
                  }|                    |                    |                    rV|                    |          }|                    |          r,|j        r|r|                    |           }d||<   ||          }t          |          }|r||fS |S )	a  
    Helper function to check for unknowns in values to be encoded.

    Uses pure python method for object dtype, and numpy method for
    all other dtypes.

    Parameters
    ----------
    values : array
        Values to check for unknowns.
    known_values : array
        Known values. Must be unique.
    return_mask : bool, default=False
        If True, return a mask of the same shape as `values` indicating
        the valid values.

    Returns
    -------
    diff : list
        The unique values present in `values` and not in `know_values`.
    valid_mask : boolean array
        Additionally returned if ``return_mask=True``.

    Nru   c                 N    | v p j         o| d u pj        ot          |           S r>   )r.   r#   r   )r@   missing_in_uniquesrp   s    r   is_validz _check_unknown.<locals>.is_valid  sD    $ )%* "TM) &) )!%((r   c                 &    g | ]} |          S r;   r;   )r?   r@   r   s     r   r_   z"_check_unknown.<locals>.<listcomp>(  s!    &K&K&K5xx&K&K&Kr   rd   Tassume_uniquer   )r   rv   r   rj   rD   r#   r.   rm   onesr%   r9   listr0   r1   r!   r	   r"   r   anyisnan)r   known_valuesreturn_maskr   r&   
valid_mask
values_setmissing_in_valuesr{   nan_in_diffnone_in_diffr!   diff_is_nanis_nanr   r   rp   s                 @@@r   ry   ry      s   2 &,//EBJ::flI.. 3[[
(8(D(D%
%,''*:;*G*G''K''+J4F4J0J(-M6H6M2M	 	 	 	 	 	  	A A{ Al AXX&K&K&K&KF&K&K&KLL

WWS[[W@@
Dzz 	KK 	 KK((00-rNNN 	Ay A"6<<<

WWS[[W@@
 66"((<(()) 		*((4..Kvvk"" *9 + +XXf--F)*Jv& [L)Dzz  ZKr   c                   .     e Zd ZdZ fdZd Zd Z xZS )_NaNCounterz$Counter with support for nan values.c                 p    t                                          |                     |                     d S r>   )rH   rI   _generate_items)r2   rJ   rN   s     r   rI   z_NaNCounter.__init__O  s/    --e4455555r   c              #      K   |D ]=}t          |          s|V  t          | d          sd| _        | xj        dz  c_        >dS )z>Generate items without nans. Stores the nan counts separately.	nan_countr   r   N)r   rP   r   )r2   rJ   items      r   r   z_NaNCounter._generate_itemsR  sf       	  	 D && 


4-- #!"NNaNNN	  	 r   c                 l    t          | d          rt          |          r| j        S t          |          )Nr   )rP   r   r   rQ   rR   s     r   rS   z_NaNCounter.__missing__\  rT   r   )r5   r6   r7   r8   rI   r   rS   rU   rV   s   @r   r   r   L  s\        ..6 6 6 6 6           r   r   c                    | j         j        dv rt          |           }t          j        t          |          t          j                  }t          |          D ]<\  }}t          t                    5  ||         ||<   ddd           n# 1 swxY w Y   =|S t          | d          \  }}t          j        ||d          }t          j        |d                   rt          j        |d                   rd|d<   t          j        |||                   }	t          j        |t          j                  }||	         ||<   |S )zGet the count of each of the `uniques` in `values`.

    The counts will use the order passed in by `uniques`. For non-object dtypes,
    `uniques` is assumed to be sorted and `np.nan` is at the end.
    OUrd   NT)r   r   r   )r   kindr   r1   zerosr%   int64r`   r   rQ   r   isinr   searchsorted
zeros_like)
r   r)   counterr3   rY   r   r!   r(   uniques_in_valuesunique_valid_indicess
             r   ro   ro   b  sy    |D  f%%#g,,bh777 )) 	* 	*GAt(## * *#DMq	* * * * * * * * * * * * * * *&vTBBBM6 dKKK	xb!"" %rx'<'< % $"?='BS:TUU]7"(333F &'; <FMs   3BB	B	)FF)F)collectionsr   
contextlibr   typingr   numpyr1   
_array_apir   r   r	   r
   r   _missingr   r   r   r-   rD   dictrF   rb   r   r|   ry   r   ro   r;   r   r   <module>r      s                                       $ # # # # # ',5 & & & & &R$, $, $, $,N    J    #) #) #)L    t    I I I, , ,4 /3 (5 (5 (5 (5 (5VS S S Sl    '   ,    r   