
    J/PhW,                         d dl Zd dlmZ d dlmZmZmZmZ dgZ	ddgZ
ddgZd	 Z G d
 de          Zd Zd Zd Zd Zd ZdS )    N)
PatsyError)
safe_isnansafe_scalar_isnanno_picklingassert_no_picklingNAActionNoneNaNraisedropc                 @    d                     d | D                       S )Nz, c                 ,    g | ]}t          |          S  )repr).0opts     M/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/patsy/missing.py
<listcomp>z!_desc_options.<locals>.<listcomp>3   s    333Cd3ii333    )join)optionss    r   _desc_optionsr   2   s#    99337333444r   c                   D    e Zd ZdZdddgfdZd Zd Zd Zd	 Zd
 Z	e
ZdS )r   a  An :class:`NAAction` object defines a strategy for handling missing
    data.

    "NA" is short for "Not Available", and is used to refer to any value which
    is somehow unmeasured or unavailable. In the long run, it is devoutly
    hoped that numpy will gain first-class missing value support. Until then,
    we work around this lack as best we're able.

    There are two parts to this: First, we have to determine what counts as
    missing data. For numerical data, the default is to treat NaN values
    (e.g., ``numpy.nan``) as missing. For categorical data, the default is to
    treat NaN values, and also the Python object None, as missing. (This is
    consistent with how pandas does things, so if you're already using
    None/NaN to mark missing data in your pandas DataFrames, you're good to
    go.)

    Second, we have to decide what to do with any missing data when we
    encounter it. One option is to simply discard any rows which contain
    missing data from our design matrices (``drop``). Another option is to
    raise an error (``raise``). A third option would be to simply let the
    missing values pass through into the returned design matrices. However,
    this last option is not yet implemented, because of the lack of any
    standard way to represent missing values in arbitrary numpy matrices;
    we're hoping numpy will get this sorted out before we standardize on
    anything ourselves.

    You can control how patsy handles missing data through the ``NA_action=``
    argument to functions like :func:`build_design_matrices` and
    :func:`dmatrix`. If all you want to do is to choose between ``drop`` and
    ``raise`` behaviour, you can pass one of those strings as the
    ``NA_action=`` argument directly. If you want more fine-grained control
    over how missing values are detected and handled, then you can create an
    instance of this class, or your own object that implements the same
    interface, and pass that as the ``NA_action=`` argument instead.
    r   r	   r
   c           	      f   || _         | j         t          vr(t          d|dt          t                    d          t	          |t
                    rt          d          t          |          | _        | j        D ]3}|t          vr(t          d|dt          t                    d          4dS )a  The :class:`NAAction` constructor takes the following arguments:

        :arg on_NA: How to handle missing values. The default is ``"drop"``,
          which removes all rows from all matrices which contain any missing
          values. Also available is ``"raise"``, which raises an exception
          when any missing values are encountered.
        :arg NA_types: Which rules are used to identify missing values, as a
          list of strings. Allowed values are:

          * ``"None"``: treat the ``None`` object as missing in categorical
            data.
          * ``"NaN"``: treat floating point NaN values as missing in
            categorical and numerical data.

        .. versionadded:: 0.2.0
        zinvalid on_NA action z (should be one of )z$NA_types should be a list of stringszinvalid NA_type N)	on_NA_valid_NA_responses
ValueErrorr   
isinstancestrtupleNA_types_valid_NA_types)selfr   r"   NA_types       r   __init__zNAAction.__init__[   s    " 
:000*+055-@S2T2T2T2TV   h$$ 	ECDDDh} 	 	Go-- j/6wwo8V8V8V8VX   .	 	r   c                 T    d| j         v rt          |          rdS d| j         v r|dS dS )zgReturn True if `obj` is a categorical NA value.

        Note that here `obj` is a single scalar value.r
   Tr	   NF)r"   r   )r$   objs     r   is_categorical_NAzNAAction.is_categorical_NA|   s?     DM!!&7&<&<!4T]""s{4ur   c                     t          j        |j        t                    }d| j        v r|t          j        |          z  }|j        dk    rt          j        |d          }|S )zReturns a 1-d mask array indicating which rows in an array of
        numerical values contain at least one NA value.

        Note that here `arr` is a numpy array or pandas DataFrame.dtyper
      )axis)npzerosshapeboolr"   isnanndimany)r$   arrmasks      r   is_numerical_NAzNAAction.is_numerical_NA   s]    
 x	...DM!!BHSMM!D9q==6$Q'''Dr   c                 (   t          |          t          |          cxk    rt          |          k    sn J t          |          dk    r|S | j        dk    r|                     |||          S | j        dk    r|                     |||          S J )a  Takes a set of factor values that may have NAs, and handles them
        appropriately.

        :arg values: A list of `ndarray` objects representing the data.
          These may be 1- or 2-dimensional, and may be of varying dtype. All
          will have the same number of rows (or entries, for 1-d arrays).
        :arg is_NAs: A list with the same number of entries as `values`,
          containing boolean `ndarray` objects that indicate which rows
          contain NAs in the corresponding entry in `values`.
        :arg origins: A list with the same number of entries as
          `values`, containing information on the origin of each
          value. If we encounter a problem with some particular value, we use
          the corresponding entry in `origins` as the origin argument when
          raising a :class:`PatsyError`.
        :returns: A list of new values (which may have a differing number of
          rows.)
        r   r   r   )lenr   _handle_NA_raise_handle_NA_drop)r$   valuesis_NAsoriginss       r   	handle_NAzNAAction.handle_NA   s    $ 6{{c&kk9999S\\999999v;;!M:  ((AAAZ6!!''@@@Lr   c                 z    t          ||          D ])\  }}t          j        |          rt          d|          *|S )Nzfactor contains missing values)zipr/   r5   r   )r$   r=   r>   r?   is_NAorigins         r   r;   zNAAction._handle_NA_raise   sN     11 	K 	KME6ve}} K !A6JJJKr   c                     t          j        |d         j        d         t                    }|D ]}||z  }| fd|D             S )Nr   r+   c                 $    g | ]}|d f         S ).r   )r   v	good_masks     r   r   z,NAAction._handle_NA_drop.<locals>.<listcomp>   s"    222a)S.!222r   )r/   r0   r1   r2   )r$   r=   r>   r?   
total_maskrC   rH   s         @r   r<   zNAAction._handle_NA_drop   s^    XfQioa0===
 	  	 E%JJK	222262222r   N)__name__
__module____qualname____doc__r&   r)   r8   r@   r;   r<   r   __getstate__r   r   r   r   r   6   s        " "H $vuo    B  
 
 
  8  3 3 3 LLLr   c                     dd l } |                     t          t          d           |                     t          t          d           |                     t          t          d           t	          t                                 d S )Nr   pordr   )r
   asdfr"   r
   )pytestraisesr   r   r   )rT   s    r   test_NAAction_basicrV      sk    MMM
MM*hfM555
MM*hMAAA
MM*hM777xzz"""""r   c                     g dgdgddgfD ]} t          |           }dD ]}t          j        d|z   t                    }ddg}|j        d	k    r%|j        d	         d	k    rt          j        ||dd	gf<   nt          j        ||<   t          j        d
t                    }d| v rd||<   |	                    |          }t          j
        ||          sJ d S )Nr
   r	   rS   )r   )r-   )   )   r+   r   rX   r-   rY   T)r   r/   onesfloatr4   r1   nanr0   r2   r8   array_equal)r"   actionextra_shaper6   nan_rowsexp_NA_maskgot_NA_masks          r    test_NAAction_NA_types_numericalrc      s   %6(UFO< < <8,,,+ 	< 	<K'$,E:::C1vHx!||	!q 0 0(*Hq!f$%% "H(1D111K  (,H% 0055K>+{;;;;;;	<< <r   c                  $   g dgdgddgfD ]} t          |           }|                    d          rJ |                    d          rJ |                    d           d| v k    sJ |                    t          j                  d| v k    sJ d S )Nr
   r	   rS   ar-   )r   r)   r/   r\   )r"   r^   s     r   "test_NAAction_NA_types_categoricalrf      s    %6(UFO< G G8,,,++C00000++A.....''--&H2DEEEE''//EX4EFFFFFG Gr   c            
         t          d          } t          j        g d          t          j        g d          t          j        dt          j        gddgddgd	d
gdt          j        gg          g}t          j        g d          t          j        dt
                    t          j        g d          g}|                     ||d gdz            }t          |          dk    sJ t          j        |d         ddg          sJ t          j        |d         ddg          sJ t          j        |d         ddgd	d
gg          sJ d S )Nr   )rX   rh   rY      )      $@      4@g      >@      D@g      I@      ?g      @g      @rj   g      @g      @g      @g       @)TFTFFri   r+   )TFFFT   r   rX   rY   r-   rk   rl   )	r   r/   asarrayr\   r0   r2   r@   r:   r]   )r^   	in_valuesr>   
out_valuess       r   test_NAAction_droprr      sV   fF

$$$%%

11122

S"&MC:c{S#JbfVWWI 	
44455
$

44455F
 !!)VdVaZ@@Jz??a>*Q-!Q00000>*Q-$66666>*Q-3*sCj)ABBBBBBBr   c                     t          d          } t          j        ddg          t          j        ddg          g}t          j        ddg          gdz  }|                     ||d d g          }t          j        |d         |d                   sJ t          j        |d         |d                   sJ dd	lm}  |d
dd          } |d
dd          }t          j        d          }t          j        ddg          t          j        dt          j        g          g}t          j        ddg          t          j        ddg          g}	 |                     ||||g           J # t          $ r}|j
        |u sJ Y d }~d S d }~ww xY w)Nr   rQ   g?g333333?r-   rX   Fr   )OriginrR   rn   rm   T)r   r/   ro   r@   r]   patsy.originrt   aranger\   r   rD   )	r^   in_arrsr>   got_arrsrt   o1o2in_idxes	            r   test_NAAction_raiser}      s   G$$$F z3*%%rz1a&'9'9:Gj%(()A-F$>>H>(1+wqz22222>(1+wqz22222######	1		B	1		B Yq\\Fz3*%%rz3-'@'@AGj%(("*eT]*C*CDF&2r(333   x2~~~~~~~~~~s   E( (
F2FF)numpyr/   patsyr   
patsy.utilr   r   r   r   __all__r#   r   r   objectr   rV   rc   rf   rr   r}   r   r   r   <module>r      s  N           U U U U U U U U U U U U ,5/' 5 5 5F F F F Fv F F FR# # #< < <"G G GC C C&    r   