
    0Phŕ              
          d Z dZdZdZddlZddlZddlmZ ddlmZm	Z	m
Z
mZmZmZmZ g dZd	Zd	Zd
ZdZdZ ej        dej                  Z ej        dej                  Z ej        dej                  Z ej        d          Z ej        dej                  Z ej        dej                  Zee	         Zee	df         Zerddlm Z   G d de           Z!n
e
e"ef         Z!d Z# e#            \  Z$Z%ddddddddd	d 	Z&d!  e'd"          D             Z(e()                    d# e&*                                D                        de(d$<   e&)                    d%  e'd&          D                        d' Z+d( Z,d) Z-dZ.d*Z/d+Z0d,Z1d-Z2e.e/e0e1e2gZ3 G d. d/e4          Z5 G d0 d1e5          Z6 G d2 d3e5          Z7 G d4 d5e5          Z8 G d6 d7e5          Z9 G d8 d9e5          Z: G d: d;e5          Z; G d< d=e5          Z< G d> d?e5          Z= G d@ dAe5          Z> G dB dCe5          Z? G dD dEe5          Z@dF ZAdG ZB G dH dI          ZC G dJ dK          ZD G dL dM          ZE G dN dO          ZF G dP dQeFeE          ZG G dR dS          ZH G dT dU          ZI G dV dWeFeI          ZJdX ZKdY ZL G dZ d[          ZM G d\ d]          ZNd^e.fd_ZOd^e.fd`ZPda ZQdb ZRdS )ca  
The liac-arff module implements functions to read and write ARFF files in
Python. It was created in the Connectionist Artificial Intelligence Laboratory
(LIAC), which takes place at the Federal University of Rio Grande do Sul
(UFRGS), in Brazil.

ARFF (Attribute-Relation File Format) is an file format specially created for
describe datasets which are commonly used for machine learning experiments and
software. This file format was created to be used in Weka, the best
representative software for machine learning automated experiments.

An ARFF file can be divided into two sections: header and data. The Header
describes the metadata of the dataset, including a general description of the
dataset, its name and its attributes. The source below is an example of a
header section in a XOR dataset::

    %
    % XOR Dataset
    %
    % Created by Renato Pereira
    %            rppereira@inf.ufrgs.br
    %            http://inf.ufrgs.br/~rppereira
    %
    %
    @RELATION XOR

    @ATTRIBUTE input1 REAL
    @ATTRIBUTE input2 REAL
    @ATTRIBUTE y REAL

The Data section of an ARFF file describes the observations of the dataset, in
the case of XOR dataset::

    @DATA
    0.0,0.0,0.0
    0.0,1.0,1.0
    1.0,0.0,1.0
    1.0,1.0,0.0
    %
    %
    %

Notice that several lines are starting with an ``%`` symbol, denoting a
comment, thus, lines with ``%`` at the beginning will be ignored, except by the
description part at the beginning of the file. The declarations ``@RELATION``,
``@ATTRIBUTE``, and ``@DATA`` are all case insensitive and obligatory.

For more information and details about the ARFF file description, consult
http://www.cs.waikato.ac.nz/~ml/weka/arff.html


ARFF Files in Python
~~~~~~~~~~~~~~~~~~~~

This module uses built-ins python objects to represent a deserialized ARFF
file. A dictionary is used as the container of the data and metadata of ARFF,
and have the following keys:

- **description**: (OPTIONAL) a string with the description of the dataset.
- **relation**: (OBLIGATORY) a string with the name of the dataset.
- **attributes**: (OBLIGATORY) a list of attributes with the following
  template::

    (attribute_name, attribute_type)

  the attribute_name is a string, and attribute_type must be an string
  or a list of strings.
- **data**: (OBLIGATORY) a list of data instances. Each data instance must be
  a list with values, depending on the attributes.

The above keys must follow the case which were described, i.e., the keys are
case sensitive. The attribute type ``attribute_type`` must be one of these
strings (they are not case sensitive): ``NUMERIC``, ``INTEGER``, ``REAL`` or
``STRING``. For nominal attributes, the ``atribute_type`` must be a list of
strings.

In this format, the XOR dataset presented above can be represented as a python
object as::

    xor_dataset = {
        'description': 'XOR Dataset',
        'relation': 'XOR',
        'attributes': [
            ('input1', 'REAL'),
            ('input2', 'REAL'),
            ('y', 'REAL'),
        ],
        'data': [
            [0.0, 0.0, 0.0],
            [0.0, 1.0, 1.0],
            [1.0, 0.0, 1.0],
            [1.0, 1.0, 0.0]
        ]
    }


Features
~~~~~~~~

This module provides several features, including:

- Read and write ARFF files using python built-in structures, such dictionaries
  and lists;
- Supports `scipy.sparse.coo <http://docs.scipy
  .org/doc/scipy/reference/generated/scipy.sparse.coo_matrix.html#scipy.sparse.coo_matrix>`_
  and lists of dictionaries as used by SVMLight
- Supports the following attribute types: NUMERIC, REAL, INTEGER, STRING, and
  NOMINAL;
- Has an interface similar to other built-in modules such as ``json``, or
  ``zipfile``;
- Supports read and write the descriptions of files;
- Supports missing values and names with spaces;
- Supports unicode values and names;
- Fully compatible with Python 2.7+, Python 3.5+, pypy and pypy3;
- Under `MIT License <http://opensource.org/licenses/MIT>`_

z7Renato de Pontes Pereira, Matthias Feurer, Joel NothmanzTrenato.ppontes@gmail.com, feurerm@informatik.uni-freiburg.de, joel.nothman@gmail.comz2.4.0    N)TYPE_CHECKING)OptionalListDictAnyIteratorUnionTupleNUMERICREALINTEGERSTRING%z	@RELATIONz
@ATTRIBUTEz@DATAz^([^\{\}%,\s]*|\".*\"|\'.*\')$z%^(\".*\"|\'.*\'|[^\{\}%,\s]*)\s+(.+)$z["\'\\\s%,\000-\031]z(?=["\'\\%])|[\n\r\t\000-\031]z^\s*\{.*\}\s*$z["'{}\s].)	TypedDictc                   H    e Zd ZU eed<   eed<   eed<   eeef         ed<   dS )ArffContainerTypedescriptionrelation
attributesdataN)	__name__
__module____qualname__str__annotations__r   r	   ArffDenseDataTypeArffSparseDataType     W/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/sklearn/externals/_arff.pyr   r      sK         %'99::::::r    r   c                      d} d| d|                      dd          d}t          j        d                    |                    }t          j        d	d
|iz            }||fS )Na  
                    "      # open quote followed by zero or more of:
                    (?:
                        (?<!\\)    # no additional backslash
                        (?:\\\\)*  # maybe escaped backslashes
                        \\"        # escaped quote
                    |
                        \\[^"]     # escaping a non-quote
                    |
                        [^"\\]     # non-quote char
                    )*
                    "      # close quote
                    z(?:
        z4|          # a value may be surrounded by "
        "'zd|          # or by '
        [^,\s"'{}]+  # or may contain no characters requiring quoting
        )z(?x)
        ,                # may follow ','
        \s*
        ((?=,)|$|{value_re})  # empty or value
        |
        (\S.*)           # error
        )value_rea:  (?x)
        (?:^\s*\{|,)   # may follow ',', or '{' at line start
        \s*
        (\d+)          # attribute key
        \s+
        (%(value_re)s) # value
        |
        (?!}\s*$)      # not an error if it's }$
        (?!^\s*{\s*}\s*$)  # not an error if it's ^{}$
        \S.*           # error
        r%   )replacerecompileformat)	quoted_rer%   densesparses       r!   _build_re_valuesr-      s    II$ 		!!#s++++-H J  FHF%%' 'E Z 
 8$
% 
& 
&F &=r    \r#   r$   	
)	z\\z\"z\'z\tz\nz\rz\bz\fz\%c                 4    i | ]}t          |          d |z  S )z\%03ochr.0is     r!   
<dictcomp>r:      s$    ===aSVVX\===r        c                     i | ]\  }}||	S r   r   r8   kvs      r!   r:   r:      s    CCC41a!QCCCr     c                 4    i | ]}d |z  t          |          S )z\%dr5   r7   s     r!   r:   r:     s$    >>>q
CFF>>>r    
   c                 X   |                                  }t          |          dk    r.	 t          |         S # t          $ r t	          d|z            w xY w|d         dk    r%t          t          |dd          d                    S t          t          |dd          d                    S )N   zUnsupported escape sequence: %s   u      )grouplen_ESCAPE_SUB_MAPKeyError
ValueErrorr6   int)matchss     r!   _escape_sub_callbackrQ     s    A
1vv{{	D"1%% 	D 	D 	D>BCCC	Dts{{3qub>>"""3qua==!!!s	   6 Ac                 p    | d d         dv r#t          j        dt          | dd                   S | dv rd S | S )NrE   )r#   r$   z\\([0-9]{1,3}|u[0-9a-f]{4}|.)?r@   )r'   subrQ   )r?   s    r!   _unquoterW     sM    !u
v68L"g  		
itr    c                 @   t                               |           s,d t          t          j        | g                    D             S t          t                              d| z              \  }}t          |          sd |D             S t          
                    |           r	 d t                              |           D             S # t          $ rf t                              |           D ];}|                    d          s$t          d|                                z            <t          d          w xY wt                              |           D ];}|                    d          r$t          d|                                z            <t          d          )	z-(INTERNAL) Split a line into a list of valuesc                     g | ]
}|d v rdn|S )rT   Nr   )r8   rP   s     r!   
<listcomp>z!_parse_values.<locals>.<listcomp>   s4     0 0 0 YA 0 0 0r    ,c                 ,    g | ]}t          |          S r   )rW   )r8   r?   s     r!   rZ   z!_parse_values.<locals>.<listcomp>&  s    ,,,,,,r    c                 N    i | ]"\  }}t          |          t          |          #S r   )rN   rW   r=   s      r!   r:   z!_parse_values.<locals>.<dictcomp>)  s@     B B B1 FFHQKK B B Br    rE   zError parsing %rzUnknown parsing errorrD   )_RE_NONTRIVIAL_DATAsearchnextcsvreaderzip_RE_DENSE_VALUESfindallany_RE_SPARSE_LINErO   _RE_SPARSE_KEY_VALUESrM   finditerrI   	BadLayout)rP   valueserrorsrO   s       r!   _parse_valuesrm     s   %%a(( 00 0cj!oo..0 0 0 	0 *2237;;<NFFv;; -,,V,,,,Q 1	5B B 5 = =a @ @B B B B 	5 	5 	5.77:: H H{{1~~ H#$6$FGGGH3444	5 &..q11 	D 	DE{{1~~ D 2U[[]] BCCCD/000s   $#C A0D8rE   rD         c                   6    e Zd ZU dZee         ed<   d Zd ZdS )ArffExceptionNmessagec                     d| _         d S )NrS   )lineselfs    r!   __init__zArffException.__init__F  s    			r    c                      | j         | j        z  S N)rr   rt   ru   s    r!   __str__zArffException.__str__I  s    |DI%%r    )	r   r   r   rr   r   r   r   rw   rz   r   r    r!   rq   rq   C  sH         !GXc]!!!  & & & & &r    rq   c                       e Zd ZdZdZdS )BadRelationFormatzCError raised when the relation declaration is in an invalid format.z!Bad @RELATION format, at line %d.Nr   r   r   __doc__rr   r   r    r!   r|   r|   L  s        MM1GGGr    r|   c                       e Zd ZdZdZdS )BadAttributeFormatzEError raised when some attribute declaration is in an invalid format.z"Bad @ATTRIBUTE format, at line %d.Nr}   r   r    r!   r   r   P  s        OO2GGGr    r   c                   "     e Zd ZdZ fdZ xZS )BadDataFormatz=Error raised when some data instance is in an invalid format.c                 b    t                                                       dd|z  z   | _        d S )Nz&Bad @DATA instance format in line %d: %ssuperrw   rr   rv   value	__class__s     r!   rw   zBadDataFormat.__init__V  s3    4E\ 	r    r   r   r   r~   rw   __classcell__r   s   @r!   r   r   T  s>        GG
 
 
 
 
 
 
 
 
r    r   c                       e Zd ZdZdZdS )BadAttributeTypezSError raised when some invalid type is provided into the attribute
    declaration.z Bad @ATTRIBUTE type, at line %d.Nr}   r   r    r!   r   r   ]  s         0GGGr    r   c                   "     e Zd ZdZ fdZ xZS )BadAttributeNamezTError raised when an attribute name is provided twice the attribute
    declaration.c                 n    t                                                       d|z  dz   d|z  z   | _        d S )NzBad @ATTRIBUTE name %s at linez( %d, this name is already in use in linez %d.r   )rv   r   value2r   s      r!   rw   zBadAttributeName.__init__f  s@    -567f_ 	r    r   r   s   @r!   r   r   b  sB         
 
 
 
 
 
 
 
 
r    r   c                   "     e Zd ZdZ fdZ xZS )BadNominalValuezyError raised when a value in used in some data instance but is not
    declared into it respective attribute declaration.c                 b    t                                                       d|z  dz   | _        d S )Nz0Data value %s not found in nominal declaration, zat line %d.r   r   s     r!   rw   zBadNominalValue.__init__r  s4    ?%G 	r    r   r   s   @r!   r   r   n  sB        : :
 
 
 
 
 
 
 
 
r    r   c                   "     e Zd ZdZ fdZ xZS )BadNominalFormattingzDError raised when a nominal value with space is not properly quoted.c                 b    t                                                       d|z  dz   | _        d S )Nz4Nominal data value "%s" not properly quoted in line z%d.r   r   s     r!   rw   zBadNominalFormatting.__init__{  s4    CeK 	r    r   r   s   @r!   r   r   y  s>        NN
 
 
 
 
 
 
 
 
r    r   c                       e Zd ZdZdZdS )BadNumericalValuezPError raised when and invalid numerical value is used in some data
    instance.z$Invalid numerical value, at line %d.Nr}   r   r    r!   r   r     s         4GGGr    r   c                       e Zd ZdZdZdS )BadStringValuez;Error raise when a string contains space but is not quoted.z Invalid string value at line %d.Nr}   r   r    r!   r   r     s        EE0GGGr    r   c                   (     e Zd ZdZdZd fd	Z xZS )rj   zBError raised when the layout of the ARFF file has something wrong.z,Invalid layout of the ARFF file, at line %d.r@   c                     t                                                       |r-t          j        dz   |                    dd          z   | _        d S d S )N r   z%%)r   rw   rj   rr   r&   )rv   msgr   s     r!   rw   zBadLayout.__init__  sT     	L$,s2S[[d5K5KKDLLL	L 	Lr    r@   )r   r   r   r~   rr   rw   r   r   s   @r!   rj   rj     sR        LL<GL L L L L L L L L Lr    rj   c                        e Zd ZdZddZd ZdS )	BadObjectzPError raised when the object representing the ARFF file has something
    wrong.Invalid object.c                     || _         d S ry   r   )rv   r   s     r!   rw   zBadObject.__init__  s    r    c                     d| j         z  S )Nr   r   ru   s    r!   rz   zBadObject.__str__  s    dhr    N)r   )r   r   r   r~   rw   rz   r   r    r!   r   r     sA                r    r   c                 @    t           |                                          S ry   )_UNESCAPE_SUB_MAPrI   )rO   s    r!   _unescape_sub_callbackr     s    U[[]]++r    c                     t                               |           r#dt                              t          |           z  S | S )Nz'%s')_RE_QUOTE_CHARSr_   _RE_ESCAPE_CHARSrV   r   )rP   s    r!   encode_stringr     s;    a   H(,,-CQGGGGHr    c                       e Zd Zd Zd ZdS )EncodedNominalConversorc                 V    d t          |          D             | _        d| j        d<   d S )Nc                     i | ]\  }}||	S r   r   )r8   r9   r?   s      r!   r:   z4EncodedNominalConversor.__init__.<locals>.<dictcomp>  s    :::1q!:::r    r   )	enumeraterk   rv   rk   s     r!   rw   z EncodedNominalConversor.__init__  s.    ::	&(9(9:::Ar    c                 X    	 | j         |         S # t          $ r t          |          w xY wry   )rk   rL   r   rv   r   s     r!   __call__z EncodedNominalConversor.__call__  s=    	);u%% 	) 	) 	)!%(((	)s    )Nr   r   r   rw   r   r   r    r!   r   r     s2          ) ) ) ) )r    r   c                       e Zd Zd Zd ZdS )NominalConversorc                 H    t          |          | _        |d         | _        d S Nr   )setrk   
zero_valuer   s     r!   rw   zNominalConversor.__init__  s    &kk )r    c                 j    || j         vr|dk    r| j        S t          |          t          |          S r   )rk   r   r   r   r   s     r!   r   zNominalConversor.__call__  s<    ##zz
 &!%(((5zzr    Nr   r   r    r!   r   r     s2        $ $ $	 	 	 	 	r    r   c                   4    e Zd ZdZd Zed             Zd ZdS )DenseGeneratorDatazyInternal helper class to allow for different matrix types without
    making the code a huge collection of if statements.c              #     K   |D ]}t          |          t          t                    rZr/t                    t	          |          k    rt          |          fdt          t	          |                    D             n/t	                    t	          |          k    rt          |          |                     |          V  d S )Nc                 ,    g | ]}|v r|         nd S )r   r   )r8   r9   rk   s     r!   rZ   z2DenseGeneratorData.decode_rows.<locals>.<listcomp>  s5     2 2 2aqF{{&)) 2 2 2r    )rm   
isinstancedictmaxrJ   r   range_decode_values)rv   stream
conversorsrowrk   s       @r!   decode_rowszDenseGeneratorData.decode_rows  s       	: 	:C"3''F&$'' - -c&kkS__<<',,,2 2 2 2J002 2 2 v;;#j//11',,,%%fj999999	: 	:r    c                     	 d t          ||           D             } n6# t          $ r)}dt          |          v rt                      Y d }~nd }~ww xY w| S )Nc                 2    g | ]\  }}|d n
 ||          S ry   r   )r8   	conversorr   s      r!   rZ   z5DenseGeneratorData._decode_values.<locals>.<listcomp>  s>     2 2 2*)U $mdd51A1A 2 2 2r    float: )rc   rM   r   r   )rk   r   excs      r!   r   z!DenseGeneratorData._decode_values  s    	*2 2Z002 2 2FF  	* 	* 	*CHH$$'))) %$$$$	* s    
AAAc           	   #     K   d}|D ]}t          |          t          |          k    r/t          d|t          |          t          |          fz            g }|D ]D}||dk    s||k    rd}nt          t          |                    }|                    |           E|dz  }d                    |          V  dS )aZ  (INTERNAL) Encodes a line of data.

        Data instances follow the csv format, i.e, attribute values are
        delimited by commas. After converted from csv.

        :param data: a list of values.
        :param attributes: a list of attributes. Used to check if data is valid.
        :return: a string with the encoded data line.
        r   *Instance %d has %d attributes, expected %dNr@   rU   rE   r[   )rJ   r   r   r   appendjoin)rv   r   r   current_rowinstnew_datar   rP   s           r!   encode_datazDenseGeneratorData.encode_data  s        	% 	%D4yyC
OO++@!3t99c*oo>?  
 H # #=ERKK5E>>AA%c%jj11A""""1K((8$$$$$$!	% 	%r    N)r   r   r   r~   r   staticmethodr   r   r   r    r!   r   r     sW        ; ;: : :    \% % % % %r    r   c                   "     e Zd ZdZ fdZ xZS )_DataListMixinz>Mixin to return a list from decode_rows instead of a generatorc                 b    t          t                                          ||                    S ry   )listr   r   )rv   r   r   r   s      r!   r   z_DataListMixin.decode_rows  s%    EGG''
;;<<<r    )r   r   r   r~   r   r   r   s   @r!   r   r     s>        HH= = = = = = = = =r    r   c                       e Zd ZdS )DataNr   r   r   r   r    r!   r   r             Dr    r   c                       e Zd Zd Zd ZdS )COODatac                 x   g g g }}}t          |          D ]\  }}t          |          }t          |t                    st	                      |s;t          t          |                                           \  }	}	 fdt          |	|          D             }nJ# t          $ r%}
dt          |
          v rt                       d }
~
wt          $ r t          |          w xY w|                    |           |                    |gt          |          z             |                    |	            |||fS )Nc                 @    g | ]\  }}||n |         |          S ry   r   r8   keyr   r   s      r!   rZ   z'COOData.decode_rows.<locals>.<listcomp>  sI     C C C(c5 $)=%%ojoe6L6L C C Cr    r   )r   rm   r   r   rj   rc   sorteditemsrM   r   r   
IndexErrorr   extendrJ   )rv   r   r   r   rowscolsr9   r   rk   row_colsr   s     `        r!   r   zCOOData.decode_rows  sr   r2Dd'' 	" 	"FAs"3''Ffd++ "kk! "F6<<>>$:$:;Hf	)C C C C,/&,A,AC C C   C((+--- ) ) )#C((() KKKKc&kk)***KK!!!!T4s   >B
C"% CC"c              #     	K   t          |          }g }d}|j        	|j        }|j        }t	          	fdt          t          	          dz
            D                       st          d          t          ||	          D ]\  }}		|k    r@|	k     r:d                    dd                    |          dg          V  g }|dz  }|	k     :||k    rt          d	||dz   |fz            ||d
k    s||k    rd}nt          t          |                    }|                    d||fz             d                    dd                    |          dg          V  d S )Nr   c              3   B   K   | ]}|         |d z            k    V  dS )rE   Nr   )r8   r9   r   s     r!   	<genexpr>z&COOData.encode_data.<locals>.<genexpr>5  s4      EEA3q6SQZ'EEEEEEr    rE   z8liac-arff can only output COO matrices with sorted rows.r   {r[   }z3Instance %d has at least %d attributes, expected %dr@   rU   %d %s)rJ   r   colr   allr   rM   rc   r   r   r   r   r   )
rv   r   r   num_attributesr   r   r   r?   rP   r   s
            @r!   r   zCOOData.encode_data+  s     Zhhy EEEEs3xx!|1D1DEEEEE 	- , - - - tS#.. 	0 	0KAsC[  !C''((C(););S#ABBBBB!H1$K "C''
 n$$I #'>:;  
 yAGGqAvv!#a&&))OOGsAh.////hhSXXh//56666666r    Nr   r   r   r   r   r   r    r!   r   r     s2             4"7 "7 "7 "7 "7r    r   c                       e Zd Zd Zd ZdS )LODGeneratorDatac              #   T  K   |D ]}t          |          }t          |t                    st                      	 fd|                                D             V  X# t
          $ r%}dt          |          v rt                       d }~wt          $ r t          |          w xY wd S )Nc                 B    i | ]\  }}||d n |         |          S ry   r   r   s      r!   r:   z0LODGeneratorData.decode_rows.<locals>.<dictcomp>W  sE     9 9 9%3 EMDDz#u7M7M 9 9 9r    r   )
rm   r   r   rj   r   rM   r   r   r   r   )rv   r   r   r   rk   r   s     `   r!   r   zLODGeneratorData.decode_rowsP  s       	) 	)C"3''Ffd++ "kk!	)9 9 9 9)/9 9 9 9 9 9 9   C((+--- ) ) )#C((()	) 	)s   "A
B%( BB%c              #     K   d}t          |          }|D ]}g }t          |          dk    r8t          |          |k    r%t          d|t          |          dz   |fz            t          |          D ]Q}||         }||dk    s||k    rd}	nt	          t          |                    }	|                    d||	fz             R|dz  }d                    dd	                    |          d
g          V  d S )Nr   r   rE   r@   rU   r   r   r   r[   r   )rJ   r   r   r   r   r   r   r   )
rv   r   r   r   r   r   r   r   r?   rP   s
             r!   r   zLODGeneratorData.encode_dataa  s'     Z 	; 	;CH3xx!||CN : :@ #c((Q,?@  
 c{{ 4 4H9R166AA%c!ff--A3( 233331K((C(!3!3S9::::::%	; 	;r    Nr   r   r    r!   r   r   O  s2        ) ) )"; ; ; ; ;r    r   c                       e Zd ZdS )LODDataNr   r   r    r!   r   r   y  r   r    r   c                 :   | t           k    rt                      S | t          k    rt                      S | t          k    rt                      S | t          k    rt                      S | t          k    rt                      S t          dt          |           z            )NzMatrix type %s not supported.)DENSEr   COOr   LODr   	DENSE_GENr   LOD_GENr   rM   r   )matrix_types    r!   _get_data_object_for_decodingr  }  s    evv			yy			yy			!	!!###			!!!83{;K;KKLLLr    c                     t          | d          r(| j        dk    rt                      S t          d          t	          | d         t
                    rt                      S t                      S )Nr)   coozCannot guess matrix format!r   )hasattrr)   r   rM   r   r   r   r   )matrixs    r!   _get_data_object_for_encodingr    sc    vx   =E!!99:;;;	F1It	$	$ yyvvr    c                   B    e Zd ZdZd Zd Zd Zd ZdefdZ	defdZ
d	S )
ArffDecoderzAn ARFF decoder.c                 "    g | _         d| _        dS )zConstructor.r   N)_conversors_current_lineru   s    r!   rw   zArffDecoder.__init__  s    r    c                 2    t          j        dd|          }|S )a  (INTERNAL) Decodes a comment line.

        Comments are single line strings starting, obligatorily, with the ``%``
        character, and can have any symbol, including whitespaces or special
        characters.

        This method must receive a normalized string, i.e., a string without
        padding, including the "
" characters.

        :param s: a normalized string.
        :return: a string with the decoded comment.
        z^\%( )?r@   )r'   rV   )rv   rP   ress      r!   _decode_commentzArffDecoder._decode_comment  s     fZQ''
r    c                     |                     dd          \  }}|                                }t                              |          st	                      t          |                    d                    }|S )aF  (INTERNAL) Decodes a relation line.

        The relation declaration is a line with the format ``@RELATION
        <relation-name>``, where ``relation-name`` is a string. The string must
        start with alphabetic character and must be quoted if the name includes
        spaces, otherwise this method will raise a `BadRelationFormat` exception.

        This method must receive a normalized string, i.e., a string without
        padding, including the "
" characters.

        :param s: a normalized string.
        :return: a string with the decoded relation name.
        r   rE   "')splitstrip_RE_RELATIONrO   r|   r   )rv   rP   _r?   r  s        r!   _decode_relationzArffDecoder._decode_relation  sd     wwsA1GGII!!!$$ 	&#%%%!''%..!!
r    c                    |                     dd          \  }}|                                }t                              |          }|st	                      |                                \  }}t          |                    d                    }|dd         dk    rr|dd         dk    rd	 t          |                    d                    }n# t          $ r t                      w xY wt          |t                    rt                      n3t          |                                          }|d	vrt                      ||fS )
a  (INTERNAL) Decodes an attribute line.

        The attribute is the most complex declaration in an arff file. All
        attributes must follow the template::

             @attribute <attribute-name> <datatype>

        where ``attribute-name`` is a string, quoted if the name contains any
        whitespace, and ``datatype`` can be:

        - Numerical attributes as ``NUMERIC``, ``INTEGER`` or ``REAL``.
        - Strings as ``STRING``.
        - Dates (NOT IMPLEMENTED).
        - Nominal attributes with format:

            {<nominal-name1>, <nominal-name2>, <nominal-name3>, ...}

        The nominal names follow the rules for the attribute names, i.e., they
        must be quoted if the name contains whitespaces.

        This method must receive a normalized string, i.e., a string without
        padding, including the "
" characters.

        :param s: a normalized string.
        :return: a tuple (ATTRIBUTE_NAME, TYPE_OR_VALUES).
        r   rE   r  Nr   rS   r   z{} r   )r  r  _RE_ATTRIBUTErO   r   groupsr   rm   	Exceptionr   r   r   upper)rv   rP   r  r?   mnametype_s          r!   _decode_attributezArffDecoder._decode_attribute  sI   6 wwsA1GGII "" 	'$&&& hhjje 4::e$$%% !9bcc
c 1 1)%ekk%&8&899 ) ) )&((()%&& )&((()
 JJ$$&&EDDD&(((e}s   ."C C*Fc                     d _         t          t                    r<                    d                              dd                              d          ddg g d}i }t          |          }t          }t                    D ]q} xj         dz  c_         |                    d          }|s+|	                                }	|	
                    t                    r2|t          k    r'|d	xx                              |          dz   z  cc<   |	
                    t                    r9|t          k    rt                      t          }                     |          |d
<   |	
                    t                    r(|t          k    r|t          k    rt                      t          }                     |          }
|
d         |v r"t#          |
d         ||
d                             j         ||
d         <   |d                             |
           t          |
d         t&          t(          f          r.|rt+          |
d                   }n;t-          |
d                   }n%t          d t.          t.          d}||
d                  } j                            |           !|	
                    t2                    r|t          k    rt                       n+|	
                    t4                    r	 st                       fd}|                     |             j                  |d<   |d	                             d          r|d	         dd         |d	<   |S )zDo the job the ``encode``.r   z
 z
r0   r@   )r   r   r   r   rE   z 
r   r   r   c                 :    t          t          |                     S ry   )rN   float)xs    r!   <lambda>z%ArffDecoder._decode.<locals>.<lambda>G  s    #eAhh-- r    )r   r   r   r   c               3      K   D ]F} xj         dz  c_         |                                 } | r|                     t                    s| V  Gd S )NrE   )r  r  
startswith_TK_COMMENT)r   rP   rv   s    r!   r   z#ArffDecoder._decode.<locals>.stream_  sf        ""a'""iikk s~~k:: III r    r   NrS   )r  r   r   r  r&   r  r  _TK_DESCRIPTIONiterr!  r,  r  _TK_RELATIONrj   r  _TK_ATTRIBUTEr%  r   r   r   tupler   r   r(  r  _TK_DATAr-  r   endswith)rv   rP   encode_nominalr  objattribute_namesr   STATEr   u_rowattrr   CONVERSOR_MAPr   s   ``            r!   _decodezArffDecoder._decode  s     a 	C  ((66<<TBBA 	"
 "
  -[99  GG A	 A	C!#))G$$C IIKKE 00 4Uo5M5MM"""d&:&:3&?&?$&FF"""" !!,// /O++#++%$"&"7"7"<"<J !!-00 &L((Um-C-C#++%%--c227o--*47ODG4LMMM/3/AODG,L!((...d1ge}55 
7% >$;DG$D$D		$4T!W$=$=		/20G0G05-2%4 %4M !.d1g 6I ''	2222 !!(++ 	M))#++% !!+..  ++	 	 	 	 	 	 &&vvxx1ABBF}&&t,, 	9!$]!3CRC!8C
r    c                 t    	 |                      |||          S # t          $ r}| j        |_        |d}~ww xY w)a  Returns the Python representation of a given ARFF file.

        When a file object is passed as an argument, this method reads lines
        iteratively, avoiding to load unnecessary information to the memory.

        :param s: a string or file object with the ARFF file.
        :param encode_nominal: boolean, if True perform a label encoding
            while reading the .arff file.
        :param return_type: determines the data structure used to store the
            dataset. Can be one of `arff.DENSE`, `arff.COO`, `arff.LOD`,
            `arff.DENSE_GEN` or `arff.LOD_GEN`.
            Consult the sections on `working with sparse data`_ and `loading
            progressively`_.
        )r5  r  N)r<  rq   r  rt   )rv   rP   r5  return_typees        r!   decodezArffDecoder.decoden  sW    	<<.,7   9 9 9 	 	 	'AFG	s    
727N)r   r   r   r~   rw   r  r  r%  r  r<  r@  r   r    r!   r  r    s          
     .8 8 8t ).5 i i i iV (-%      r    r  c                   2    e Zd ZdZd	dZd Zd Zd Zd ZdS )
ArffEncoderzAn ARFF encoder.r@   c                 2    |rt           d|S dt           z  S )a  (INTERNAL) Encodes a comment line.

        Comments are single line strings starting, obligatorily, with the ``%``
        character, and can have any symbol, including whitespaces or special
        characters.

        If ``s`` is None, this method will simply return an empty comment.

        :param s: (OPTIONAL) string.
        :return: a string with the encoded comment line.
        r   r   )r-  )rv   rP   s     r!   _encode_commentzArffEncoder._encode_comment  s(      	&'KK+++%%r    c                 :    dD ]}||v rd|z  } nt           d|S )a  (INTERNAL) Decodes a relation line.

        The relation declaration is a line with the format ``@RELATION
        <relation-name>``, where ``relation-name`` is a string.

        :param name: a string.
        :return: a string with the encoded relation declaration.
         %{},"%s"r   )r0  )rv   r#  chars      r!   _encode_relationzArffEncoder._encode_relation  sB      	 	Dt||d{  %dd++r    c                     dD ]}||v rd|z  } nt          |t          t          f          r$d |D             }dd                    |          z  }t          d|d|S )a  (INTERNAL) Encodes an attribute line.

        The attribute follow the template::

             @attribute <attribute-name> <datatype>

        where ``attribute-name`` is a string, and ``datatype`` can be:

        - Numerical attributes as ``NUMERIC``, ``INTEGER`` or ``REAL``.
        - Strings as ``STRING``.
        - Dates (NOT IMPLEMENTED).
        - Nominal attributes with format:

            {<nominal-name1>, <nominal-name2>, <nominal-name3>, ...}

        This method must receive a the name of the attribute and its type, if
        the attribute type is nominal, ``type`` must be a list of values.

        :param name: a string.
        :param type_: a string or a list of string.
        :return: a string with the encoded attribute declaration.
        rF  rG  c                 2    g | ]}d t          |          z  S )r   )r   )r8   type_ks     r!   rZ   z1ArffEncoder._encode_attribute.<locals>.<listcomp>  s%    III}V444IIIr    z{%s}z, r   )r   r2  r   r   r1  )rv   r#  r$  rH  type_tmps        r!   _encode_attributezArffEncoder._encode_attribute  s    .  	 	Dt||d{  eeT]++ 	1II5IIIHDIIh//0E(==$$$66r    c                 j    d |                      |          D             }d                    |          S )zEncodes a given object to an ARFF file.

        :param obj: the object containing the ARFF information.
        :return: the ARFF file as an string.
        c                     g | ]}|S r   r   )r8   r   s     r!   rZ   z&ArffEncoder.encode.<locals>.<listcomp>  s    555555r    r0   )iter_encoder   )rv   r6  r   s      r!   encodezArffEncoder.encode  s5     65t//44555yyr    c              #   H  K   |                     dd          r5|d                             d          D ]}|                     |          V  |                     d          st          d          |                     |d                   V  dV  |                     d          st          d          t                      }|d         D ]e}t          |t          t          f          r.t          |          d	k    st          |d
         t                    st          dt          |          z            t          |d         t                    r/|d         t          vrt          dt          |          z            nAt          |d         t          t          f          st          dt          |          z            |d
         |v r%t          dt          |d
                   z            |                    |d
                    |                     |d
         |d                   V  gdV  |d         }t          V  d|v rQt          |                     d                    }|                    |                     d          |          E d{V  dV  dS )a  The iterative version of `arff.ArffEncoder.encode`.

        This encodes iteratively a given object and return, one-by-one, the
        lines of the ARFF file.

        :param obj: the object containing the ARFF information.
        :return: (yields) the ARFF file as strings.
        r   Nr0   r   z.Relation name not found or with invalid value.r@   r   zAttributes not found.rD   r   z"Invalid attribute declaration "%s"rE   zInvalid attribute type "%s"z6Trying to use attribute name "%s" for the second time.r   )getr  rD  r   rI  r   r   r2  r   rJ   r   _SIMPLE_TYPESaddrN  r3  r  r   )rv   r6  r   r7  r:  r   r   s          r!   rQ  zArffEncoder.iter_encode  s      77=$'' 	0=)//55 0 0**3////// wwz"" 	NLMMM##C
O44444 ww|$$ 	53444%%% 	; 	;DdUDM22 P4yyA~~d1gs++  DSYY NOOO$q'3'' I7-//#$A#d))$KLLL 0  Q%77 I =c$ii GHHH Aw/)) !/14T!W!> ? ? ?  ##DG,,,((a$q'::::::&
 S==0AAD''DDDDDDDDDr    Nr   )	r   r   r   r~   rD  rI  rN  rR  rQ  r   r    r!   rB  rB    sk        & & & &", , ,  7  7  7D  ; ; ; ; ;r    rB  Fc                 N    t                      }|                    | ||          S )a(  Load a file-like object containing the ARFF document and convert it into
    a Python object.

    :param fp: a file-like object.
    :param encode_nominal: boolean, if True perform a label encoding
        while reading the .arff file.
    :param return_type: determines the data structure used to store the
        dataset. Can be one of `arff.DENSE`, `arff.COO`, `arff.LOD`,
        `arff.DENSE_GEN` or `arff.LOD_GEN`.
        Consult the sections on `working with sparse data`_ and `loading
        progressively`_.
    :return: a dictionary.
     r5  r>  r  r@  )fpr5  r>  decoders       r!   loadr\    s/     mmG>>"^&1  3 3 3r    c                 N    t                      }|                    | ||          S )a  Convert a string instance containing the ARFF document into a Python
    object.

    :param s: a string object.
    :param encode_nominal: boolean, if True perform a label encoding
        while reading the .arff file.
    :param return_type: determines the data structure used to store the
        dataset. Can be one of `arff.DENSE`, `arff.COO`, `arff.LOD`,
        `arff.DENSE_GEN` or `arff.LOD_GEN`.
        Consult the sections on `working with sparse data`_ and `loading
        progressively`_.
    :return: a dictionary.
    rX  rY  )rP   r5  r>  r[  s       r!   loadsr^  '  s/     mmG>>!N&1  3 3 3r    c                     t                      }|                    |           }t          |          }|D ]}|                    |dz              |}|                    |           |S )zSerialize an object representing the ARFF document to a given file-like
    object.

    :param obj: a dictionary.
    :param fp: a file-like object.
    r0   )rB  rQ  r`   write)r6  rZ  encoder	generatorlast_rowr   s         r!   dumprd  9  sn     mmG##C((IIH  
D!!!HHXIr    c                 H    t                      }|                    |           S )zSerialize an object representing the ARFF document, returning a string.

    :param obj: a dictionary.
    :return: a string with the ARFF document.
    )rB  rR  )r6  ra  s     r!   dumpsrf  K  s     mmG>>#r    )Sr~   
__author____author_email____version__r'   ra   typingr   r   r   r   r   r   r	   r
   rU  r.  r-  r0  r1  r3  r(   UNICODEr  r  r   r   rg   r^   r   r   typing_extensionsr   r   r   r-   rd   rh   rK   r   r   updater   rQ   rW   rm   r  r  r  r  r  _SUPPORTED_DATA_STRUCTURESr   rq   r|   r   r   r   r   r   r   r   r   rj   r   r   r   r   r   r   r   r   r   r   r   r  r  r  rB  r\  r^  rd  rf  r   r    r!   <module>ro     s  6t tj G
-   				 



             D D D D D D D D D D D D D D D D D D 9882:?LL2:F
SS"*4bjAA2:?@@ "*.
;; bjrz:: TN 49%   '++++++; ; ; ; ;I ; ; ; ; S#X/ / /f +;*:*<*< ' ' 
 
 >=5599===    CC?+@+@+B+BCCC D D D "    >>EE"II>>> ? ? ?
" 
" 
"  1 1 1< 	
	
#S#y'B & & & & &I & & &2 2 2 2 2 2 2 23 3 3 3 3 3 3 3
 
 
 
 
M 
 
 
1 1 1 1 1} 1 1 1


 

 

 

 

} 

 

 

	
 	
 	
 	
 	
m 	
 	
 	

 
 
 
 
= 
 
 
5 5 5 5 5 5 5 5
1 1 1 1 1] 1 1 1L L L L L L L L       , , ,  	) 	) 	) 	) 	) 	) 	) 	)       ";% ;% ;% ;% ;% ;% ;% ;%|= = = = = = = =	 	 	 	 	>- 	 	 	=7 =7 =7 =7 =7 =7 =7 =7~(; (; (; (; (; (; (; (;T	 	 	 	 	n. 	 	 	M M M
 
 
h h h h h h h hVK K K K K K K K` "u 3 3 3 3$ "u 3 3 3 3$  $    r    