
    cMh;              	      F   d Z ddlmZ ddlmZ ddlmZ ddlZddlmZ ddl	Z	ddl
ZddlmZ ddlmZ ddlZdd	lmZ dd
lmZ erddlmZmZmZmZ dZdZdZdZg dZdZ dZ!dZ"dZ#de  de" de! de# d	Z$de  de! dZ%dZ&d&dZ'd'd!Z(d" Z)d# Z* G d$ d%eej+                  Z,dS )(a-  
Read a SAS XPort format file into a Pandas DataFrame.

Based on code from Jack Cushman (github.com/jcushman/xport).

The file format is defined here:

https://support.sas.com/content/dam/SAS/support/en/technical-papers/record-layout-of-a-sas-version-5-or-6-data-set-in-sas-transport-xport-format.pdf
    )annotations)abc)datetimeN)TYPE_CHECKING)Appender)find_stack_level)
get_handle)
ReaderBase)CompressionOptionsDatetimeNaTTypeFilePath
ReadBufferzPHEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!000000000000000000000000000000  zKHEADER RECORD*******MEMBER  HEADER RECORD!!!!!!!000000000000000001600000000zPHEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!000000000000000000000000000000  zPHEADER RECORD*******OBS     HEADER RECORD!!!!!!!000000000000000000000000000000  )ntypenhfunfield_lengthnvar0namelabelnformnflnum_decimalsnfjnfillniformniflnifdnpos_zParameters
----------
filepath_or_buffer : str or file-like object
    Path to SAS file or object implementing binary read method.zindex : identifier of index column
    Identifier of column that should be used as index of the DataFrame.
encoding : str
    Encoding for text data.
chunksize : int
    Read file `chunksize` lines at a time, returns iterator.zBformat : str
    File format, only `xport` is currently supported.z\iterator : bool, default False
    Return XportReader object for reading file incrementally.z#Read a SAS file into a DataFrame.


a  

Returns
-------
DataFrame or XportReader

Examples
--------
Read a SAS Xport file:

>>> df = pd.read_sas('filename.XPT')

Read a Xport file in 10,000 line chunks:

>>> itr = pd.read_sas('filename.XPT', chunksize=10000)
>>> for chunk in itr:
>>>     do_something(chunk)

z$Class for reading SAS Xport files.

z

Attributes
----------
member_info : list
    Contains information about the file
fields : list
    Contains information about the variables in the file
zRead observations from SAS Xport file, returning as data frame.

Parameters
----------
nrows : int
    Number of rows to read from data file; if None, read whole
    file.

Returns
-------
A DataFrame.
datestrstrreturnr   c                f    	 t          j        | d          S # t          $ r t          j        cY S w xY w)z1Given a date in xport format, return Python date.z%d%b%y:%H:%M:%S)r   strptime
ValueErrorpdNaT)r    s    W/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/pandas/io/sas/sas_xport.py_parse_dater)      sA     *;<<<   vs    00sc                r    i }d}|D ],\  }}| |||z                                             ||<   ||z  }-|d= |S )a  
    Parameters
    ----------
    s: str
        Fixed-length string to split
    parts: list of (name, length) pairs
        Used to break up string, name '_' will be filtered from output.

    Returns
    -------
    Dict of name:contents of string at given location.
    r   r   )strip)r*   partsoutstartr   lengths         r(   _split_liner1      s[     CE  feefn,-3355D	CJ    c                    |dk    rnt          j        t          |           t          j        d                    }t          j        d| dd|z
             }|                    |          }| |d<   |S | S )N   S8Sz,Sdtypef0)npzeroslenr8   view)vecnbytesvec1r8   vec2s        r(   _handle_truncated_float_vecrB      sv     {{xC"(4..113V33q6z3344yyuy%%T
Jr2   c                   t          j        d          }|                     |          }|d         }|d         }|dz  }t          j        t	          |           t           j                  }d|t          j        |dz            <   d|t          j        |d	z            <   d
|t          j        |dz            <   ||z  }||z	  |dz  dd
|z
  z   z  z  }|dz  }||dz	  dz  dz
  dz  |z   dz   dz  |dz  z  z  }t          j        t	          |          fd          }||d<   ||d<   |                    d          }|                    d          }|S )zf
    Parse a vector of float values representing IBM 8 byte floats into
    native 8 byte floats.
    z>u4,>u4r7   r9   f1i    i       i  @    i         l          A   i     l        z>f8f8)	r:   r8   r=   r;   r<   uint8whereemptyastype)	r>   r8   r@   xport1xport2ieee1shiftieee2ieees	            r(   _parse_float_vecrY      s{   
 HYE88%8  D$ZF$ZF ZE HSXXRX...E+,E"(6J&
'
'(+,E"(6J&
'
'(+,E"(6J&
'
'( 
eOEu_&:"52U;K!LME 
ZE 
6R<4'2-!3u<tCJ E 8SZZM333DDJDJ9959!!D;;tDKr2   c                      e Zd ZeZ	 	 	 	 dddZddZd ZddZddZ	ddZ
d d!dZd Z ee          d d"d            ZdS )#XportReaderN
ISO-8859-1inferfilepath_or_bufferFilePath | ReadBuffer[bytes]encoding
str | None	chunksize
int | Nonecompressionr   r"   Nonec                   || _         d| _        || _        || _        t	          |d|d|          | _        | j        j        | _        	 |                                  d S # t          $ r | 
                                  w xY w)Nr   rbF)r`   is_textrd   )	_encoding_lines_read_index
_chunksizer	   handleshandler^   _read_header	Exceptionclose)selfr^   indexr`   rb   rd   s         r(   __init__zXportReader.__init__  s     "#!#
 
 
 #',"5	 	 	 	JJLLL	s   A  A>c                8    | j                                          d S N)rm   rq   rr   s    r(   rq   zXportReader.close  s    r2   c                Z    | j                             d                                          S )NP   )r^   readdecoderw   s    r(   _get_rowzXportReader._get_row   s%    &++B//66888r2   c                   | j                             d           |                                 }|t          k    r"d|v rt	          d          t	          d          |                                 }ddgddgd	dgd
dgddgg}t          ||          }|d         dk    rt	          d          t          |d                   |d<   || _        |                                 }t          |d d                   |d<   |                                 }|                                 }|                    t                    }|t          k    }	|r|	st	          d          t          |dd                   }
ddgddgddgddgd	dgd
dgddgg}t          |                                 |          }ddgd
dgddgddgg}|                    t          |                                 |                     t          |d                   |d<   t          |d                   |d<   || _        ddd}t          |                                 dd                   }|
|z  }|dz  r|d|dz  z
  z  }| j                             |          }g }d}t          |          |
k    r|d |
         ||
d          }}|                    d          }t#          j        d|          }t'          t)          t*          |                    }|d
= ||d                   |d <   |d!         }|d          dk    r!|d"k     s|dk    rd#| d$}t-          |          |                                D ]-\  }}	 |                                ||<   # t2          $ r Y *w xY w||d!         z  }||gz  }t          |          |
k    |                                 }|t4          k    st	          d%          || _        || _        | j                                         | _        |                                 | _         d& | j        D             | _!        d' tE          | j                  D             }tG          j$        |          }|| _%        d S )(Nr   z**COMPRESSED**z<Header record indicates a CPORT file, which is not readable.z#Header record is not an XPORT file.prefixrJ   versionr4   OSr   created   zSAS     SAS     SASLIBz!Header record has invalid prefix.modifiedzMember header not foundset_namesasdatar   (   typenumericchar)rE   rF   6   :   ry      z>hhhh8s40s8shhh2s8shhl52sr   r   rF   zFloating field width z is not between 2 and 8.zObservation header not found.c                B    g | ]}|d                                           S )r   )r{   ).0xs     r(   
<listcomp>z,XportReader._read_header.<locals>.<listcomp>  s(    @@@q&	((**@@@r2   c                h    g | ]/\  }}d t          |          z   dt          |d                   z   f0S )r*   r6   r   )r!   )r   ifields      r(   r   z,XportReader._read_header.<locals>.<listcomp>  sM     
 
 
5 3q66\3U>%:!;!;;<
 
 
r2   )&r^   seekr|   _correct_line1r%   r1   r)   	file_info
startswith_correct_header1_correct_header2intupdatemember_inforz   r<   ljuststructunpackdictzip
_fieldkeys	TypeErroritemsr,   AttributeError_correct_obs_headerfieldsrecord_lengthtellrecord_start_record_countnobscolumns	enumerater:   r8   _dtype)rr   line1line2fifr   line3header1header2	headflag1	headflag2fieldnamelengthmemr   types
fieldcount
datalength	fielddatar   
obs_length
fieldbytesfieldstructr   flmsgkvheaderdtypelr8   s                                r(   ro   zXportReader._read_header#  s   $$Q''' N""5(( !R   BCCC"~	1~ay3)iQS_Us++	X":::@AAA*9Y+?@@	)" +E#2#J 7 7	* --//--//&&'788	//	 	8i 	86777gben-- qMONN1I"IO
 "$--//377B#rWbMFA;G;t}}<<==="-k*.E"F"FJ!,[-C!D!DI& &))B/00
$z1
? 	/"zB..J+00<<	
)nn// *?*+/**+ "J $))#..J -(CZPPKZ5566Ec
"5>2E'N~&BW~**aR!VVJbJJJnn$  1 wwyyE!HH%   D %//JugF7 )nn//: ,,,<===' 388::&&((	@@DK@@@
 
%dk22
 
 
   s   M33
N ?N pd.DataFramec                <    |                      | j        pd          S )NrE   nrows)rz   rl   rw   s    r(   __next__zXportReader.__next__  s    yyt3!y444r2   r   c                   | j                             dd           | j                                         | j        z
  }|dz  dk    r"t	          j        dt                                 | j        dk    r)| j                             | j                   || j        z  S | j                             dd           | j                             d          }t          j
        |t          j                  }t          j        |dk              }t          |          dk    rd}nd	t          |          z  }| j                             | j                   ||z
  | j        z  S )
z
        Get number of records in file.

        This is maybe suboptimal because we have to seek to the end of
        the file.

        Side effect: returns file position to record_start.
        r   rF   ry   zxport file may be corrupted.)
stacklevelir7   l     @@  r4   )r^   r   r   r   warningswarnr   r   rz   r:   
frombufferuint64flatnonzeror<   )rr   total_records_lengthlast_card_bytes	last_cardixtail_pads         r(   r   zXportReader._record_count  sU    	$$Q***#6;;==@QQ"$))M.+--   
 ""#(():;;;'4+===$$S!,,,166r::M/CCC	 ^I)<<==r77a<<HH3r77{H$$T%6777$x/D4FFFr2   sizec                @    || j         }|                     |          S )a  
        Reads lines from Xport file and returns as dataframe

        Parameters
        ----------
        size : int, defaults to None
            Number of lines to read.  If None, reads whole file.

        Returns
        -------
        DataFrame
        Nr   )rl   rz   )rr   r   s     r(   	get_chunkzXportReader.get_chunk  s$     <?Dyyty$$$r2   c                    |                     d          }|d         dk    |d         dk    z  |d         dk    z  }|d         dk    |d         d	k    z  |d         d
k    z  |d         dk    z  }||z  }|S )Nzu1,u1,u2,u4r7   rD   r   f2f3r9   rL   Z   _   .   )r=   )rr   r>   r   missmiss1s        r(   _missing_doublezXportReader._missing_double  s    HH=H))$14A.!D'Q,?go!D'T/2w$ w$  	
 	r2   r   c                    | j         }t          | j          j        z
            }| j        z  }|dk    r                                  t
           j                            |          }t          j	        | j
        |          }i }t           j                  D ]\  }}|dt          |          z            }	 j        |         d         }
|
dk    rUt          |	 j        |         d                   }	                     |	          }t#          |	          }t          j        ||<   n8 j        |         d         dk    r!d |	D             } j         fd	|D             }|                    ||i           t+          j        |          } j        5t+          j        t3           j         j        |z                       |_        n|                     j                  } xj        |z  c_        |S )
Nr   )r8   countr*   r   r   r   r   c                6    g | ]}|                                 S  )rstrip)r   ys     r(   r   z$XportReader.read.<locals>.<listcomp>  s     ---AQXXZZ---r2   c                D    g | ]}|                     j                  S r   )r{   ri   )r   r   rr   s     r(   r   z$XportReader.read.<locals>.<listcomp>  s'    ===a$.11===r2   )r   minrj   r   rq   StopIterationr^   rz   r:   r   r   r   r   r!   r   rB   r   rY   nanri   r   r&   	DataFramerk   Indexrangers   	set_index)rr   r   
read_linesread_lenrawdatadf_datajr   r>   r   r   r   dfs   `             r(   rz   zXportReader.read  s   =IE	D,< <==
 22q==JJLLL%**844}S:FFFdl++ 	# 	#DAqsSVV|$CKN7+E	!!1#t{1~n7UVV++C00$S))&$Q(F22----->-====1===ANNAq6""""\'"";xd&68H:8U V VWWBHHdk**BJ&	r2   )Nr\   Nr]   )
r^   r_   r`   ra   rb   rc   rd   r   r"   re   )r"   re   )r"   r   )r"   r   rv   )r   rc   r"   r   )r   rc   r"   r   )__name__
__module____qualname___xport_reader_doc__doc__rt   rq   r|   ro   r   r   r   r   r   _read_method_docrz   r   r2   r(   r[   r[      s       G
 + $*1    8   9 9 9l l l l\5 5 5 5$G $G $G $GL% % % % %"	 	 	 X% % % %  % % %r2   r[   )r    r!   r"   r   )r*   r!   )-r   
__future__r   collectionsr   r   r   typingr   r   numpyr:   pandas.util._decoratorsr   pandas.util._exceptionsr   pandasr&   pandas.io.commonr	   pandas.io.sas.sasreaderr
   pandas._typingr   r   r   r   r   r   r   r   r   _base_params_doc_params2_doc_format_params_doc_iterator_doc_read_sas_docr   r   r)   r1   rB   rY   Iteratorr[   r   r2   r(   <module>r     sl    # " " " " "                               , , , , , , 4 4 4 4 4 4     ' ' ' ' ' ' . . . . . .            ' 
 R ' 
'   
(C @9 A
    	 
   2   	          ,  &6 6 6r~ ~ ~ ~ ~*cl ~ ~ ~ ~ ~r2   