
    q-Ph~                    $   d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZmZmZ d dlmZ d d	lmZmZmZmZmZmZ d dlmZ d d
lmZ d dlmZ  d dl!m"Z"m#Z# d dl$m%Z%m&Z&m'Z' d dl(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2 d dl3m4Z4m5Z5m6Z6 d dl7m8Z8 d dl9m:Z:m;Z;m<Z< d dl=m>Z> d dl?m@Z@mAZA d dlBmCZC erd dlmDZD d dlEmFZFmGZGmHZH d|dZId}dZJd~d$ZKed%d%d%d%d%d%d%d%d%d%d%d%d%d&dd=            ZLed%d%d%d%d%d%d%d%d%d%d%d%d%d%d>dd?            ZLed%d%d%d%d%d%d%d%d%d%d%d%d@ddC            ZLed%d%d%d%d%d%d%d%d%d%d%d%d%dDddG            ZLed%d%d%d%d%d%d%d%d%d%d%d%d%dDddH            ZLed%d%d%d%d%d%d%d%d%d%d%d%d@ddJ            ZL e"dKd.dLM           e"dNd0dOM          ddddPdddQdde)ddQdQdQdRddV                        ZLed%d%d%d%d%d%d%d%d%dW	ddX            ZMed%d%d%d%d%d%d%d%d%d%dY
ddZ            ZMed%d%d%d%d%d%d%d%d[dd\            ZMed%d%d%d%d%d%d%d%d%d]	dd^            ZMed%d%d%d%d%d%d%d%d%d]	dd_            ZMed%d%d%d%d%d%d%d%d[dd`            ZMdddQdde)ddQdQdQdY
ddaZMddde)dddQdQdQdQdb
ddeZNddgZOddkZPddmZQddqZRdQdQdrddtZSdduZTddvZUddwddyZVddwddzZWddwdd{ZXdS )    )annotationsN)defaultdict)Sequence)time)glob)BufferedReaderBytesIOStringIOTextIOWrapper)Path)IOTYPE_CHECKINGAnyCallableNoReturnoverload)
from_arrow)	functions)deprecate_renamed_parameterissue_deprecation_warning)deduplicate_namesnormalize_filepathparse_version)
N_INFER_DEFAULTBooleanDateDatetimeDurationInt64NullStringTimeUInt8)FLOAT_DTYPESINTEGER_DTYPESNUMERIC_DTYPES)import_optional)ModuleUpgradeRequiredErrorNoDataErrorParameterCollisionErrorconcat)looks_like_urlprocess_file_url)read_csv)Literal)ExcelSpreadsheetEngine
FileSource
SchemaDictsourcer2   returntuple[Any, bool]c                   d}g }t          | t                    r|                                 } t          | t                    rt          | t          t
          f          rd}| g} | D ]}t          |t
          t          j        f          rt          |          	                                st          j
                            t          |                    }t          |          r|                    |           |                    t          |d          x}           |sd|}t!          |          d}t          |t          j                  rt          |          }|                    |           ||fS )z1Unpack any glob patterns, standardise file paths.TF)	recursivezno workbook found at path )
isinstance
memoryviewtobytesr   bytesstrosPathLiker   existspath
expanduserr-   appendextendr   FileNotFoundError)r4   read_multiple_workbookssourcessrcfilesmsgs         _/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/polars/io/spreadsheet/functions.py_sourcesrL   3   ss   "G&*%% "!!fh'' :fucl+K+K "'    cC-.. 	 tCyy7G7G7I7I 	 '$$SXX..Cc"" s###NND$=$=$==5>>> -:3::',,,&*###r{++ #hhNN3+++    sr=   c                0    t          j        dd|           S )z/Standardize columns with '_duplicated_n' names.z_duplicated_(\d+)z\1)replstring)resub)rN   s    rK   _standardize_duplicatesrT   Q   s    6&U1====rM   frames2list[pl.DataFrame] | list[dict[str, pl.DataFrame]]rF   boolr   c               v   | sd}t          |          |s| d         S t          | d         t          j                  rt	          | d          S t          t                    }| D ]7}|                                D ] \  }}||                             |           !8d |                                D             S )Nz3no data found in the given workbook(s) and sheet(s)r   vertical_relaxedhowc                8    i | ]\  }}|t          |d           S )rY   rZ   r+   ).0kvs      rK   
<dictcomp>z(_unpack_read_results.<locals>.<dictcomp>l   s,    VVVA6!!3444VVVrM   )	r)   r9   pl	DataFramer,   r   listitemsrC   )rU   rF   rJ   sheet_framesressheetdfs          rK   _unpack_read_resultsri   V   s    
  C#" ay&)R\** 	Wf"45555 #4(( 	/ 	/C YY[[ / /	rU#**2..../VVASASAUAUVVVVrM   .)sheet_id
table_nameengineengine_optionsread_options
has_headercolumnsschema_overridesinfer_schema_lengthinclude_file_pathsdrop_empty_rowsdrop_empty_colsraise_if_emptyrj   None
sheet_namerk   
str | Nonerl   r1   rm   dict[str, Any] | Nonern   ro   rp   $Sequence[int] | Sequence[str] | Nonerq   SchemaDict | Nonerr   
int | Noners   rt   ru   rv   pl.DataFramec                   d S N r4   rj   rx   rk   rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   s                  rK   
read_excelr   o   	    $ 3rM   )rj   rx   rk   rl   rm   ro   rn   rp   rq   rr   rs   rt   ru   rv   c                   d S r   r   )r4   rj   rx   rk   rl   rm   ro   rn   rp   rq   rr   rs   rt   ru   rv   s                  rK   r   r      r   rM   )rk   rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   intr   c                   d S r   r   r   s                  rK   r   r      s	    $ srM   )rx   rk   rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   Literal[0] | Sequence[int]dict[str, pl.DataFrame]c                   d S r   r   r   s                  rK   r   r      	    $ "crM   c                   d S r   r   r   s                  rK   r   r      r   rM   list[str] | tuple[str]c                   d S r   r   r   s                  rK   r   r      r   rM   xlsx2csv_optionsz0.20.6versionread_csv_optionsz0.20.7calamineT)rj   rx   rk   rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   int | Sequence[int] | None#str | list[str] | tuple[str] | None&pl.DataFrame | dict[str, pl.DataFrame]c                  	
 t          |           \  }}
	fd|D             }t          ||          S )aS  
    Read Excel spreadsheet data into a DataFrame.

    .. versionadded:: 1.20
        Support loading data from named table objects with `table_name` parameter.
    .. versionadded:: 1.18
        Support loading data from a list (or glob pattern) of multiple workbooks.
    .. versionchanged:: 1.0
        Default engine is now "calamine" (was "xlsx2csv").
    .. versionchanged:: 0.20.7
        The `read_csv_options` parameter was renamed `read_options`.
    .. versionchanged:: 0.20.6
        The `xlsx2csv_options` parameter was renamed `engine_options`.

    Parameters
    ----------
    source
        Path(s) to a file or a file-like object (by "file-like object" we refer to
        objects that have a `read()` method, such as a file handler like the builtin
        `open` function, or a `BytesIO` instance). For file-like objects, the stream
        position may not be updated after reading.
    sheet_id
        Sheet number(s) to convert (set `0` to load all sheets as DataFrames) and
        return a `{sheetname:frame,}` dict. (Defaults to `1` if neither this nor
        `sheet_name` are specified). Can also take a sequence of sheet numbers.
    sheet_name
        Sheet name(s) to convert; cannot be used in conjunction with `sheet_id`. If
        more than one is given then a `{sheetname:frame,}` dict is returned.
    table_name
        Name of a specific table to read; note that table names are unique across
        the workbook, so additionally specifying a sheet id or name is optional;
        if one of those parameters *is* specified, an error will be raised if
        the named table is not found in that particular sheet.
    engine : {'calamine', 'openpyxl', 'xlsx2csv'}
        Library used to parse the spreadsheet file; defaults to "calamine".

        * "calamine": this engine can be used for reading all major types of Excel
          Workbook (`.xlsx`, `.xlsb`, `.xls`) and is dramatically faster than the
          other options, using the `fastexcel` module to bind the Rust-based Calamine
          parser.
        * "openpyxl": this engine is significantly slower than both `calamine` and
          `xlsx2csv`, but can provide a useful fallback if you are otherwise unable
          to read data from your workbook.
        * "xlsx2csv": converts the data to an in-memory CSV before using the native
          polars `read_csv` method to parse the result.
    engine_options
        Additional options passed to the underlying engine's primary parsing
        constructor (given below), if supported:

        * "calamine": n/a (can only provide `read_options`)
        * "openpyxl": `load_workbook <https://openpyxl.readthedocs.io/en/stable/api/openpyxl.reader.excel.html#openpyxl.reader.excel.load_workbook>`_
        * "xlsx2csv": `Xlsx2csv <https://github.com/dilshod/xlsx2csv/blob/f35734aa453d65102198a77e7b8cd04928e6b3a2/xlsx2csv.py#L157>`_
    read_options
        Options passed to the underlying engine method that reads the sheet data.
        Where supported, this allows for additional control over parsing. The
        specific read methods associated with each engine are:

        * "calamine": `load_sheet_by_name <https://fastexcel.toucantoco.dev/fastexcel.html#ExcelReader.load_sheet_by_name>`_
          (or `load_table <https://fastexcel.toucantoco.dev/fastexcel.html#ExcelReader.load_table>`_
          if using the `table_name` parameter).
        * "openpyxl": n/a (can only provide `engine_options`)
        * "xlsx2csv": see :meth:`read_csv`
    has_header
        Indicate if the first row of the table data is a header or not. If False,
        column names will be autogenerated in the following format: `column_x`, with
        `x` being an enumeration over every column in the dataset, starting at 1.
    columns
        Columns to read from the sheet; if not specified, all columns are read. Can
        be given as a sequence of column names or indices.
    schema_overrides
        Support type specification or override of one or more columns.
    infer_schema_length
        The maximum number of rows to scan for schema inference. If set to `None`, the
        entire dataset is scanned to determine the dtypes, which can slow parsing for
        large workbooks. Note that only the "calamine" and "xlsx2csv" engines support
        this parameter.
    include_file_paths
        Include the path of the source file(s) as a column with this name.
    drop_empty_rows
        Indicate whether to omit empty rows when reading data into the DataFrame.
    drop_empty_cols
        Indicate whether to omit empty columns (with no headers) when reading data into
        the DataFrame (note that empty column identification may vary depending on the
        underlying engine being used).
    raise_if_empty
        When there is no data in the sheet,`NoDataError` is raised. If this parameter
        is set to False, an empty DataFrame (with no columns) is returned instead.

    Returns
    -------
    DataFrame
        If reading a single sheet.
    dict
        If reading multiple sheets, a "{sheetname: DataFrame, ...}" dict is returned.

    See Also
    --------
    read_ods

    Notes
    -----
    * Where possible, prefer the default "calamine" engine for reading Excel Workbooks,
      as it is significantly faster than the other options.
    * When using the `xlsx2csv` engine the target Excel sheet is first converted
      to CSV using `xlsx2csv.Xlsx2csv(source).convert()` and then parsed with Polars'
      :func:`read_csv` function. You can pass additional options to `read_options`
      to influence this part of the parsing pipeline.
    * If you want to read multiple sheets and set *different* options (`read_options`,
      `schema_overrides`, etc), you should make separate calls as the options are set
      globally, not on a per-sheet basis.

    Examples
    --------
    Read the "data" worksheet from an Excel file into a DataFrame.

    >>> pl.read_excel(
    ...     source="test.xlsx",
    ...     sheet_name="data",
    ... )  # doctest: +SKIP

    If the correct dtypes can't be determined, use the `schema_overrides` parameter
    to specify them, or increase the inference length with `infer_schema_length`.

    >>> pl.read_excel(
    ...     source="test.xlsx",
    ...     schema_overrides={"dt": pl.Date},
    ...     infer_schema_length=None,
    ... )  # doctest: +SKIP

    Using the `xlsx2csv` engine, read table data from sheet 3 in an Excel workbook as a
    DataFrame while skipping empty lines in the sheet. As sheet 3 does not have a header
    row, you can pass the necessary additional settings for this to the `read_options`
    parameter; these will be passed to :func:`read_csv`.

    >>> pl.read_excel(
    ...     source="test.xlsx",
    ...     sheet_id=3,
    ...     engine="xlsx2csv",
    ...     engine_options={"skip_empty_lines": True},
    ...     read_options={"has_header": False, "new_columns": ["a", "b", "c"]},
    ... )  # doctest: +SKIP
    c                L    g | ] }t          |	
           !S ))rj   rx   rk   rl   rm   rn   rq   rr   rs   rv   ro   rp   rt   ru   _read_spreadsheet)r]   rH   rp   ru   rt   rl   rm   ro   rs   rr   rv   rn   rq   rj   rx   rk   s     rK   
<listcomp>zread_excel.<locals>.<listcomp>  sm     B B B$ # 	!!)%- 31)!++	
 	
 	
B B BrM   rU   rF   rL   ri   )r4   rj   rx   rk   rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   rG   rF   rU   s    ``````````````   rK   r   r      s    D (0'7'7$G$B B B B B B B B B B B B B B B B B$ %B B BF(   7   rM   )	rj   ro   rp   rq   rr   rs   rt   ru   rv   c       
            d S r   r   r4   rj   rx   ro   rp   rq   rr   rs   rt   ru   rv   s              rK   read_odsr     	     3rM   )
rj   rx   ro   rp   rq   rr   rs   rt   ru   rv   c       
            d S r   r   r   s              rK   r   r     r   rM   )ro   rp   rq   rr   rs   rt   ru   rv   c       
            d S r   r   r   s              rK   r   r     s	     srM   )	rx   ro   rp   rq   rr   rs   rt   ru   rv   c       
            d S r   r   r   s              rK   r   r     	     "crM   c       
            d S r   r   r   s              rK   r   r     r   rM   c       
            d S r   r   r   s              rK   r   r     r   rM   c       
   
        	
 t          |           \  }}	
f
d|D             }t          ||          S )a  
    Read OpenOffice (ODS) spreadsheet data into a DataFrame.

    Parameters
    ----------
    source
        Path to a file or a file-like object (by "file-like object" we refer to objects
        that have a `read()` method, such as a file handler like the builtin `open`
        function, or a `BytesIO` instance). For file-like objects, the stream position
        may not be updated accordingly after reading.
    sheet_id
        Sheet number(s) to convert, starting from 1 (set `0` to load *all* worksheets
        as DataFrames) and return a `{sheetname:frame,}` dict. (Defaults to `1` if
        neither this nor `sheet_name` are specified). Can also take a sequence of sheet
        numbers.
    sheet_name
        Sheet name(s) to convert; cannot be used in conjunction with `sheet_id`. If
        more than one is given then a `{sheetname:frame,}` dict is returned.
    has_header
        Indicate if the first row of the table data is a header or not. If False,
        column names will be autogenerated in the following format: `column_x`, with
        `x` being an enumeration over every column in the dataset, starting at 1.
    columns
        Columns to read from the sheet; if not specified, all columns are read. Can
        be given as a sequence of column names or indices.
    schema_overrides
        Support type specification or override of one or more columns.
    infer_schema_length
        The maximum number of rows to scan for schema inference. If set to `None`, the
        entire dataset is scanned to determine the dtypes, which can slow parsing for
        large workbooks.
    include_file_paths
        Include the path of the source file(s) as a column with this name.
    drop_empty_rows
        Indicate whether to omit empty rows when reading data into the DataFrame.
    drop_empty_cols
        Indicate whether to omit empty columns (with no headers) when reading data into
        the DataFrame (note that empty column identification may vary depending on the
        underlying engine being used).
    raise_if_empty
        When there is no data in the sheet,`NoDataError` is raised. If this parameter
        is set to False, an empty DataFrame (with no columns) is returned instead.

    Returns
    -------
    DataFrame, or a `{sheetname: DataFrame, ...}` dict if reading multiple sheets.

    See Also
    --------
    read_excel

    Examples
    --------
    Read the "data" worksheet from an OpenOffice spreadsheet file into a DataFrame.

    >>> pl.read_ods(
    ...     source="test.ods",
    ...     sheet_name="data",
    ... )  # doctest: +SKIP

    If the correct dtypes can't be determined, use the `schema_overrides` parameter
    to specify them, or increase the inference length with `infer_schema_length`.

    >>> pl.read_ods(
    ...     source="test.ods",
    ...     sheet_id=3,
    ...     schema_overrides={"dt": pl.Date},
    ...     raise_if_empty=False,
    ... )  # doctest: +SKIP
    c                L   
 g | ] }t          |
d di d 	          !S )Nr   )rj   rx   rk   rl   rm   rn   rq   rr   rs   rv   rt   ru   ro   rp   r   )r]   rH   rp   ru   rt   ro   rs   rr   rv   rq   rj   rx   s     rK   r   zread_ods.<locals>.<listcomp>g  sm     B B B$ # 	!- 31)++!	
 	
 	
B B BrM   r   r   )r4   rj   rx   ro   rp   rq   rr   rs   rt   ru   rv   rG   rF   rU   s    ``````````   rK   r   r     s    h (0'7'7$G$B B B B B B B B B B B B B$ %B B BF(   7   rM   )
rm   rn   rq   rr   rs   rp   ro   rv   rt   ru   str | IO[bytes] | bytesstr | Sequence[str] | Nonec          	     Z  	
 t          | t                    r-t          |           } t          |           rt	          |           } t          |
||          |pi                                 }t          pi           t          || |          \  }	 t          |||          \  }}
f	d|D             }t          d          r                                 n*# t          d          r                                 w w xY w|s$|d|fnd|f\  }}d| d|}t          |          	r:t          | t                    r| nd	fd	|                                D             }|r|S t          t          |                                                    S )
N)rl   rp   ro   rr   c                :   	 i | ]}| |	
 	  	        S ))	parserrx   rq   rn   rv   rp   rk   rt   ru   r   )r]   namerp   ru   rt   r   rv   rn   	reader_fnrq   rk   s     rK   r`   z%_read_spreadsheet.<locals>.<dictcomp>  sY     
 
 
  ))!1)-% / /
 
 

 
 
rM   closeidr   z%no matching sheets found when `sheet_z` is zin-memc                    i | ]@\  }}||                     t          j                                                          AS r   )with_columnsFlitalias)r]   r   framers   workbooks      rK   r`   z%_read_spreadsheet.<locals>.<dictcomp>  sT     
 
 
e %$$QU8__%:%:;M%N%NOO
 
 
rM   )r9   r=   r   r-   r.   _get_read_optionscopydict_initialise_spreadsheet_parser_get_sheet_nameshasattrr   
ValueErrorrd   nextitervalues)r4   rj   rx   rk   rl   rm   rn   rq   rr   rs   rp   ro   rv   rt   ru   
worksheetssheet_namesreturn_multiple_sheetsparsed_sheetsparamvaluerJ   r   r   r   s      `  `` `` ```       @@@rK   r   r     sP   $ &# .#F++&!! 	.%f--F$/  L %*0022N,233 %C% %!Ivz.>j*j/
 /
++
 
 
 
 
 
 
 
 
 
 
 
 $
 
 
 67## 	LLNNN 67## 	LLNNNN	  +5+=h''FJCWuKeKK%KKoo 
'44B66(
 
 
 
 
,2244
 
 
  ]))++,,---s   +C/ /'Ddict[str, Any]c                  | pi                                  } |dk    rtd| v r|rd}t          |          |                     d          |du rd}t          |          d| v r|t          k    rd	}t          |          || d<   |du r	d| vrd| d<   nu|d
k    red| v r|rd}t          |          d| v r| d         |urd}t          |          d| v r|t          k    rd}t          |          || d<   d| vr|| d<   n
|| d<   || d<   | S )zFNormalise top-level parameters to engine-specific 'read_options' dict.r   use_columnsz?cannot specify both `columns` and `read_options["use_columns"]`
header_rowNFzNthe values of `has_header` and `read_options["header_row"]` are not compatibleschema_sample_rowszRcannot specify both `infer_schema_length` and `read_options["schema_sample_rows"]`xlsx2csvrp   z;cannot specify both `columns` and `read_options["columns"]`ro   zNthe values of `has_header` and `read_options["has_header"]` are not compatiblerr   zScannot specify both `infer_schema_length` and `read_options["infer_schema_length"]`)r   r*   getr   )rn   rl   rp   rr   ro   rJ   s         rK   r   r     s    !&B,,..L\))w)SC)#...l++7J%<O<ObC)#..."l22?22fC)#...-@)*<|#C#C)-L&	:		%%7%OC)#...L((\**<<bC)#...#|33?22gC)#....A*+|++)3L&.A*+%/\"rM   r   list[dict[str, Any]]tuple[list[str], bool]c                B     |d|d d}t          |          g } -|+|rdn|d         d         }|                    |           d}nL dk    r#|                    d |D                        d	}n#t          |t                    rt          |t
                     pt           t                    p dk    }t          |t
                    r|fn|pd
xr?d |D             }D ]/}||vrd|}t          |          |                    |           0nt           t                    r fn pd
 fdt          |d          D             }	D ]B}
|	                    |
          x}d|
 }t          |          |                    |           C||fS )zEEstablish sheets to read; indicate if we are returning a dict frames.Nz"cannot specify both `sheet_name` (z) and `sheet_id` ()r   r   Fc              3  &   K   | ]}|d          V  dS )r   Nr   r]   wss     rK   	<genexpr>z#_get_sheet_names.<locals>.<genexpr>  s&      ;;"2f:;;;;;;rM   Tr   c                    h | ]
}|d          S r   r   r   s     rK   	<setcomp>z#_get_sheet_names.<locals>.<setcomp>  s     A A AF A A ArM   z-no matching sheet found when `sheet_name` is c                \    i | ](\  }}d k    s|d         v s
|d         v ||d         )S )r   indexr   r   )r]   idxr   idsnamesrj   s      rK   r`   z$_get_sheet_names.<locals>.<dictcomp>'  sR     " " "CMMR[C%7%72f:;N;N RZ;N;N;NrM      )startz+no matching sheet found when `sheet_id` is )	r   rC   rD   r9   r   r=   r   	enumerater   )rj   rx   rk   r   rJ   r   r   r   known_sheet_namessheet_names_by_idxr   r   r   s   `          @@rK   r   r     sL    
 6`:``S[```ooKJ.!<ttz!}V'<4   !&	Q;;
;;;;;;!% 
H--QjS6Q6Q2Q (H--1} 	 (
C88NZMMj>NB
5 	) !B Aj A A A ) )000R$RRC$S//)""4((((	) ",Hc!:!:N8++BC" " " " " "(1===" " "
  ) ).223777D@MMMC$S//)""4((((...rM   =tuple[Callable[..., pl.DataFrame], Any, list[dict[str, Any]]]c                4   t          |t                    r0t          |                                          st	          |          | dk    rt          d          }ddddd                                D ]\  }}|                    ||           t          |t                    rt          |          } |j
        |fi |}|j        j        }t          ||fS | dk    rft          d          }t          |t                    rt          |          } |j        |fddi|}d t          |          D             }t           ||fS | d	k    r;t          d
d          }	t          |t                    t          |t                    }}
|
s|r/t#          |	j        x}          dk     rd| d}t'          |          |
r|                                }nt          |t*          t,          f          rdd|j        vrd|j        d}t3          |          |j        x}r$t          |                                          r|}n|                                } |	j        |fi |}d t          |j                  D             }t:          ||fS d| }t=          |          )zNInstantiate the indicated spreadsheet parser and establish related properties.r   Fz%f)exclude_hidden_sheetsskip_empty_linesskip_hidden_rowsfloatformatopenpyxl	data_onlyTc                .    g | ]\  }}|d z   |j         dS r   )r   r   )title)r]   ir   s      rK   r   z2_initialise_spreadsheet_parser.<locals>.<listcomp>W  s*    TTTBAE2844TTTrM   r   	fastexcelz0.7.0)min_versionr   
   z5`fastexcel` >= 0.10 is required to read bytes; found r   bzfile z must be opened in binary modec                $    g | ]\  }}|d z   |dS r   r   )r]   r   nms      rK   r   z2_initialise_spreadsheet_parser.<locals>.<listcomp>r  s5     
 
 
-2Qa!eR((
 
 
rM   zunrecognized engine: )r9   r=   r   r@   rE   r'   rd   
setdefaultr<   r	   Xlsx2csvr   sheets_read_spreadsheet_xlsx2csvload_workbookr   _read_spreadsheet_openpyxlr   __version__r(   getvaluer   r   moder   OSErrorreadr   r   _read_spreadsheet_calamineNotImplementedError)rl   r4   rm   r   optionr   r   r   r   r   reading_bytesioreading_bytesmodule_versionrJ   filenames                  rK   r   r   5  s    &# (tF||':':'<'< ('''":.. &+ % %	
 

 %''	5 	5MFE %%fe4444fe$$ 	%V__F""6<<^<<')6699	:		":..fe$$ 	%V__F''QQ$Q.QQTT)FBSBSTTT)6699	:		#KWEEE	vw''vu%% '  	2} 	2-'33N3
 3
3 3 \.[[[C,S111 		'__&&FF ?@@ 	'&+%%KfkKKKcll"$k)( 'tH~~/D/D/F/F '!%%f????
 
6?@R6S6S
 
 
 *6699
,&
,
,C
c
"
""rM   csvr
   	separatorc               b   |                                  dk    rt          |          S |                     d           |i }g }|r|                    di           x}rt	          dd           |                    d|          }	t          |	                              |          rd}
t          |
          i |	|i }}|                                D ]+\  }}|t          k    r|||<   |
                    |           ,|                                }||d<   t          t          | fd	|i||||
          }|rui |j        }}|D ]Z}||         t          k    rGt!          j        |          j                            dd          j                                        ||<   [|r |j        di |}|S )zCTranslate StringIO buffer containing delimited data as a DataFrame.r   Ndtypesz_the `dtypes` parameter for `read_csv` is deprecated. It has been renamed to `schema_overrides`.z0.20.31r   rq   zNcannot specify columns in both `schema_overrides` and `read_options['dtypes']`r  )rh   rv   rt   ru   (?:[ T]00:00:00(?:\.0+)?)$ r   )tell_empty_frameseekr   r   setintersectionr*   rd   r   rC   r   _drop_null_datar/   schemar!   r   colr=   replaceto_dater   )r  r  rn   rq   rt   ru   rv   	date_cols
csv_dtypescsv_schema_overridesrJ   	overridesr   dtyperh   
date_castsr  s                    rK   _csv_buffer_to_framer!  {  s!    xxzzQN+++ HHQKKKI <%))(B777: 	%q!   
  ,//0BJOO#$$112BCC 	/bC)#...&R)=&RAQ&RTV#	"** 	% 	%IB}}', $$  $$$$#((**+;'(	
 

 
 

 &''	
 	
 	
B  
/F
 	 	BbzV##E"II!>CC 2
  	/ ..:..BIrM   )rt   ru   rh   c               n   g }|r| j         D ]}|dk    st          j        d|          r| |         }|j        t          k    s\|                                | j        k    s?|j        t          v rF|                    dd                                          | j        k    r|	                    |           |r
 | j
        | } | j        | j        cxk    rdk    rn nt          |          S |rK|                     t          j        t          j                                                                         S | S )zFIf DataFrame contains columns/rows that contain only nulls, drop them.r  z(_duplicated_|__UNNAMED__)\d+$r   N)rp   rR   matchr  r    
null_countheightr&   r  rC   dropwidthr  filterr   all_horizontalallis_null)rh   rv   rt   ru   	null_colscol_namer  s          rK   r  r    s=    I %
 	/ 	/H 2~~*KX!V!V~lI%%~~''2944	^33KK400;;==JJ $$X... 	%)$B	yBH!!!!!!!!!N+++ ?yy!*1577??+<+<===>>>IrM   c                N    | rd}t          |          t          j                    S )Nz^empty Excel sheet

If you want to read this as an empty DataFrame, set `raise_if_empty=False`.)r)   ra   rb   )rv   rJ   s     rK   r  r    s1     ^ 	 #<>>rM   c                    |rBddl m}m} t          |d         t                    r || n || }|                     |          } | S )Nr   )by_indexby_name)polars.selectorsr0  r1  r9   r   select)rh   rp   r0  r1  colss        rK   _reorder_columnsr5    sc      66666666%/
C%@%@Wxx!!ggwFWYYt__IrM   )rk   r   c               d   t          d          }	t          |	j        x}
          }|dk     rd|v rd|
 d}t          |          |dk     rd|v rd|
 d}t          |          |r|d	k     rd
|
 d}t	          |          |r||d<   |pi }|                    d          dk    r7|d= |dk    rdn't                              t          d          d          |d<   n|r|dk    r|                    di           }|	                                D ]m\  }}||vrd|
                                x}t          v rd||<   ,|t          v rd||<   ;|t          k    rd||<   L|t          k    rd||<   ]|t          k    rd||<   n||d<   |dk     r$ | j        d'd|i|}|                                }n|rG | j        |fi |}|r"||j        k    rd|d|}t)          |          |                                }n | j        |fi |}t-          |          }|                    dd          <|                    d          s'd t          d|j        dz             D             |_        t3          ||||          }|r|                                |j        }}g i }}|	                                D ]=\  }}||         t          k    r|||<   |t8          k    r`|                    t=          j        |          j         !                    tE          |dd           tE          |d d           !                     |tF          k    rZt=          j        |          j         $                    d"d#          }|                    |j         %                                           |tL          k    r@|                    t=          j        |          j         '                                           8|||<   ?|r
 |j(        | }|r|)                    |$          }|*                                }|+                                r!|)                    tX          t          i          }g }|j        	                                D ]a\  }}||vrV|t          v rt=          j        |          -                                .                    t=          j        |                    t=          j        |          /                                z  t=          j        |          )                    t`                    g}|                    |           |t8          k    rt=          j        |          j1        2                                3                    te          ddd                    t=          j        |          )                    tF                    g}|                    |           c|rX|4                    d% |D                       5                    d          }d& tm          ||          D             x} r
 |j(        |  }|S )(Nr   )r   	   r   zYa more recent version of `fastexcel` is required for 'schema_sample_rows' (>= 0.9; found r   )r   r      r   zUa more recent version of `fastexcel` is required for 'use_columns' (>= 0.10.2; found )r      zTa more recent version of `fastexcel` is required for 'table_name' (>= 0.12.0; found r   )r   r9  r   rQ   i @  r  r   r   floatdurationboolean)r      r8  r   table named  not found in sheet r   Fcolumn_namesc                    g | ]}d | S column_r   )r]   r   s     rK   r   z._read_spreadsheet_calamine.<locals>.<listcomp>?  s    HHHA-A--HHHrM   r   rv   rt   ru   	time_unit	time_zonerE  rF  r  r  )r  c              3  N   K   | ] }|d                               d          V  !dS )r   T)ignore_nullsN)r*  )r]   ds     rK   r   z-_read_spreadsheet_calamine.<locals>.<genexpr>}  s4      PPqqtxxTx::PPPPPPrM   c                "    g | ]\  }\  }}|
|S r   r   )r]   apply_casts       rK   r   z._read_spreadsheet_calamine.<locals>.<listcomp>~  s5     
 
 
%UIQ

 
 
rM   r   )7r'   r   r   r(   r   r   r   fromkeysrangerd   	base_typer%   r$   r!   r   r   load_sheet_by_name	to_polars
load_tablerx   RuntimeErrorload_sheet_eagerr   r'  rp   r  lazyr  r   rC   r   r  r=   to_datetimegetattrr   r  r  r"   to_timer   rN  collectis_emptyr    floor
eq_missing
is_not_nanr   dtr   eqr3  rowzip)!r   rx   rn   rq   rp   rk   rt   ru   rv   r   original_versionfastexcel_versionrJ   parser_dtypesr   r  
base_dtyper   rh   xl_tablews_arrowlfr  str_to_temporalupdated_overridesr   tpdt_strtype_checksc
check_cast
apply_castdowncasts!                                    rK   r  r    s     ,,I%):O&O&6PP6!!&:l&J&J}jz}}}(---:%%-<*G*Gyfvyyy(--- ''11xeuxxxoo .&-]#'-2,--22-. !J.. HuU||X66 	X
 
 //7::$((266+1133 	4 	4KD%=(("'//"3"33JFF*/M$''<//*1M$''6))*2M$''8++*4M$''7***3M$'!.X:%%&V&GGJG,GG\\^^ 	&(v(DD|DDH (jH,???UZUUzUU"3'''##%%BB.v.zJJ\JJHH%%BL%008AQAQB
 B
8 IHq"(Q,1G1GHHHBJ	
%''	
 
 
B  WWYY	F-/*&,,.. 	+ 	+FBbzV##(*!"%%x&&E"IIM--")"k4"@"@")"k4"@"@ .      tr../LbQQ&&vz'9'9';';<<<<t&&quRyy}'<'<'>'>????(*!"%% 	3 /2B 	3 122BZZ\\ 
{{}} %WWdF^$$ KIOO%% / /5$$$$$E!HHNN$$//a99AE!HH<O<O<Q<QQE!HHMM%((
 "":....(""E!HHK$$&&))$q!Q--88E!HHMM$''
 "":... ,YYPPKPPPPPTTUVWW

 
),Z)E)E
 
 
 
8 	, !(+BIrM   c          
        |                     dd          }	|                     dd          }
|pi }|	dk    }g }|rO|sMd\  }}| j        D ]}|dz  }||j        v r||j        }} n|"|rd|d	|nd
|d|d}t	          |          n| |         }t          |dd          x}r|r||         n-t          t          |                                                    }t          ||j
                           }|st          |          S |
r3|                    d |                     d          D                        nC|                    d t          dt          |d                   dz             D                        |j        r|d|j                  }|}n|rd
|d|d}t	          |          |
set          |                                          x}st          |          S t          |d                   }d t          d|dz             D             }nU|                                }|D ]>}d |D             }t#          d |D                       r|                    |            n?|rt$          nd}g }t'          |t'          |           D ]m\  }}|s|sbd |D             }|s |                    |          x}t$          k    rd |D             }|                    |          x}t*          t,          t.          fv rt1          j        ||d          }|j        t$          k    r|t,          k    r;|j                            t          |dd          t          |dd                    }n{|t*          k    r3|j                            dd          j                                        }n=|t.          k    r|j                                        }nt1          j        |||d          }|                     |           otC          d |D                       }t1          j"        tG          t'          ||                    ||	d          } tI          | |||          } tK          | |          } | S ) zAUse the 'openpyxl' library to read data from the given worksheet.rr   Nro   Tr   )Nr   r   r>  r?  zno named tables found in sheet z (looking for r   tablesc              3  $   K   | ]}|j         V  d S r   r   r]   cells     rK   r   z-_read_spreadsheet_openpyxl.<locals>.<genexpr>  s$      ==$*======rM   c              3      K   | ]	}d | V  
dS )rC  Nr   r]   ns     rK   r   z-_read_spreadsheet_openpyxl.<locals>.<genexpr>  s(      LLA-A--LLLLLLrM   c                    g | ]}d | S rB  r   r{  s     rK   r   z._read_spreadsheet_openpyxl.<locals>.<listcomp>  s    BBBmmmBBBrM   c                    g | ]	}|j         
S r   rw  rx  s     rK   r   z._read_spreadsheet_openpyxl.<locals>.<listcomp>  s    999Tdj999rM   c              3     K   | ]}|d uV  	d S r   r   r]   r_   s     rK   r   z-_read_spreadsheet_openpyxl.<locals>.<genexpr>  s&      99q}999999rM   c                    g | ]	}|j         
S r   rw  rx  s     rK   r   z._read_spreadsheet_openpyxl.<locals>.<listcomp>  s    999Tdj999rM   c                4    g | ]}|t          |          n|S r   )r=   r  s     rK   r   z._read_spreadsheet_openpyxl.<locals>.<listcomp>  s&    KKKqQ]#a&&&KKKrM   F)strictrE  rF  rG  r  r  )r  r  c              3  $   K   | ]}|j         V  d S r   r   )r]   rN   s     rK   r   z-_read_spreadsheet_openpyxl.<locals>.<genexpr>  s$      ::af::::::rM   )rq   rr   r  rD  )&popr   ru  r   rU  rY  r   r   r   rc   refr  rD   rP  lentotalsRowCount	iter_rowsanyr!   rc  r   r   r   r"   ra   Seriesr  r=   rX  r  r  rZ  rC   r   rb   r   r  r5  )!r   rx   rn   rq   rp   rk   rt   ru   rv   rr   ro   no_inferenceheadern_tablesrg   r   rJ   ru  tablerows	rows_itern_colsrb  
row_valuesr  series_datar   column_datar   rm  rN   r   rh   s!                                    rK   r   r     s6    '**+@$GG!!,55J'-2&!+L!F  *  &
H& 	 	EMHU\))!&J *  cOzOOOOObzbbS]bbb 
 s###  J Xt,,,v &0Qz""d4;P;P6Q6QBuyM"" 	0/// 	MMM==!=======MMLLq#d1g,,:J1K1KLLLLLL 	1/5///0D			 [
[[J[[[3 	!%bllnn!5!55I 4#N3331&&FBBU1fqj-A-ABBBFFI   99S999
99j99999 MM*---E #,FFEK i99 " "k 	" 	"99[999F L)9)=)=d)C)C CNN LKFKKK&**4000dHd5KKKIdF59997f$$X~~E--&-b+t&D&D&-b+t&D&D .   tEMM92 ggii  tEMMOOIdF%FFFq!!!::k:::::E	S$$%%)/	
 
 
B 

%''	
 
 
B 
"g	&	&BIrM   c          	        |rd}	t          |	          t                      }
t          j                    5  t          j        dt
                     |                     |
|           ddd           n# 1 swxY w Y   |                    dd           |r||d<   g }|rn|                                D ]Y\  }}|t          k    rIt          ||<   |                    t          j        |                              t                               Zt          |
d	|||||
          }|r
 |j        | }|                    t$                    }t'          ||          S )zAUse the 'xlsx2csv' library to read data from the given worksheet.zDthe `table_name` parameter is not supported by the 'xlsx2csv' engineignore)category)outfile	sheetnameNtruncate_ragged_linesTrp   ,)r  rn   rq   rv   rt   ru   )r   r
   warningscatch_warningsfilterwarningsDeprecationWarningconvertr   rd   r   r#   rC   r   r  rN  r!  r   renamerT   r5  )r   rx   rn   rq   rp   rk   rt   ru   rv   rJ   
csv_buffercast_to_booleanr  r  rh   s                  rK   r   r     s     TooJ		 	"	" A A 	3EFFFFzZ@@@	A A A A A A A A A A A A A A A 3T::: *")YO A*0022 	A 	AJC(- %&&quSzzw'?'?@@@	!)%''
 
 
B  /R_o.	*	+	+BB(((s   3A44A8;A8)r4   r2   r5   r6   )rN   r=   r5   r=   )rU   rV   rF   rW   r5   r   ) r4   r2   rj   rw   rx   r=   rk   ry   rl   r1   rm   rz   rn   rz   ro   rW   rp   r{   rq   r|   rr   r}   rs   ry   rt   rW   ru   rW   rv   rW   r5   r~   ) r4   r2   rj   rw   rx   rw   rk   ry   rl   r1   rm   rz   ro   rW   rn   rz   rp   r{   rq   r|   rr   r}   rs   ry   rt   rW   ru   rW   rv   rW   r5   r~   ) r4   r2   rj   r   rx   r=   rk   ry   rl   r1   rm   rz   rn   rz   ro   rW   rp   r{   rq   r|   rr   r}   rs   ry   rt   rW   ru   rW   rv   rW   r5   r   ) r4   r2   rj   r   rx   rw   rk   ry   rl   r1   rm   rz   rn   rz   ro   rW   rp   r{   rq   r|   rr   r}   rs   ry   rt   rW   ru   rW   rv   rW   r5   r   ) r4   r2   rj   r   rx   rw   rk   ry   rl   r1   rm   rz   rn   rz   ro   rW   rp   r{   rq   r|   rr   r}   rs   ry   rt   rW   ru   rW   rv   rW   r5   r~   ) r4   r2   rj   rw   rx   r   rk   ry   rl   r1   rm   rz   rn   rz   ro   rW   rp   r{   rq   r|   rr   r}   rs   ry   rt   rW   ru   rW   rv   rW   r5   r   ) r4   r2   rj   r   rx   r   rk   ry   rl   r1   rm   rz   rn   rz   ro   rW   rp   r{   rq   r|   rr   r}   rs   ry   rt   rW   ru   rW   rv   rW   r5   r   )r4   r2   rj   rw   rx   r=   ro   rW   rp   r{   rq   r|   rr   r}   rs   ry   rt   rW   ru   rW   rv   rW   r5   r~   )r4   r2   rj   rw   rx   rw   ro   rW   rp   r{   rq   r|   rr   r}   rs   ry   rt   rW   ru   rW   rv   rW   r5   r~   )r4   r2   rj   r   rx   r=   ro   rW   rp   r{   rq   r|   rr   r}   rs   ry   rt   rW   ru   rW   rv   rW   r5   r   )r4   r2   rj   r   rx   rw   ro   rW   rp   r{   rq   r|   rr   r}   rs   ry   rt   rW   ru   rW   rv   rW   r5   r   )r4   r2   rj   r   rx   rw   ro   rW   rp   r{   rq   r|   rr   r}   rs   ry   rt   rW   ru   rW   rv   rW   r5   r~   )r4   r2   rj   rw   rx   r   ro   rW   rp   r{   rq   r|   rr   r}   rs   ry   rt   rW   ru   rW   rv   rW   r5   r   )r4   r2   rj   r   rx   r   ro   rW   rp   r{   rq   r|   rr   r}   rs   ry   rt   rW   ru   rW   rv   rW   r5   r   ) r4   r   rj   r   rx   r   rk   ry   rl   r1   rm   rz   rn   rz   rq   r|   rr   r}   rs   ry   rp   r{   ro   rW   rv   rW   rt   rW   ru   rW   r5   r   )rn   rz   rl   r1   rp   r{   rr   r}   ro   rW   r5   r   )
rj   r   rx   r   rk   ry   r   r   r5   r   )rl   ry   r4   r   rm   r   r5   r   )r  r
   r  r=   rn   r   rq   r|   rt   rW   ru   rW   rv   rW   r5   r~   )
rh   r~   rv   rW   rt   rW   ru   rW   r5   r~   )rv   rW   r5   r~   )rh   r~   rp   r{   r5   r~   )r   r   rx   ry   rn   r   rq   r|   rp   r{   rk   ry   rt   rW   ru   rW   rv   rW   r5   r~   )Y
__future__r   r>   rR   r  collectionsr   collections.abcr   datetimer   r   ior   r	   r
   r   pathlibr   typingr   r   r   r   r   r   polars._reexport	_reexportra   polarsr   r   r   polars._utils.deprecationr   r   polars._utils.variousr   r   r   polars.datatypesr   r   r   r   r   r   r    r!   r"   r#   polars.datatypes.groupr$   r%   r&   polars.dependenciesr'   polars.exceptionsr(   r)   r*   polars.functionsr,   polars.io._utilsr-   r.   polars.io.csv.functionsr/   r0   polars._typingr1   r2   r3   rL   rT   ri   r   r   r   r   r   r   r!  r  r  r5  r  r   r   r   rM   rK   <module>r     s   " " " " " " 				 				  # # # # # # $ $ $ $ $ $             ? ? ? ? ? ? ? ? ? ? ? ?       G G G G G G G G G G G G G G G G             ! ! ! ! ! !        W V V V V V V V V V                        P O O O O O O O O O / / / / / /         
 $ # # # # # = = = = = = = = , , , , , , NMMMMMMMMMM, , , ,<> > > >
W W W W2 
  %(,/*-47*-&)%(!     
( 
  %(,/*-47*-&)%(!     
( 
 !%(,/*-47*-&)%(!     
, 

  %(,/*-47*-&)%(!" " " " " 
"( 

  %(,/*-47*-&)%(!     
( 
 !%(,/*-47*-&)%(!" " " " " 
"( /1A8TTT/RRR ,06:!%/,0*.48*.&5%)  !x x x x x SR UTxv 
 47*-&)%(     
  
 47*-&)%(     
  
 47*-&)%(     
  

 47*-&)%(" " " " " 
"  

 47*-&)%(     
  
 47*-&)%(" " " " " 
"& ,06:48*.&5%)  l l l l l ll -1*.*.&5%)48  !I. I. I. I. I. I.X3 3 3 3l// // // //dC# C# C# C#LA A A AP !      D      $ "M M M M M Mn "m m m m m mn "0) 0) 0) 0) 0) 0) 0) 0)rM   