
    M/Phy(                        d dl mZ d dlmZ d dlmZmZ d dlmZm	Z	m
Z
mZmZ d dlZd dlmZmZ d dlmZ d dlmZ d dlZd d	lmZmZmZ d!dZ G d de          Zd"dZd Zd Z d Z!d Z"d Z#d#dZ$d Z%d$dZ&d%dZ'd%dZ(d%dZ)d Z*d&d Z+dS )'    )lrange)StringIO)environmakedirs)abspathdirnameexists
expanduserjoinN)	HTTPErrorURLError)urljoin)urlopen)Indexread_csv
read_stata%https://www.stata-press.com/data/r11/Tc                 F    t          || dz             }t          |          S )a  
    Download and return an example dataset from Stata.

    Parameters
    ----------
    data : str
        Name of dataset to fetch.
    baseurl : str
        The base URL to the stata datasets.
    as_df : bool
        Deprecated. Always returns a DataFrame

    Returns
    -------
    dta : DataFrame
        A DataFrame containing the Stata dataset.

    Examples
    --------
    >>> dta = webuse('auto')

    Notes
    -----
    Make sure baseurl has trailing forward slash. Does not do any
    error checking in response URLs.
    z.dta)r   r   )databaseurlas_dfurls       Z/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/datasets/utils.pywebuser      s#    6 '4;
'
'Cc??    c                       e Zd Zd Zd ZdS )Datasetc                     d | _         d | _        d | _        d | _        t                              | |           | | _        	 | j                            t                    | _	        d S #  Y d S xY wN)
endogexogr   namesdict__init____dict__astypefloatraw_data)selfkws     r   r$   zDataset.__init__/   si    
		
dB	 I,,U33DMMM	DDs    $A& &A+c                 *    t          | j                  S r   )str	__class__)r)   s    r   __repr__zDataset.__repr__?   s    4>"""r   N)__name__
__module____qualname__r$   r.    r   r   r   r   .   s2           # # # # #r   r   c                 j   | j         }t          |t                    r^||         }| |                                         }||                     |gd          }n| ||                                                  }n| j        d d |f                                         }t          |j                   }||                     |d          }nVt          |t                    r!| ||                                                  }n | ||                                                  }|Yt          | j        d d |f                   }||_	        |                                |_	        | 
                    ||                   } t          |j                   }	t          | t          |          ||||	          }
|
S )N   )axis)r   r"   r    r!   
endog_name	exog_name)columns
isinstanceintcopydroploclistr   ilocindex	set_indexr   )r   	endog_idxexog_idx	index_idxr"   r6   r    r!   r@   r7   datasets              r   process_pandasrF   C   s   LE)S!! 09%
Z %%''99j\922DDh(--//DDI&++--%-((
99Za900DD#&& 	0h(--//DDh(--//Ddi9-..ZZ\\
~~eI.//T\""I4tE{{%J)M M MGNr   c           
          | j                             t          t          dt	          |           dz                                 r|                     d          } | S )z
    All the Rdatasets have the integer row.labels from R if there is no
    real index. Strip this for a zero-based index
    r4   T)r<   )r@   equalsr   r   lenreset_index)r   s    r   _maybe_reset_indexrK   c   sS    
 zvaTQ778899 +T**Kr   c                 Z    | du rd } n#| du rt          d           } nt          |           } | S )NFT)get_data_home)caches    r   
_get_cacherO   m   s<    ~~	$d##e$$Lr   c                     dd l }t          |d          5 }|                    |                    |                      d d d            d S # 1 swxY w Y   d S )Nr   wb)zlibopenwritecompress)r   
cache_pathrR   zfs       r   	_cache_itrX   x   s    KKK	j$		 &2
t$$%%%& & & & & & & & & & & & & & & & & &s   )AAAc                     dd l }t          | d          5 }|                    |                                          cd d d            S # 1 swxY w Y   d S )Nr   rb)rR   rS   
decompressread)rV   rR   rW   s      r   _open_cacher]   ~   s    KKK	j$		 *2rwwyy))* * * * * * * * * * * * * * * * * *s   'A		AAc                    d}||                      d          d                             dd          }|                     d          }t          |          dk    r|d	xx         d
z  cc<   n|dxx         d
z  cc<   d                    |          dz   }t          ||          }	 t	          |          }d}n#  Y nxY w|s5t          | d                                          }|t          ||           ||fS )z
    Tries to load data from cache location otherwise downloads it. If it
    downloads the data and cache is not None then it will put the downloaded
    data in the cache path.
    FNz:///,.r4   z-v2r   z.zipT   )timeout)splitreplacerI   r   r]   r   r\   rX   )r   rN   
from_cache	file_namerV   r   s         r   _urlopen_cachedrj      s    JIIe$$R(00c::	OOC((	y>>AbMMMU"MMMMaLLLE!LLLHHY''&0	%++
	z**DJJ	D  (sA&&&++--dJ'''s   &B8 8B<csvc                     | |dz   |z  z   }	 t          ||          \  }}n7# t          $ r*}dt          |          v rt          d|z            |d }~ww xY w|                    dd          }t          |          |fS )Nz.%s404zDataset %s was not found.utf-8strict)rj   r   r,   
ValueErrordecoder   )base_urldatanamerN   	extensionr   r   rh   errs           r   	_get_datarv      s    
h&)3
3C*366jj   CHH88CDDDI	 ;;w))DD>>:%%s   ! 
A%AAc           	      x   d}t          ||          \  }}|                    dd          }t          t          |                    }t	          j        |j        | k    |j        |k              }|                                st          d|  d| d| d          |j
        |         }|d         j        d	         S )
NzRhttps://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/datasets.csvrn   ro   zItem z from Package z& was not found. Check the CSV file at z  to verify the Item and Package.Titler   )rj   rq   r   r   nplogical_andItemPackageanyrp   r=   r?   )	rs   packagerN   	index_urlr   _r@   idxdataset_metas	            r   _get_dataset_metar      s    1Ii//GD!;;w))DXd^^$$E
.x/'1I
J
JC7799 
KH K KG K K(K K K
 
 	
 9S>L %a((r   datasetsFc                 @   d|z   dz   }d|z   dz   }t          |          }t          || |          \  }}t          |d          }t          |          }t	          | ||          }t          || |d          \  }}	t          ||                                |||          S )	a7  download and return R dataset

    Parameters
    ----------
    dataname : str
        The name of the dataset you want to download
    package : str
        The package in which the dataset is found. The default is the core
        'datasets' package.
    cache : bool or str
        If True, will download this data into the STATSMODELS_DATA folder.
        The default location is a folder called statsmodels_data in the
        user home folder. Otherwise, you can specify a path to a folder to
        use for caching the data. If False, the data will not be cached.

    Returns
    -------
    dataset : Dataset
        A `statsmodels.data.utils.Dataset` instance. This objects has
        attributes:

        * data - A pandas DataFrame containing the data
        * title - The dataset title
        * package - The package from which the data came
        * from_cache - Whether not cached data was retrieved
        * __doc__ - The verbatim R documentation.

    Notes
    -----
    If the R dataset has an integer index. This is reset to be zero-based.
    Otherwise the index is preserved. The caching facilities are dumb. That
    is, no download dates, e-tags, or otherwise identifying information
    is checked to see if the data should be downloaded again or not. If the
    dataset is in the cache, it's used.
    zJhttps://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/r`   zJhttps://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/doc/z/rst/r   )	index_colrst)r   __doc__r~   titlerh   )rO   rv   r   rK   r   r   r\   )
rs   r~   rN   data_base_urldocs_base_urlr   rh   r   docr   s
             r   get_rdatasetr      s    J##*++./M##*++23MuE %@@D*DA&&&Dd##Dh77E}hu==FCchhjj'(* * * *r   c                     | #t          j        dt          dd                    } t          |           } t	          |           st          |            | S )a
  Return the path of the statsmodels data dir.

    This folder is used by some large dataset loaders to avoid
    downloading the data several times.

    By default the data dir is set to a folder named 'statsmodels_data'
    in the user home folder.

    Alternatively, it can be set by the 'STATSMODELS_DATA' environment
    variable or programatically by giving an explicit folder path. The
    '~' symbol is expanded to the user home folder.

    If the folder does not already exist, it is automatically created.
    NSTATSMODELS_DATA~statsmodels_data)r   getr   r
   r	   r   	data_homes    r   rM   rM      s]     K 2 $S*< = =? ?	9%%I) r   c                 L    t          |           } t          j        |            dS )z.Delete all the content of the data home cache.N)rM   shutilrmtreer   s    r   clear_data_homer     s%    i((I
M)r   c                 d    | dn| } 	 t          |            n# t          $ r}Y d}~dS d}~ww xY wdS )zCheck if internet is availableNzhttps://github.comFT)r   r   )r   ru   s     r   check_internetr     sT    "%+

3C   uuuuu4s    
--c                 4   g }| D ]}|                     d          r |                    d          r|dd         }n?|                     d          r|dd         }n|                    d          r
|dd         }|                    |           || _        | S )a
  
    Remove leading and trailing single quotes

    Parameters
    ----------
    df : DataFrame
        DataFrame to process

    Returns
    -------
    df : DataFrame
        DataFrame with stripped column names

    Notes
    -----
    In-place modification
    'r4   r_   N)
startswithendswithappendr8   )dfr8   cs      r   strip_column_namesr   "  s    $ G  << 	!**T"2"2 	!B$AA\\$ 	!""AAZZ 	#2#AqBJIr   ra   c                     t          t          |                     }t          ||          }|dk    rdnd}i }|dk    rddi}t          |f||d|}|r|                    t
                    }|S )zStandard simple csv loaderra   pythonr   float_precisionhigh)sepengine)r   r   r   r   r&   r'   )		base_filecsv_namer   convert_floatfilepathfilenamer   r   r   s	            r   load_csvr   A  s    wy))**HHX&&HXXFO}},f5HH#fHHHHD "{{5!!Kr   )r   T)r   NN)rk   )r   Fr   )ra   F),statsmodels.compat.pythonr   ior   osr   r   os.pathr   r   r	   r
   r   r   urllib.errorr   r   urllib.parser   urllib.requestr   numpyry   pandasr   r   r   r   r#   r   rF   rK   rO   rX   r]   rj   rv   r   r   rM   r   r   r   r   r2   r   r   <module>r      s   , , , , , ,                       > > > > > > > > > > > > > >  , , , , , , , ,             " " " " " "     . . . . . . . . . .   ># # # # #d # # #*   @    & & &* * *  <& & & &) ) )$2* 2* 2* 2*n   0        >     r   