
    0Php                     h   d Z ddlZddlZddlmZmZ ddlmZmZm	Z	 ddl
mZ ddlZddlZddlmZ ddlmZmZ d	d
lmZ d	dlmZmZmZmZmZ  eddd          Z ej        e          Z ee edgdgdgdg eed	dd          g eeddd          gdd          dddddddd            Z!dS )a:  California housing dataset.

The original database is available from StatLib

    http://lib.stat.cmu.edu/datasets/

The data contains 20,640 observations on 9 variables.

This dataset contains the average house value as target variable
and the following input variables (features): average income,
housing average age, average rooms, average bedrooms, population,
average occupation, latitude, and longitude in that order.

References
----------

Pace, R. Kelley and Ronald Barry, Sparse Spatial Autoregressions,
Statistics and Probability Letters, 33 (1997) 291-297.

    N)IntegralReal)PathLikemakedirsremove)exists   )Bunch)Intervalvalidate_params   )get_data_home)RemoteFileMetadata_convert_data_dataframe_fetch_remote_pkl_filepath
load_descrzcal_housing.tgzz.https://ndownloader.figshare.com/files/5976036@aaa5c9a6afe2225cc2aed2723682ae403280c4a3695a2ddda4ffb5d8215ea681)filenameurlchecksumbooleanleft)closedg        neither)	data_homedownload_if_missing
return_X_yas_frame	n_retriesdelayT)prefer_skip_nested_validationF   g      ?c                 8   t          |           } t          |           st          |            t          | d          }t          |          s|st	          d          t
                              d                    t          j	        |                      t          t          | ||          }t          j        d|          5 }t          j        |                    d          d	
          }	g d}
|	dd|
f         }	t!          j        |	|d           ddd           n# 1 swxY w Y   t%          |           nt!          j        |          }	g d}|	dddf         |	ddddf         }}|dddfxx         |dddf         z  cc<   |dddfxx         |dddf         z  cc<   |dddf         |dddf         z  |dddf<   |dz  }t)          d          }|}|}d}dg}|rt+          d||||          \  }}}|r||fS t-          ||||||          S )a  Load the California housing dataset (regression).

    ==============   ==============
    Samples total             20640
    Dimensionality                8
    Features                   real
    Target           real 0.15 - 5.
    ==============   ==============

    Read more in the :ref:`User Guide <california_housing_dataset>`.

    Parameters
    ----------
    data_home : str or path-like, default=None
        Specify another download and cache folder for the datasets. By default
        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

    download_if_missing : bool, default=True
        If False, raise an OSError if the data is not locally available
        instead of trying to download the data from the source site.

    return_X_y : bool, default=False
        If True, returns ``(data.data, data.target)`` instead of a Bunch
        object.

        .. versionadded:: 0.20

    as_frame : bool, default=False
        If True, the data is a pandas DataFrame including columns with
        appropriate dtypes (numeric, string or categorical). The target is
        a pandas DataFrame or Series depending on the number of target_columns.

        .. versionadded:: 0.23

    n_retries : int, default=3
        Number of retries when HTTP errors are encountered.

        .. versionadded:: 1.5

    delay : float, default=1.0
        Number of seconds between retries.

        .. versionadded:: 1.5

    Returns
    -------
    dataset : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, with the following attributes.

        data : ndarray, shape (20640, 8)
            Each row corresponding to the 8 feature values in order.
            If ``as_frame`` is True, ``data`` is a pandas object.
        target : numpy array of shape (20640,)
            Each value corresponds to the average
            house value in units of 100,000.
            If ``as_frame`` is True, ``target`` is a pandas object.
        feature_names : list of length 8
            Array of ordered feature names used in the dataset.
        DESCR : str
            Description of the California housing dataset.
        frame : pandas DataFrame
            Only present when `as_frame=True`. DataFrame with ``data`` and
            ``target``.

            .. versionadded:: 0.23

    (data, target) : tuple if ``return_X_y`` is True
        A tuple of two ndarray. The first containing a 2D array of
        shape (n_samples, n_features) with each row representing one
        sample and each column representing the features. The second
        ndarray of shape (n_samples,) containing the target samples.

        .. versionadded:: 0.20

    Notes
    -----

    This dataset consists of 20,640 samples and 9 features.

    Examples
    --------
    >>> from sklearn.datasets import fetch_california_housing
    >>> housing = fetch_california_housing()
    >>> print(housing.data.shape, housing.target.shape)
    (20640, 8) (20640,)
    >>> print(housing.feature_names[0:6])
    ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup']
    )r   zcal_housing.pkzz1Data not found and `download_if_missing` is Falsez&Downloading Cal. housing from {} to {})dirnamer    r!   zr:gz)modenamez"CaliforniaHousing/cal_housing.data,)	delimiter)	      r	   r#            r   r   Nr.   )compress)MedIncHouseAgeAveRooms	AveBedrms
PopulationAveOccupLatitude	Longituder   r   r	   r-   r#   r,   g     j@zcalifornia_housing.rstMedHouseValfetch_california_housing)datatargetframetarget_namesfeature_namesDESCR)r   r   r   r   OSErrorloggerinfoformatARCHIVEr   r   tarfileopennploadtxtextractfilejoblibdumpr   loadr   r   r
   )r   r   r   r   r    r!   filepatharchive_pathfcal_housingcolumns_indexr>   r;   r:   descrXyr<   r=   s                      d/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/sklearn/datasets/_california_housing.pyr9   r9   8   s   X 	222I) Y(9::H( ," 	OMNNN4;;GKSS	
 	
 	
 %	
 
 
 \vL999 		;Q*BCCs  K
 877M%aaa&67KKX::::		; 		; 		; 		; 		; 		; 		; 		; 		; 		; 		; 		; 		; 		; 		; 	| k(++	 	 	M qqq!t$k!!!QRR%&8DF 	AJJJ$qqq!t*JJJ 	AJJJ$qqq!t*JJJ aaadd111a4j(DAJ hF/00EAAEL  
-&fm\
 
q!  !t!#   s   AD""D&)D&)"__doc__loggingrE   numbersr   r   osr   r   r   os.pathr   rJ   numpyrG   utilsr
   utils._param_validationr   r    r   _baser   r   r   r   r   rD   	getLogger__name__rA   strr9        rU   <module>re      s   0   " " " " " " " " ) ) ) ) ) ) ) ) ) )                  ? ? ? ? ? ? ? ?                    
8O   
	8	$	$ 8T* ){ kKhxD@@@A(4d9===>  #'
 
 
 
u u u u
 
u u urd   