
    Q/PhO                     V   d dl Z d dlZd dlZd dlmZ d dlmZmZmZ d dl	m
Z
  ej                    5   ej        d           d dlZddd           n# 1 swxY w Y    e
            sd dlmZ  ed          Znd dlmZ d d	lmZmZ d dlZd dlZd d
lmZ d dlmZ d dlmZ d dlm Z m!Z!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z+ d dl,m-Z- d dl.m/Z/m0Z0m1Z1m2Z2 d dl3m4Z4 d dl5m6Z6 d dl7m8Z8 d dl9m:Z: d dl;m<Z< d dl=m>Z> d dl?m@Z@ d dlAmBZB  e@d          ZCe G d de<e$                      ZDdS )     N)Path)AnyOptionalUnion)is_pyspark_installedignore)TypeVar
sDataFrame)	DataFrame)asdictis_dataclass)tqdm)typechecked)VisionsTypeset)ConfigSettingsSparkSettings)ExpectationsReport)BaseDescription)	AlertType)describe)Sample)BaseSummarizerProfilingSummarizerformat_summaryredact_summary)ProfilingTypeSet)get_report_structure)Root)create_html_assets)SerializeReport)hash_dataframe)ProfilingLogger)
get_configReportLogger)namec                      e Zd ZU dZdZdZdZdZdZe	e
d<   	 	 	 	 	 	 	 	 	 	 	 	 	 d3deeej        ef                  deded	ee         d
ededee         deeeef                  dedee         dee         dee	         dee         fdZedeeej        ef                  dededeeeef                  deddfd            Zedeeej        ef                  de	deeej        ef                  fd            Zd4dee         ddfdZedee         fd            Zedefd            Zedefd            Zedee         fd            Z ede!fd            Z"edefd            Z#edefd            Z$ede%fd             Z&deej                 fd!Z'defd"Z(defd#Z)de*fd$Z+d5d%eeef         d&eddfd'Z,defd(Z-de%fd)Z.defd*Z/defd+Z0defd,Z1d6d-Z2d6d.Z3d6d/Z4defd0Z5	 d4d1d dee	         dd fd2Z6dS )7ProfileReportzGenerate a profile report from a Dataset stored as a pandas `DataFrame`.

    Used as is, it will output its content as an HTML report in a Jupyter notebook.
    NconfigFTdfminimaltsmodesortby	sensitiveexplorativesampleconfig_filelazytypeset
summarizertype_schemac                 V   |                      |||||	           t          |          | _        |s|r3|st          d          }t	                                          |          }n<||}n7t          |t          j                  rt	                      }nt                      }|df|dfg}t          d |D                       rht	                      }|D ].\  }}|r'|                    t          j        |                    }/|                    |                    d                    }t          |          dk    r_t          j        |          \  }}|                    t	                                          |                              d                    }|r|                    |          }||j        j        _        |r|r||j        j        _        |                     ||          | _        || _        d| _        || _        || _        |
| _        || _        |	s	| j        }dS dS )	a  Generate a ProfileReport based on a pandas or spark.sql DataFrame

        Config processing order (in case of duplicate entries, entries later in the order are retained):
        - config presets (e.g. `config_file`, `minimal` arguments)
        - config groups (e.g. `explorative` and `sensitive` arguments)
        - custom settings (e.g. `config` argument)
        - custom settings **kwargs (e.g. `title`)

        Args:
            df: a pandas or spark.sql DataFrame
            minimal: minimal mode is a default configuration with minimal computation
            ts_mode: activates time-series analysis for all the numerical variables from the dataset.
            Only available for pd.DataFrame
            sort_by: ignored if ts_mode=False. Order the dataset by a provided column.
            sensitive: hides the values for categorical and text variables for report privacy
            config_file: a config file (.yml), mutually exclusive with `minimal`
            lazy: compute when needed
            sample: optional dict(name="Sample title", caption="Caption", data=pd.DataFrame())
            typeset: optional user typeset to use for type inference
            summarizer: optional user summarizer to generate custom summary output
            type_schema: optional dict containing pairs of `column name`: `type`
            **kwargs: other arguments, for valid arguments, check the default configuration file.
        zconfig_minimal.yamlNr/   r.   c              3       K   | ]	\  }}|V  
d S N ).0	condition_s      ^/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/ydata_profiling/profile_report.py	<genexpr>z)ProfileReport.__init__.<locals>.<genexpr>   s&      44\Yy444444    T)exclude_defaultsr   )_ProfileReport__validate_inputstype_df_typer$   r   	from_file
isinstancepdr   r   anyupdater   get_arg_groupsdictlen
shorthandsvars
timeseriesactiver-   $_ProfileReport__initialize_dataframer*   r)   _df_hash_sample_type_schema_typeset_summarizerreport)selfr*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r)   r5   kwargsreport_configgroupscfgr;   keyrL   r<   s                         r=   __init__zProfileReport.__init__D   sI   R 	r7FKFFFR 	0' 	0 @()>??$JJ00==MM"MM"bl++ 0 (

 - -($

 44V44444 	R**C"( A A	3 A**V%:3%?%?@@C)0041P1PQQMv;;??!'!26!:!:J)00

!!*--22D2II M  	9)0088M/5%, 	:f 	:39M)0--b-@@#'% 	AAA	 	r?   returnc                 8   | |st          d          ||rt          d          t          | t          j                  r| | j        rt          d          d S d S |rt          d          | (| j                                        rt          d          d S d S )Nz3Can init a not-lazy ProfileReport with no DataFramez=Arguments `config_file` and `minimal` are mutually exclusive.z8DataFrame is empty. Pleaseprovide a non-empty DataFrame.zFTime-Series dataset analysis is not yet supported for Spark DataFrames)
ValueErrorrE   rF   r   emptyNotImplementedErrorrddisEmpty)r*   r+   r,   r1   r2   s        r=   __validate_inputszProfileReport.__validate_inputs   s     :d:RSSS"w"O  
 b",'' 	~"(~ Q   ~~~
  )\  
 26>>#3#3 Q   r?   rY   c                    t                               | |j        j        j                   | t          | t          j                  r|j        j        j        r}|j        j        j        rX| 	                    |j        j        j                  } | 
                    |j        j        j        d          } d | j        _        n|                                 } | S )N)r*   rN   )byF)drop)loggerinfo_def_reportrM   rN   rO   rE   rF   r   r-   sort_values	set_indexindexr&   
sort_index)r*   rY   s     r=   __initialize_dataframez$ProfileReport.__initialize_dataframe   s    
 	$)4; 	 	
 	
 	
 N2r|,, "-4  !,3 %^^}'9'D'K^LL\\-"4"?"FU\SS $]]__	r?   subsetc                     ||dvrt          d          ||dv rd| _        d| _        d| _        ||dk    rd| _        |	d| _        dS dS )aT  Invalidate report cache. Useful after changing setting.

        Args:
            subset:
            - "rendering" to invalidate the html, json and widget report rendering
            - "report" to remove the caching of the report structure
            - None (default) to invalidate all caches

        Returns:
            None
        N)	renderingrV   z:'subset' parameter should be None, 'rendering' or 'report'rV   )r`   _widgets_json_html_report_description_set)rW   rp   s     r=   invalidate_cachezProfileReport.invalidate_cache   s     &0G"G"GL   >V'>>> DMDJDJ>Vx//DL>$(D!!! >r?   c                 \    | j         t          | j        | j                  | _         | j         S r8   )rT   r   r)   rS   rW   s    r=   r3   zProfileReport.typeset   s(    = ,T[$:KLLDM}r?   c                     | j         2d}| j        t          j        urd}t	          | j        |          | _         | j         S )NFT)	use_spark)rU   rC   rF   r   r   r3   )rW   r|   s     r=   r4   zProfileReport.summarizer  sD    #I}BL00 	24<9UUUDr?   c                     | j         1t          | j        | j        | j        | j        | j                  | _         | j         S r8   )rw   describe_dfr)   r*   r4   r3   rR   rz   s    r=   description_setzProfileReport.description_set  sA     ($/% %D! $$r?   c                 ^    | j          | j        t          | j                  | _         | j         S r8   )rQ   r*   r"   rz   s    r=   df_hashzProfileReport.df_hash  s*    = TW%8*4733DM}r?   c                 \    | j         t          | j        | j                  | _         | j         S r8   )rv   r   r)   r   rz   s    r=   rV   zProfileReport.report  s(    </T=QRRDL|r?   c                 P    | j         |                                 | _         | j         S r8   )ru   _render_htmlrz   s    r=   htmlzProfileReport.html#  %    :**,,DJzr?   c                 P    | j         |                                 | _         | j         S r8   )rt   _render_jsonrz   s    r=   jsonzProfileReport.json)  r   r?   c                     t          | j        j        d         t                    r2t	          | j        j        d                   dk    rt          d          | j        |                                 | _        | j        S )Nn   z[Widgets interface not (yet) supported for comparing reports, please use the HTML rendering.)rE   r   tablelistrK   RuntimeErrorrs   _render_widgetsrz   s    r=   widgetszProfileReport.widgets/  sy     t+1#6==	D(.s344q88m   =  0022DM}r?   c                     | j         j        S )zGet duplicate rows and counts based on the configuration

        Returns:
            A DataFrame with the duplicate rows and their counts.
        )r   
duplicatesrz   s    r=   get_duplicateszProfileReport.get_duplicates=  s     #..r?   c                     | j         j        S )z~Get head/tail samples based on the configuration

        Returns:
            A dict with the head and tail samples.
        )r   r0   rz   s    r=   
get_samplezProfileReport.get_sampleE  s     #**r?   c                     | j         S )zReturn the description (a raw statistical summary) of the dataset.

        Returns:
            Dict containing a description for each variable in the DataFrame.
        )r   rz   s    r=   get_descriptionzProfileReport.get_descriptionM  s     ##r?   c                 .    d | j         j        D             S )zGet variables that are rejected for analysis (e.g. constant, mixed data types)

        Returns:
            a set of column names that are unsupported
        c                 F    h | ]}|j         t          j        k    |j        S r9   )
alert_typer   REJECTEDcolumn_name)r:   alerts     r=   	<setcomp>z7ProfileReport.get_rejected_variables.<locals>.<setcomp>[  s6     
 
 
9#555 555r?   )r   alertsrz   s    r=   get_rejected_variablesz$ProfileReport.get_rejected_variablesU  s)    
 
-4
 
 
 	
r?   output_filesilentc                    t          j                    5  t          j        d           t          j        d          j        }ddd           n# 1 swxY w Y   t          t          t          |	                    d                              }|dk     rt          j
        d           t          |t                    st          t          |                    }|j        dk    r|                                 }n| j        j        j        sot          |j                  | j        j        _        | j        j        j        &t          |j                  dz   | j        j        _        t/          | j        |           |                                 }|j        d	k    r4|j        }|                    d	          }t          j
        d
| d           | j        j         }t7          dd|          5 }|                    |d           |                                 ddd           n# 1 swxY w Y   |s	 ddlm}	 |	                     |!                                "                                           dS # tF          $ rA ddl$}
|
%                    |!                                "                                           Y dS w xY wdS )a  Write the report to a file.

        Args:
            output_file: The name or the path of the file to generate including the extension (.html, .json).
            silent: if False, opens the file in the default browser or download it in a Google Colab environment
        r   PillowN.)	      r   zGTry running command: 'pip install --upgrade Pillow' to avoid ValueErrorz.json_assetsz.htmlz
Extension zh not supported. For now we assume .html was intended. To remove this warning, please use .html or .json.r   zExport report to filetotaldescdisablezutf-8)encodingr   files)&warningscatch_warningssimplefilterpkg_resourcesget_distributionversiontuplemapintsplitwarnrE   r   strsuffixto_jsonr)   r   inlineparentassets_pathassets_prefixstemr    to_htmlwith_suffixprogress_barr   
write_textrH   google.colabr   downloadabsoluteas_uriModuleNotFoundError
webbrowseropen_new_tab)rW   r   r   pillow_versionversion_tupledatar   disable_progress_barpbarr   r   s              r=   to_filezProfileReport.to_filea  s[    $&& 	N 	N!(+++*;HEEMN	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N c#~';';C'@'@AABB9$$MY   +t,, 	1s;//00K((<<>>DD;#* =/2;3E/F/F ,;#19589I5J5JY5VDK$2"4;<<<<<>>D!W,,$+)55g>>J J J J  
 $(;#;;1;O
 
 
 	""4'":::KKMMM		 	 	 	 	 	 	 	 	 	 	 	 	 	 	  	II......{3355<<>>?????& I I I!!!!''(<(<(>(>(E(E(G(GHHHHHHI	I 	Is6   .AAA+,H##H'*H'1?I2 2AJ=<J=c                    ddl m} | j        }t          dd| j        j                   5 } |t          j        |                                        | j        j	        j
        | j        j	        j        | j        j	        j        | j        j	        j        | j        j	        j        j        d         | j        j	        j        j        | j        j	        j        j        | j        j        j        | j        j        j        | j        j        d         
  
        }| j        j	        j        rddlm}  ||d	d	
          }|                                 d d d            n# 1 swxY w Y   |S )Nr   )
HTMLReportr   zRender HTMLr   ydata_profiling_version)
navoffliner   r   primary_colorlogothemetitledater   )minifyT)remove_all_empty_spaceremove_comments),ydata_profiling.report.presentation.flavoursr   rV   r   r)   r   copydeepcopyrenderr   navbar_showuse_local_assetsr   r   styleprimary_colorsr   r   r   analysisr   
date_startpackageminify_htmlhtmlmin.mainr   rH   )rW   r   rV   r   r   r   s         r=   r   zProfileReport._render_html  s   KKKKKK-T[5M1M
 
 
 	:dmF3344;;K$0(9{'."k.<"k.4CAF[%+0k&,2*39)2=,45NO <  D {+ W//////vd4QUVVVKKMMM)	 	 	 	 	 	 	 	 	 	 	 	 	 	 	* s   D EEEc                    ddl m} | j        }t          dd| j        j         d          5 } |t          j        |                                                    }|	                                 d d d            n# 1 swxY w Y   |S )Nr   )WidgetReportr   zRender widgetsF)r   r   r   leave)
r   r   rV   r   r)   r   r   r   r   rH   )rW   r   rV   r   r   s        r=   r   zProfileReport._render_widgets  s    MMMMMM!00	
 
 
 	
 "l4=#8#899@@BBGKKMMM	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 s   AA<<B B c                 Z   dt           dt           ffd| j        }t          dd| j        j                   5 }t          |          } |          }t          || j                  }t          j        |d          }|	                                 d d d            n# 1 swxY w Y   |S )	Nor^   c                    t          |           rt          |           } t          | t                    r fd|                                 D             S t          | t
          t          t          t          f          r| S t          | t                    rfd| D             S t          | t                    rfd| D             S t          | t          j                  r |                                           S t          | t          j                  r |                     d                    S t          | t           j                  r |                                           S t          | t&                    r |                                           S t          | t           j                  r|                                 S t          |           S )Nc                 @    i | ]\  }} |           |          S r9   r9   )r:   kv	encode_its      r=   
<dictcomp>zAProfileReport._render_json.<locals>.encode_it.<locals>.<dictcomp>  s/    IIItq!		!iillIIIr?   c                 &    g | ]} |          S r9   r9   r:   r   r   s     r=   
<listcomp>zAProfileReport._render_json.<locals>.encode_it.<locals>.<listcomp>  !    444QIIaLL444r?   c                 &    h | ]} |          S r9   r9   r   s     r=   r   z@ProfileReport._render_json.<locals>.encode_it.<locals>.<setcomp>  r   r?   records)orient)r   r   rE   rJ   itemsboolr   floatr   r   setrF   Seriesto_listr   to_dictnpndarraytolistr   genericitem)r   r   s    r=   r   z-ProfileReport._render_json.<locals>.encode_it  s   A 1II!T"" "IIIIqwwyyIIIIa$UC!899 "H4(( "4444!44443'' "4444!444429-- "$9QYY[[1112<00 	"$9QYYiY%@%@AAA2:.. "$9QXXZZ0006** "$9QVVXX...2:.. "6688Oq66Mr?   r   zRender JSONr      )indent)
r   r   r   r)   r   r   r   r   dumpsrH   )rW   descriptionr   description_dictr   r   s        @r=   r   zProfileReport._render_json  s   	" 	" 	" 	" 	" 	" 	" 	"2 *-T[5M1M
 
 
 	-k::(y)9::-.>LL:.q999DKKMMM	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 s   AB  B$'B$c                     | j         S )zGenerate and return complete template as lengthy string
            for using with frameworks.

        Returns:
            Profiling report html including wrapper.

        )r   rz   s    r=   r   zProfileReport.to_html  s     yr?   c                     | j         S )z_Represent the ProfileReport as a JSON string

        Returns:
            JSON string
        )r   rz   s    r=   r   zProfileReport.to_json  s     yr?   c                     ddl m} ddlm} t	          j                    5  t	          j        d            | || j        |                      ddd           dS # 1 swxY w Y   dS )a  Used to output the HTML representation to a Jupyter notebook.
        When config.notebook.iframe.attribute is "src", this function creates a temporary HTML file
        in `./tmp/profile_[hash].html` and returns an Iframe pointing to that contents.
        When config.notebook.iframe.attribute is "srcdoc", the same HTML is injected in the "srcdoc" attribute of
        the Iframe.

        Notes:
            This constructions solves problems with conflicting stylesheets and navigation links.
        r   display)get_notebook_iframer   N)IPython.displayr  <ydata_profiling.report.presentation.flavours.widget.notebookr  r   r   r   r)   )rW   r  r  s      r=   to_notebook_iframez ProfileReport.to_notebook_iframe  s     	,+++++	
 	
 	
 	
 	
 	

 $&& 	< 	<!(+++G''T::;;;	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	<s   /AA #A c                     	 ddl m} t          j        d           n# t          $ r Y nw xY wddlm}  || j                   dS )z,The ipython notebook widgets user interface.r   r   zIpywidgets is not yet fully supported on Google Colab (https://github.com/googlecolab/colabtools/issues/60).As an alternative, you can use the HTML report. See the documentation for more information.r  N)r   r   r   r   r   r  r  r   )rW   r   r  s      r=   
to_widgetszProfileReport.to_widgets  s    	******Mn    # 	 	 	D	 	,+++++s    
**c                 .    |                                   dS )zPThe ipython notebook widgets user interface gets called by the jupyter notebook.N)r  rz   s    r=   _repr_html_zProfileReport._repr_html_"  s    !!!!!r?   c                     dS )z<Override so that Jupyter Notebook does not print the object. r9   rz   s    r=   __repr__zProfileReport.__repr__&  s    rr?   otherc                 <    ddl m}  || |g||n| j                  S )a  Compare this report with another ProfileReport
        Alias for:
        ```
        ydata_profiling.compare([report1, report2], config=config)
        ```
        See `ydata_profiling.compare` for details.

        Args:
            other: the ProfileReport to compare to
            config: the settings object for the merged ProfileReport. If `None`, uses the caller's config

        Returns:
            Comparison ProfileReport
        r   )compare)ydata_profiling.compare_reportsr$  r)   )rW   r"  r)   r$  s       r=   r$  zProfileReport.compare*  s7    " 	<;;;;;we}0BffTTTr?   )NFFNFFNNTNNNNr8   )T)r^   N)7__name__
__module____qualname____doc__rw   rv   ru   rs   rt   r   __annotations__r   r   rF   r   r
   r  r   rJ   r   r   r   r]   staticmethodrA   rP   rx   propertyr3   r4   r   r   r   r   rV   r   r   r   r   r   r   r   r  r   r   r   r   r   r   r   r  r  r  r!  r$  r9   r?   r=   r(   r(   6   sn         
 GEHE 9= $!!%26,0/3%)&*] ]U2<345] ] 	]
 ] ] ] ] eD#I./] ] .)] ^,] "] d^] ] ] ]~ !U2<345!! ! eD#I./	!
 ! 
! ! ! \!F U2<345FN	%j01	2   \.) )x} ) ) ) ) )8 .1    X
  N       X  	% 	% 	% 	% X	% #    X
     X
 c    X
 c    X
     X/ 6 / / / /+D + + + +$ $ $ $ $

 

 

 

 

5I 5I5d#3 5IT 5IT 5I 5I 5I 5Inc    8    %c % % % %N        < < < <*    " " " "#    
 DHU U$U.6x.@U	U U U U U Ur?   r(   )Er   r   r   pathlibr   typingr   r   r   ydata_profiling.utils.backendr   r   r   r   r	   r
   pyspark.sqlr   dataclassesr   r   numpyr  pandasrF   	tqdm.autor   	typeguardr   visionsr   ydata_profiling.configr   r   r   #ydata_profiling.expectations_reportr   ydata_profiling.modelr   ydata_profiling.model.alertsr   ydata_profiling.model.describer   r~   ydata_profiling.model.sampler    ydata_profiling.model.summarizerr   r   r   r   ydata_profiling.model.typesetr   ydata_profiling.reportr   (ydata_profiling.report.presentation.corer   ;ydata_profiling.report.presentation.flavours.html.templatesr     ydata_profiling.serialize_reportr!   ydata_profiling.utils.dataframer"   ydata_profiling.utils.loggerr#   ydata_profiling.utils.pathsr$   ri   r(   r9   r?   r=   <module>rF     sR            ' ' ' ' ' ' ' ' ' ' > > > > > >X  H(###                4&&JJ333333 , , , , , , , ,               ! ! ! ! ! ! " " " " " " B B B B B B B B B B B B B B B B 1 1 1 1 1 1 2 2 2 2 2 2 B B B B B B / / / / / /            ; : : : : : 7 7 7 7 7 7 9 9 9 9 9 9      = < < < < < : : : : : : 8 8 8 8 8 8 2 2 2 2 2 2	n	-	-	- FU FU FU FU FUO%7 FU FU FU FU FUs   AAA