
    Q/Ph%                     l   d Z ddlmZ ddlmZmZmZmZ ddlZddl	m
Z
 ddlmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddl m!Z!m"Z" ddl#m$Z$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 	 ddedeej2        df         de'dedee3         defdZ4dS ) zIOrganize the calculation of statistics for each series in this DataFrame.    )datetime)AnyDictOptionalUnionN)tqdm)VisionsTypeset)Settings)BaseAnalysisBaseDescription)
get_alerts)calculate_correlationget_active_correlations)
preprocess)TimeIndexAnalysis)get_duplicates)get_missing_activeget_missing_diagram)get_scatter_plotget_scatter_tasks)get_custom_sample
get_sample)BaseSummarizer)get_series_descriptions)get_table_stats)get_time_index_description)progress)__version__configdfzpyspark.sql.DataFrame
summarizertypesetsamplereturnc                 	     t           t                    st          dt                                t          t          j                  sk	 ddlm} t          |          s t          dt                     d          n3# t          $ r&}t          dt                     d          |d}~ww xY wt                     d	}t          |d
 j
         d          5 t          j                    }xj        t          j                  z  c_        t!           ||                               d           xj        dz  c_        d                                  D             }	d |	                                D             }
d |	                                D             }                                  t)          t*          d                      }|d         dk    r\t-                     }xj        t          |          z  c_          fd|D             }d |                                D             }ni }                    d           t/           |          }xj        t          |          z  c_        d |D             }|D ]7\  }} t)          t0          d| d|            |||          ||         |<   8t3           |          }xj        t          |          z  c_         fd|                                D             }d |                                D             }                    d           |t5                     }nt7          |          }                                  t)          t8          d           |
          \  }}|                    |            t)          t:          d           | |          } j        j        j         rtC           |          }                    d           tD           #                                d}                                                     d            t          j                    }ddd           n# 1 swxY w Y   tI           j%        ||          }d} j        j        j         r|rtM          d"i |}tO          ||| |||||||!          }|S )#az  Calculate the statistics for each series in this DataFrame.

    Args:
        config: report Settings object
        df: DataFrame.
        summarizer: summarizer object
        typeset: visions typeset
        sample: optional, dict with custom sample

    Returns:
        This function returns a dictionary containing:
            - table: overall statistics.
            - variables: descriptions per series.
            - correlations: correlation matrices.
            - missing: missing value diagrams.
            - alerts: direct special attention to these patterns in your data.
            - package: package details.
    z)`config` must be of type `Settings`, got r   )	DataFramezO`df` must be either a `pandas.DataFrame` or a `pyspark.sql.DataFrame`, but got .zN`df must be either a `pandas.DataFrame` or a `pyspark.sql.DataFrame`, but got z0.If using Spark, make sure PySpark is installed.N   zSummarize dataset)totaldescdisablepositionzGet variable types   c                 &    i | ]\  }}||d          S )type ).0columndescriptions      ^/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/ydata_profiling/model/describe.py
<dictcomp>zdescribe.<locals>.<dictcomp>_   s3     
 
 
# K'
 
 
    c                 $    g | ]\  }}|d k    |S )Unsupportedr0   r1   r2   	type_names      r4   
<listcomp>zdescribe.<locals>.<listcomp>c   s1     
 
 
!	M)) )))r6   c                      g | ]\  }}|d v 	|S )>   Numeric
TimeSeriesr0   r9   s      r4   r;   zdescribe.<locals>.<listcomp>h   s2     
 
 
!	555 555r6   zGet dataframe statisticsnc                 ^    i | ])}| t          t          d | d          |          *S )z
Calculate z correlation)r   r   )r1   correlation_namer   r    pbarseries_descriptions     r4   r5   zdescribe.<locals>.<dictcomp>y   sj        % ! #()?!1???# # ".0B	#D #D  r6   c                     i | ]
\  }}|||S Nr0   )r1   keyvalues      r4   r5   zdescribe.<locals>.<dictcomp>   s*       )sEUEVUEVEVEVr6   zGet scatter matrixc                     i | ]
\  }}||d iS rE   r0   )r1   xys      r4   r5   zdescribe.<locals>.<dictcomp>   s/     5
 5
 5
!QA4y5
 5
 5
r6   zscatter z, c           
      `    i | ]*\  }}| t          t          d |           |          +S )zMissing diagram )r   r   )r1   namesettingsr   r    rB   s      r4   r5   zdescribe.<locals>.<dictcomp>   s[     
 
 
 h P(.6O6O6OPPH 
 
 
r6   c                     i | ]
\  }}|||S rE   r0   )r1   rL   rG   s      r4   r5   zdescribe.<locals>.<dictcomp>   s#    WWW;4UEV4EVEVEVr6   zTake samplezDetecting duplicatesz
Get alertszGet reproduction details)ydata_profiling_versionydata_profiling_config	Completed)analysistime_index_analysistable	variablesscattercorrelationsmissingalertspackager#   
duplicatesr0   )(
isinstancer
   	TypeErrorr/   pdr&   pyspark.sqlImportErrorr   r   progress_barr   utcnowr)   lencolumnsr   set_postfix_stritemsupdater   r   r   r   r   r   r   r   r   r   vars
timeseriesactiver   r   jsonr   titler   r   )!r   r    r!   r"   r#   SparkDataFrameexnumber_of_tasks
date_startrU   supported_columnsinterval_columnstable_statscorrelation_namesrW   scatter_tasksscatter_matrixrI   rJ   missing_maprX   samplesmetricsr[   rY   tsindex_descriptionrZ   date_endrR   rS   r3   rB   rC   s!   ``                             @@r4   describer|      sj   4 fh'' TRDLLRRSSS b",'' 	??????b.11 qfjkmfnfnqqq    	 	 	Caefhaiai C C C  	 
FB		BO	 ''	
 
 
 o%
 
_&&
 	

c"*oo%

4B
GT
 
 	1222

a


 
'9'?'?'A'A
 
 
	
 
%.__%6%6
 
 


 
%.__%6%6
 
 

 	 Rh6PQQB*
 

 sq   7 ? ?JJ#/000JJ       ):  L -9-?-?-A-A  LL L 	1222)&2BCC

c-(((

5
 5
%25
 5
 5
 " 	2 	2DAq$8 $(;1(;(;(;(;$ $b!Q 0$2 $2N1a  
 )==

c+&&&


 
 
 
 
 
 #."3"3"5"5	
 
 
 XW'--//WWW 	]+++> ,,GG'//G Uh~t=STTB)
 
 	7###9*dL99K!3\
 
 ;!( 	V"<VR"U"U7888'2&,kkmm
 
 	[)))?$$_o% o% o% o% o% o% o% o% o% o% o% o% o% o% o%b FL*h??H{$ G)< G/FF2EFF!/$!  K s+   6B 
B;!B66B;)NRR
R
rE   )5__doc__r   typingr   r   r   r   pandasr^   	tqdm.autor   visionsr	   ydata_profiling.configr
   ydata_profiling.modelr   r   ydata_profiling.model.alertsr   "ydata_profiling.model.correlationsr   r   ydata_profiling.model.dataframer   !ydata_profiling.model.descriptionr    ydata_profiling.model.duplicatesr   ydata_profiling.model.missingr   r   ydata_profiling.model.pairwiser   r   ydata_profiling.model.sampler   r    ydata_profiling.model.summarizerr   ydata_profiling.model.summaryr   ydata_profiling.model.tabler   &ydata_profiling.model.timeseries_indexr   "ydata_profiling.utils.progress_barr   ydata_profiling.versionr   r&   dictr|   r0   r6   r4   <module>r      s?   O O       - - - - - - - - - - - -           " " " " " " + + + + + + ? ? ? ? ? ? ? ? 3 3 3 3 3 3        7 6 6 6 6 6 ? ? ? ? ? ? ; ; ; ; ; ; Q Q Q Q Q Q Q Q N N N N N N N N F F F F F F F F ; ; ; ; ; ; A A A A A A 7 7 7 7 7 7 M M M M M M 7 7 7 7 7 7 / / / / / / "t ttbl334t t 	t
 TNt t t t t t tr6   