
    Q/Ph                     
   d Z ddlZddlmZmZmZmZmZmZ ddl	Z	ddl
ZddlZddlZddl	mZ ddlmZ ddlmZmZmZmZ ddlmZmZ ddlmZ dd	lmZm Z  dd
l!m"Z"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ de0dede1fdZ2	 drde)deej3        eej3                 f         de4dej5        fdZ6	 	 	 dsde)dej7        dee0ej7        f         de4de8de8dej5        fdZ9 e-            de)dej3        de1fd             Z: e-            	 dtde)dej7        dee0ej7        f         de8de1f
d!            Z; e-            	 dtde)dej7        dee0ej7        f         de8de1f
d"            Z<d#eeeef         defd$Z=d%e0dee0         fd&Z> e-            dude)d(ej?        d)e0de1fd*            Z@ e-            de)dej3        de1fd+            ZA e-            	 dvde)dej3        d.e1d/e1de1f
d0            ZB e-            de)d1ej3        d2ej3        d.e1d/e1de1fd3            ZC	 dtd(ej3        d4ed5e8deejD        e	jE        jF        f         fd6ZG	 dtd(ej3        d4ed5e8deejD        e	jE        jF        f         fd7ZH e-            de)d(ej3        de1fd8            ZIde)dee1         fd9ZJdej3        d:e	jK        jL        de	jK        jL        fd;ZM e-            	 dwde)deej3        eej3                 f         d=eej3        eej3                 f         de4de	jN        j5        f
d>            ZO e-            	 	 dxde)d?ede4d@e8de	jN        j5        f
dA            ZP	 drde)deeQej3        f         de4de	jN        j5        fdBZR e-            	 dyde)deeQej3        f         deeSeSf         de1fdD            ZTde)dej3        de0fdEZU	 dzde)dej3        de4de1fdGZV	 dzde)deej3                 de4de1fdHZW e-            	 dzde)deeQej3        f         de4de1fdI            ZX	 	 	 d{dKej?        dLe1dMeee1eQf                  dNe0dOeee1                  dej?        fdPZY	 	 d|dSej?        dee0e0f         dTe1dejD        fdUZZe%	 	 	 	 	 d}dKej?        dLe1dMeee1eQf                  dNe0dOeee1                  dee0e0f         dTe1dejD        fdV            Z[	 d~d:e	jK        jL        dXe1de	jK        jL        fdYZ\	 	 	 	 	 dd_ej3        d`e0deeSeSf         daeSdbe8dTeeSdcf         dde0de	jK        jL        fdeZ]	 	 	 	 	 ddfedgee1         dhe0deeSeSf         dTeeSdcf         daeSdbe8dde0de	jK        jL        fdiZ^	 	 	 	 	 	 	 	 ddledmedeeSeSf         daeSdbe8dde0d#e1dne8doe8dpe	jK        jL        de	jK        jL        fdqZ_dS )z(Plot functions for the profiling report.    N)AnyCallableListOptionalTupleUnion)pyplot)PolyCollection)ColormapLinearSegmentedColormapListedColormaprgb2hex)AutoDateLocatorConciseDateFormatter)Patch)FuncFormatterMaxNLocator)plot_acf	plot_pacf)typechecked)	WordCloud)Settings)convert_timestamp_to_datetime)manage_matplotlib_context)plot_360_n0sc0petick_valtick_posreturnc                 F    t          |                               d          S )Nz%Y-%m-%d %H:%M:%S)r   strftime)r   r   s     b/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/ydata_profiling/visualisation/plot.py	format_fnr"      s    (22;;<OPPP          configseriesfigsizec           	         t          |t                    s|g}t          j        |          }t	          |          D ]\  }}|                                }t          | j        j        ddddd          	                    |          }|
                    dt          |          |dz             }|                    |           |                    d	           |S )
Nr)   white{   i,        )	font_pathbackground_colorrandom_statewidthheightscale   off)
isinstancelistpltfigure	enumerateto_dictr   plotr0   generate_from_frequenciesadd_subplotlenimshowaxis)	r'   r(   r)   r>   iseries_data	word_dict	wordcloudaxs	            r!   _plot_word_cloudrI      s    
 fd## :g&&&D#F++  ;''))	k+$
 
 
 $
#I
.
. 	 aVa!e44
		)
Kr#   Fbinsdate
hide_yaxisc                    t          |t                    rt          | j        j        j                  }t          j        |          }|                    d          }t          t          t          |                              D ]}	t          |          rct          j        ||	                   }
|                    ||	         dd         |
dz  z   ||	         |
| j        j        j        |	         d           |r,|j                            t#          t$                               | j        j        j        s|                    g            |r|j                            d           | j        j        j        s|                    g            |s|                    d	           nt          j        |          }|                    d          }|s|                    d	           n,|j                                                            d           t          j        |          }
|                    |dd         |
dz  z   ||
| j        j        j        d
                    |r,|j                            t#          t$                               | j        j        j        s|                    g            |S )a  Plot a histogram from the data and return the AxesSubplot object.

    Args:
        config: the Settings object
        series: The data to plot
        bins: number of bins (int for equal size, ndarray for variable size)
        figsize: The size of the figure (width, height) in inches, default (6,4)
        date: is the x-axis of date type

    Returns:
        The histogram plot.
    r+   o   Nr/   g333333?)	facecoloralphaF	Frequencyr   )rP   )r8   r9   rA   htmlstyle_labelsr:   r;   r@   reversedrangenpdiffbarprimary_colorsxaxisset_major_formatterr   r"   r>   	histogramx_axis_labelsset_xticklabelsyaxisset_visiblexticklabels	supylabel
set_ylabelaxes	get_yaxis)r'   r(   rJ   r)   rK   rL   n_labelsfigr>   idxrY   s              r!   _plot_histogramrk   7   s   * $ 2%v{(011j)))s##Dx1122 	. 	.C4yy wtCy))IcrcNTAX-3K$k/>sC      I
..}Y/G/GHHH;(6 )$$R((( .
&&u---{$2 	 OOB 	'MM+&&&j)))s## 	5OOK((((I!!--e444wt}}"Iq k'6q9	 	 	
 	
 	
  	EJ**=+C+CDDD{$2 	%  $$$Kr#   word_countsc                 B    t          | |           t          |           S )N)r'   r(   )rI   r   )r'   rl   s     r!   plot_word_cloudrn      s#    F;7777F###r#   c                     t          | |||d          }|j                            |rdnd           |j                                         t          |           S )a"  Plot an histogram of the data.

    Args:
        config: Settings
        series: The data to plot.
        bins: number of bins (int for equal size, ndarray for variable size)
        date: is histogram of date(time)?

    Returns:
      The resulting histogram encoded as a string.

    )      )rK   r)   Z   -   rotation)rk   r\   set_tick_paramsr;   tight_layoutr   )r'   r(   rJ   rK   r>   s        r!   r^   r^      s`    & 664dFKKKDJd(:;;;KF###r#   c                 X   t          | ||d|d          }|                    d           |j                                        D ] }|j                            |rdnd           !|j                            |rdnd	           |j                                         t          |           S )
a  Plot a small (mini) histogram of the data.

    Args:
      config: Settings
      series: The data to plot.
      bins: number of bins (int for equal size, ndarray for variable size)

    Returns:
      The resulting mini histogram encoded as a string.
    rq   g      @T)r)   rK   rL   wr%      rr   rs   rt   )
rk   set_facecolorr\   get_major_tickslabel1set_fontsizerv   r;   rw   r   )r'   r(   rJ   rK   r>   ticks         r!   mini_histogramr      s    " idt  D 	s
**,, 3 3  d!12222Jd(:;;;KF###r#   cmapc                 z     | t          j        dd| j        dz                      }t          j        d|          S )zGet the upper half of the color map

    Args:
        cmap: the color map

    Returns:
        A new color map based on the upper half of another color map

    References:
        https://stackoverflow.com/a/24746399/470433
          ?r6   r/   	cmap_half)rX   linspaceNr   	from_list)r   colorss     r!   get_cmap_halfr      s<     T"+c1dfk2233F #,[&AAAr#   rh   c                 R    | dk    rd}n| dk    rd}n| dk    rd}n| dk    rd}nd	S |S )
zDynamic label font sizes in correlation plots

    Args:
        n_labels: the number of labels

    Returns:
        A font size or None for the default font size
    d   r&   P      2   r%   (   r{   N )rh   	font_sizes     r!   get_correlation_font_sizer      sO     #~~			B			B			B		tr#   rO   datavminc           
         t          j                    \  }}t          j        | j        j        j                  }|dk    rt          |          }t          j        |          }|                    | j        j        j	                   |j
        }|                    ||dd|          }t          j        |           |                                j                                        r<t!           |t"          j                  d          g}t          j        |dd	           |                    t#          j        d|j        d         t/          |j        d                   t1          |          z                       |                    t#          j        d|j        d         t/          |j        d                   t1          |          z                       t5          t1          |                    }	|                    |d
|	           |                    ||	           t          j        d           t=          |           S )zPlot image of a matrix correlation.

    Args:
      config: Settings
      data: The matrix correlation to plot.
      vmin: Minimum value of value range.

    Returns:
      The resulting correlation matrix encoded as a string.
    r   r6   nearest)r   vmaxinterpolationr   zinvalid
coefficient)rP   labelzupper rightg      @)handleslochandleheightrr   ru   fontsizer   g?)bottom)r:   subplotsget_cmapr>   correlationr   r   copyset_badbadcolumnsrB   colorbarisnullvaluesanyr   rX   nanlegend
set_xticksarangeshapefloatrA   
set_yticksr   r`   set_yticklabelssubplots_adjustr   )
r'   r   r   fig_coraxes_corr   labelsmatrix_imagelegend_elementsr   s
             r!   correlation_matrixr      s    GX</455DqyyT""9T??DLL(,---\F??4ayt #  L L{{}}!! 
 44<<?UVVVW
#	
 	
 	
 	
 	!TZ]E$*Q-4H4H3v;;4VWWXXX	!TZ]E$*Q-4H4H3v;;4VWWXXX)#f++66IVb9EEEVi888s####F###r#   c                    t          j        d           t          j        d           | j        j        j        d         }t          |          | j        j        k    r8t          j
        |d          }t          j        |j        |j        |           n!t          j        |j        |j        |           t          |           S )aH  Scatter plot (or hexbin plot) from a series of complex values

    Examples:
        >>> complex_series = pd.Series([complex(1, 3), complex(3, 1)])
        >>> scatter_complex(complex_series)

    Args:
        config: Settings
        series: the Series

    Returns:
        A string containing (a reference to) the image
    	ImaginaryRealr   Tas_cmap)r   color)r:   ylabelxlabelrS   rT   r[   rA   r>   scatter_thresholdsnslight_palettehexbinrealimagscatterr   )r'   r(   r   r   s       r!   scatter_complexr     s     J{JvK,Q/E
6{{V[222 555
6;$77777FKE::::F###r#   WidthHeightx_labely_labelc                    t          j        |           t          j        |           | j        j        j        d         }t          |                                 }t          |          | j	        j
        k    r(t          j        |d          }t          j        |d|i nt          j        |d|i t          |           S )az  Scatter plot (or hexbin plot) from one series of sequences with length 2

    Examples:
        >>> scatter_series(file_sizes, "Width", "Height")

    Args:
        config: report Settings object
        series: the Series
        x_label: the label on the x-axis
        y_label: the label on the y-axis

    Returns:
        A string containing (a reference to) the image
    r   Tr   r   r   )r:   r   r   rS   rT   r[   ziptolistrA   r>   r   r   r   r   r   r   )r'   r(   r   r   r   r   r   s          r!   scatter_seriesr   6  s    $ JwJwK,Q/E D
6{{V[222 555
D$t$$$$T''''F###r#   series1series2c                    t          j        |           t          j        |           | j        j        j        d         }|                                |                                z  }t          |          | j        j	        k    r;t          j        |d          }t          j        ||         ||         d|           n#t          j        ||         ||         |           t          |           S )a  Scatter plot (or hexbin plot) from two series

    Examples:
        >>> widths = pd.Series([800, 1024])
        >>> heights = pd.Series([600, 768])
        >>> scatter_series(widths, heights, "Width", "Height")

    Args:
        config: Settings
        series1: the series corresponding to the x-axis
        series2: the series corresponding to the y-axis
        x_label: the label on the x-axis
        y_label: the label on the y-axis

    Returns:
        A string containing (a reference to) the image
    r   Tr      )gridsizer   r   )r:   r   r   rS   rT   r[   notnarA   r>   r   r   r   r   r   r   )r'   r   r   r   r   r   indicesr   s           r!   scatter_pairwiser   V  s    * JwJwK,Q/E}}7==??3G
7||fk333 555
77#WW%5NNNNNGG$gg&6eDDDDF###r#   r   hide_legendc           	         | j         j                            t                    }t	          j        d          \  }}|                    d           |                    dt          j	        |                      |
                    dd           d}t          | ||          D ]\  }}}	|                    d|d|||	          }
|
d                                         \  }}}}||z  |z  d	k     rd
nd}|| 	                                z  dz  }|dk    r5t          |d          r%|dd| d}|                    |
|gd|dd           ||z  }d}|s|                    dddd          }||fS )a  Plot a stacked horizontal bar chart to show category frequency.
    Works for boolean and categorical features.

    Args:
        data (pd.Series): category frequencies with category names as index
        colors (list): list of colors in a valid matplotlib format
        hide_legend (bool): if true, the legend is omitted

    Returns:
        ax: Stacked bar plot (matplotlib.axes)
        legend: Legend handler (matplotlib)
    )rp   r/   r+   r7   r   g?g?r6   )yr3   r4   leftr   r   r   r,   darkgreyr   r{   	bar_label.1fz%
()centerzx-largebold)r   
label_typer   r   
fontweightNr   r   zxx-large
upper left)ncolbbox_to_anchorr   r   )indexr   astypestrr:   r   rC   set_xlimrX   sumset_ylimr   barhget_facecolorhasattrr   r   )r   r   r   r   _rH   startsxr   r   rectsrgb
text_colorpc_of_totaldisplay_txtr   s                     r!   _plot_stacked_barhr   y  s     Z%%c**F L(((EArGGENNNKK26$<<   KKSFtVV44  5%!1QV5PUVV 1X++--
1a !A	CWWZ
 $((**ns*??wr;77?(666!666KLL#}# "!     	!F 
6JL  
 
 v:r#   c                    dt           j        dt          fd}t          j        d          \  }}t          j        |  ||           ddi|          \  }}}d	}|s#t          j        || j        j        d
dd          }||fS )a  Plot a pie chart to show category frequency.
    Works for boolean and categorical features.

    Args:
        data (pd.Series): category frequencies with category names as index
        colors (list): list of colors in a valid matplotlib format
        hide_legend (bool): if true, the legend is omitted

    Returns:
        ax: pie chart (matplotlib.axes)
        legend: Legend handler (matplotlib)
    r   r   c                 0     dt           dt          f fd}|S )Npctr   c                     t          j                  }t          t          | |z  dz                      }| dd|ddS )Ng      Y@r   z%  (dr   )rX   r   intround)r  totalvalr   s      r!   
my_autopctz9_plot_pie_chart.<locals>.make_autopct.<locals>.my_autopct  sK    F6NNEeC%K%/0011C+++3+++++r#   )r   r   )r   r
  s   ` r!   make_autopctz%_plot_pie_chart.<locals>.make_autopct  s7    	,E 	,c 	, 	, 	, 	, 	, 	,
 r#   )r&   r&   r+   r   rz   )autopct	textpropsr   Nlarger   r   )r   r   r   )	pdSeriesr   r:   r   pier   r   r   )r   r   r   r  r   rH   wedgesr   s           r!   _plot_pie_chartr    s     RY 8     L(((EAr7T""C.	  LFAq F 
J!
 
 
 v:r#   c                    | j         j        j        }|*t          j        d                                         d         }t          |          t          |          k     rKt          t          |          t          |          z            dz   }||z  }|dt          |                   }| j         j        j        }|dk    rdt          |t                    r*|D ]&}t          ||| j        j        j                  \  }}'ngt          ||| j        j        j                  \  }}nB|dk    r%t          ||| j        j        j                  \  }}nd	| d
}| t!          |          t#          | |g n|gd          S )a  Generate category frequency plot to show category frequency.
    Works for boolean and categorical features.

    Modify colors by setting 'config.plot.cat_freq.colors' to a
    list of valid matplotib colors:
    https://matplotlib.org/stable/tutorials/colors/colors.html

    Args:
        config (Settings): a profile report config
        data (pd.Series): category frequencies with category names as index

    Returns:
        str: encoded category frequency plot encoded
    Nzaxes.prop_cycler   r6   r   rZ   )r   r  'z>' is not a valid plot type! Expected values are ['bar', 'pie']tight)bbox_extra_artistsbbox_inches)r>   cat_freqr   r:   rcParamsby_keyrA   r  typer8   r9   r   varscatredactr  
ValueErrorr   )	r'   r   r   
multiplier	plot_typevr>   r   msgs	            r!   cat_frequency_plotr%    s   ( [!(F~/07799'B 6{{SYYTS[[011A5
f$CII& $)IEdD!! 	  1v6;?+A     ff
 .f&+/*@  LD&& 
e		&tVAWXXXff1	 1 1 1 	 	oo!'22fX   r#   c                 ,   | j         j        j        }| j         j        j        }||k     rj|d         }t	          |          dk    r|d         nd}t          j        d||gt	          |                    fdt          j                  D             }|S )Nr   r/   r6   z#000000ts_legc                 @    g | ]}t           |                    S r   )r   ).0rD   r   s     r!   
<listcomp>z0create_comparison_color_list.<locals>.<listcomp>'  s)    :::q'$$q''"":::r#   )	rS   rT   r[   rU   rA   r   r   rW   r   )r'   r   r   initendr   s        @r!   create_comparison_color_listr-    s    [-F[&Fayv;;!++fQii&0D#;FTT::::E$&MM:::Mr#   rC   c                     t          | j        t          j                  rOt	                      }|j                            |           |j                            t          |                     |S N)	r8   r   r  DatetimeIndexr   r\   set_major_locatorr]   r   )r(   rC   locators      r!   _format_ts_date_axisr3  +  sa     &, 011 F!##
$$W---
&&';G'D'DEEEKr#   r%   rq   gapsc           	         t          j        |          }|                    d          }t          |           }t	          |t
                    rt          d |D                       }t          d |D                       }| j        j	        j
        }	t          ||||	          D ]x\  }
}}}|
                    |||dd           t          |
|           |j                            t!          d                     |D ]}|                    ||||d	
           yn|                    |d           t          ||           |j                            t!          d                     |D ]F}|                    ||                                |                                |d         d	
           Gt%          |           S )zPlot an line plot from the data and return the AxesSubplot object.
    Args:
        variables: The data to plot.
        figsize: The size of the figure (width, height) in inches, default (6,4).
    Returns:
        The TimeSeries lineplot.
    r+   rN   c              3   >   K   | ]}|                                 V  d S r/  )minr)  ss     r!   	<genexpr>z/plot_timeseries_gap_analysis.<locals>.<genexpr>J  *      ++q15577++++++r#   c              3   >   K   | ]}|                                 V  d S r/  )maxr9  s     r!   r;  z/plot_timeseries_gap_analysis.<locals>.<genexpr>K  r<  r#   ?T)rH   r   r   rQ   x_compat)integer      ?)r   y1y2r   rQ   )rH   r@  r   )r:   r;   r@   r-  r8   r9   r8  r>  rS   rT   rU   r   r>   r3  ra   r1  r   fill_betweenr   )r'   r(   r5  r)   ri   rH   r   min_max_r   seriegaps_r   r   gaps                  r!   plot_timeseries_gap_analysisrK  7  s    *W
%
%
%C			B)&11F&$ ++F+++++++F+++++"**-fdFF*K*K 	R 	R&E5%JJ     !+++H&&{4'@'@'@AAA R R#$4uDQQQQR	R 	rD)))VR(((
"";t#<#<#<=== 	 	COO&**,,6::<<vayPT      F###r#   	variablesr5   c           	         t          j        |          }|                    d          }t          t	          |                    }t          ||         d         t                    rt          |           }ddg}|                                D ]\  }}	t          t	          d |	d         D                                 rt          |	d                   D ]n\  }
}|rA||                                z
  |                                |                                z
  z  }|                    ||||
         ||
         d	           on|                                D ]t\  }}	|	d         d
k    rc|	d         }|rA||                                z
  |                                |                                z
  z  }|                    ||d           ut          j        dd           t          j        d           t!          |           S )a;  Plot an line plot from the data and return the AxesSubplot object.
    Args:
        variables: The data to plot.
        figsize: The size of the figure (width, height) in inches, default (6,4).
        scale: Scale series values between [0,1]. Defaults to False.
    Returns:
        The TimeSeries lineplot.
    r+   rN   r  -z--c                     g | ]}|d k    	S )
TimeSeriesr   )r)  ts     r!   r*  z,plot_overview_timeseries.<locals>.<listcomp>}  s    AAAql*AAAr#   r(   r?  )rH   r   	linestyler   rQ   rP  )rH   r   rQ   )gp=
ף?r6   r   )r   r   gffffff?)right)r:   r;   r@   nextiterr8   r9   r-  itemsallr<   r8  r>  r>   r   r   r   )r'   rL  r)   r5   ri   rH   colr   line_stylesr   rD   r(   s               r!   plot_overview_timeseriesrZ  f  s     *W
%
%
%C			B
tI

C)C.($// :-f55Dk"** 	 	IC4AADLAAABBCC 
!*4>!:!: 	 	IAv Y"(6::<<"7FJJLL6::<<<W!XKK!"-a.$Qi"      	 #** 	: 	:ICF||++h U$vzz||3

vzz||8STFrD999Ji\::::c""""F###r#   c                    t          j        |          }|                    d          }t          |t                    rb| j        j        j        }t          |           }t          |||          D ]/\  }}}	|
                    ||	dd          }
t          ||
           0n<|
                    | j        j        j        d         d          }
t          ||
           |S )zPlot an line plot from the data and return the AxesSubplot object.
    Args:
        series: The data to plot
        figsize: The size of the figure (width, height) in inches, default (6,4)
    Returns:
        The TimeSeries lineplot.
    r+   rN   g      ?T)r   r   rQ   r@  r   )r   r@  )r:   r;   r@   r8   r9   rS   rT   rU   r-  r   r>   r3  r[   )r'   r(   r)   ri   r>   r   r   rH  r   r   rH   s              r!   _plot_timeseriesr\    s     *W
%
%
%C??3D&$ 
)"*-f55#&vvv#>#> 	, 	,E5%%uD4PPB ++++	,
 [[v{0?BT[RRVR(((Kr#   ry   c                    t          | ||          }|j                            d           t          j        dd           |j                                        D ]V}t          |j        t          j	                  r|j
                            d           <|j
                            d           W|j                                         t          |           S )	a  Plot an time-series plot of the data.
    Args:
        config: profiling settings.
        series: The data to plot.
        figsize: The size of the figure (width, height) in inches, default (3, 2.25)
    Returns:
        The resulting timeseries plot encoded as a string.
    r+   rs   rt   ytickrq   )	labelsizer%   r{   )r\  r\   rv   r:   rcr}   r8   r   r  r0  r~   r   r;   rw   r   )r'   r(   r)   r>   r   s        r!   mini_ts_plotra    s     FFG<<<DJ+++F7a    
**,, ( (flB$455 	(K$$Q''''K$$Q''''KF###r#   c                 z    | j         j        j        }t          |          dz  dz
  }t	          j        ||g          S )Nr/   r6   )r  
timeseriespacf_acf_lagrA   rX   r8  )r'   r(   lagmax_lag_sizes       r!   _get_ts_lagrg    s9    
+
 
-CKK1$)L63%&&&r#   r   r   c           
         | j         j        j        d         }t          | |          }t	          j        dd|          \  }}t          |                                ||d         dd|d|i           t          |                                ||d         d	d
|d|i           |D ]7}|j	        D ]-}t          |          t          u r|                    |           .8t          |           S )Nr   r6   r/   nrowsncolsr)   ACFTr   lagsrH   titlefftr   vlines_kwargsPACFywmro  rH   rp  methodr   rr  )rS   rT   r[   rg  r:   r   r   dropnar   collectionsr  r
   r|   r   )	r'   r(   r)   r   re  r   rf   rH   items	            r!   _plot_acf_pacfrz    s    K,Q/E
ff
%
%Cl!W===GAt7'    7'     * *N 	* 	*DDzz^++""5)))	* F###r#   c                    | j         j        j        }t          | j         j        j                  }t          |           }t          j        |d|          \  }}d}t          |||          D ]w\  }\  }	}
}t          | |          }t          |                                ||	|rdndd|d|i           t          |                                ||
|rdndd	|d|i
           d}xt          ||          D ]>\  }}|D ]6}|j        D ],}t          |t                    r|                    |           -7?t#          |           S )Nr/   rj  Trm   r   rn  rs  rt  ru  F)rS   rT   r[   rA   rU   r-  r:   r   r   rg  r   rw  r   rx  r8   r
   r|   r   )r'   r(   r)   r   rh   r   rf   is_firstrH  acf_axis	pacf_axisr   re  rowrH   ry  s                   r!   _plot_acf_pacf_comparisonr    s    [-F6;$,--H)&11FlGDDDGAtH/264/H/H  +$)e&%((LLNN#+%%#U+	
 	
 	
 	
 	LLNN$,&&"#U+	
 	
 	
 	
 $'' . .
U 	. 	.B . .dN33 .&&u---.	.
 F###r#   c                 p    t          |t                    rt          | ||          S t          | ||          S r/  )r8   r9   r  rz  )r'   r(   r)   s      r!   plot_acf_pacfr     s:     &$ 7(AAAffg666r#   r   	dataframeentity_columnsortbymax_entitiesselected_entitiesc                 ^   |8d}| |                                                                          }||g|_        n?t          |t                    r|g}|g|}| |                                          }|d         }||         j        dk    rS	 t          j        ||                   ||<   n4# t          $ r'}t          d| d||         j         d          |d }~ww xY wt          j        d||                                         g          }	t          j        ||         |	dt          |	          	          |d
<   |                    |d
g          |                                         }|                                                    |d
|          j        }|r	||         }n
|d |         }|S )N_indexr   Ozcolumn z dtype z is not supported.r   T)rJ   include_lowestr   __bins)r   r   r   )r   reset_indexr   r8   r   dtyper  to_datetime	Exceptionr   rX   r8  nuniquecutrW   groupbycountpivot_tableT)
r  r  r  r  r  	sortbykeydfcolsexnbinss
             r!   _prepare_heatmap_datar  *  s    ~	}%**,,88::/

 fc"" 	XF''t_!!##1I		)}c!!	N2i=99ByMM 	 	 	S)SSByM,?SSS 	 FB9--//011E6
9E$uU||  BxL 
]H-	.	.y	9	?	?	A	AB
	IX}	M	M	
   !"Is   B* *
C4"CC   r   #337ab7r  r   c                 P   t          j        |          \  }}t          j        j                            dd|gd          }|                    | |                                d|          }|                    dt          j
        |                      |                    d	 t          t          |                     D                        |                    | j                   |                    g            |                    d
           |                                 |S )Nr+   reportr,   @   )r   rB  )
edgecolors	linewidthr   r   c                     g | ]}|d z   S r   r   r)  r   s     r!   r*  z._create_timeseries_heatmap.<locals>.<listcomp>d  s    333q1s7333r#   Time)r:   r   
matplotlibr   r   r   
pcolormeshr   set_climrX   nanmaxr   rW   rA   r   r   r   
set_xlabelinvert_yaxis)r  r)   r   r   rH   r   pcs          r!   _create_timeseries_heatmapr  Y  s   
 L)))EAr4>>7E"b ?  D 
rb&6&6&8&8Dt	T	TBKK29R==!!!MM33E#b''NN333444rx   MM"MM&OOIr#   c                 x    t          | ||||          }t          |||          }|                    d           |S )a`  Generate a multi entity timeseries heatmap based on a pandas DataFrame.

    Args:
        dataframe: the pandas DataFrame
        entity_column: name of the entities column
        sortby: column that define the timesteps (only dates and numerical variables are supported)
        max_entities: max entities that will be displayed
        selected_entities: Optional list of entities to be displayed (overules max_entities)
        figsize: The size of the figure (width, height) in inches, default (10,5)
        color: the primary color, default '#337ab7'
    Returns:
        The TimeSeries heatmap.
    r6   )r  r  
set_aspect)	r  r  r  r  r  r)   r   r  rH   s	            r!   timeseries_heatmapr  l  sI    . 
=&,8I
 
B 
$B	7	7BMM!Ir#   none	tick_markc                     dD ]"}| j         |                             d           #| j                            |           | j                            |           | S )N)toprS  r   r   F)spinesrb   r\   set_ticks_positionra   )rC   r  anchors      r!   _set_visibilityr    sb     5 / /F''....J!!),,,J!!),,,Kr#      
      T=
ףp=?r  r  rs   notnull_countsrk  r   r   .label_rotationc                 t   | |z  }t          |           dk    r|j                            |||          }|                    |                                d||           |                                }	|	                    |                                           |	                    |	                                           |	                    | d||           n|j        
                    |||          }|r|                                ng }
|                    |
|           |                                }	|	                    |                                           |	                    |                                           |	                    | |           ||	fD ]}t%          |          }|S )a  
    A bar chart visualization of the missing data.

    Inspired by https://github.com/ResidentMario/missingno

    Args:
        notnull_counts: Number of nonnull values per column.
        nrows: Number of rows in the dataframe.
        figsize: The size of the figure to display.
        fontsize: The figure's font size. This default to 16.
        labels: Whether or not to display the column names. Would need to be turned off on particularly large
            displays. Defaults to True.
        color: The color of the filled columns. Default to the RGB multiple `(0.25, 0.25, 0.25)`.
        label_rotation: What angle to rotate the text labels to. Defaults to 45 degrees.
    Returns:
        The plot axis.
    r   )r)   r   r   rS  )har   ru   r   r   )rA   r>   rZ   r`   get_xticklabelstwinyr   
get_xticksr   get_xlimr   get_yticklabelsr   twinxr   
get_yticksr   get_ylimr  )r  rk  r)   r   r   r   r  
percentageax0ax1ylabelsrH   s               r!   missing_barr    s   4  %'J
>b  o!!'HE!RR!!#	 	 	
 	
 	
 iikks~~''(((S\\^^$$$v> 	 	
 	
 	
 	
 o""7XU"SS+19#%%'''rGh777iikks~~''(((S\\^^$$$NX>>>Cj ! !R  Jr#   notnullr   r4   c                 4   t          |          }t          j        ||dft          j                  }	||	| <   g d|	|  <   t	          j        dd|          \  }
}|                    |	d           |                    d           |                    d	           |j	        
                                 d
}|                    t          t          d|                               |                    ||||           |                    d|dz
  g           |                    d|g|           d t          d|dz
            D             }|D ]}|                    |dd           |s|dk    r|                    g            t%          |          }|S )a  
    A matrix visualization of missing data.

    Inspired by https://github.com/ResidentMario/missingno

    Args:
        notnull: Missing data indicator matrix.
        columns: List of column names.
        height: Number of rows in the dataframe.
        figsize: The size of the figure to display.
        fontsize: The figure's font size. Default to 16.
        labels: Whether or not to display the column names when there is more than 50 columns.
        label_rotation: What angle to rotate the text labels to. Defaults to 45 degrees.
        color: The color of the filled columns. Default is `(0.41, 0.41, 0.41)`.
    Returns:
        The plot axis.
    rq   )r  )r6   r6   r6   r6   r+   r  )r   autoFr   r   ru   r  r   r   c                     g | ]}|d z   S r  r   r  s     r!   r*  z"missing_matrix.<locals>.<listcomp>  s    777a!c'777r#   rN  r,   )rR  r   r   )rA   rX   zerosfloat32r:   r   rB   r  gridr\   tick_topr   r9   rW   r`   r   r   axvliner  )r  r   r4   r)   r   r   r   r  r3   missing_gridr   rH   r  
separatorspoints                  r!   missing_matrixr    s   6 LLE8VUA.bjAAAL!L&YYL'LAw///EAr IIl&I111MM&GGENNNH	BMM$uQ''(((wBRRRMM1fqj/"""6{X666775EAI#6#6777J 8 8


5Cw
7777 ebjj
2			BIr#      r  RdBucorr_matmasknormalized_cmapcbarrH   c
                    t          j        dd|          \  }
}	|rdddni }|rt          j        | f|||	|dd|dz
  id| nt          j        | f|||	|d	| |	j                                         |	                    |	j                                        |d
|           |	                    |	j	                                        d|           t          |	          }	|	j                            d           |	j        D ]}t          |                                          }d|cxk    rdk     rn n|                    d           Id|cxk     rdk    rn n|                    d           o|dk    r|                    d           |dk    r|                    d           d|cxk     rdk     rn n|                    d           |                    t#          |d                     |	S )a  
    Presents a `seaborn` heatmap visualization of missing data correlation.
    Note that this visualization has no special support for large datasets.

    Inspired by https://github.com/ResidentMario/missingno

    Args:
        corr_mat: correlation matrix.
        mask: Upper-triangle mask.
        figsize: The size of the figure to display. Defaults to (20, 12).
        fontsize: The figure's font size.
        labels: Whether or not to label each matrix entry with its correlation (default is True).
        label_rotation: What angle to rotate the text labels to. Defaults to 45 degrees.
        cmap: Which colormap to use. Defaults to `RdBu`.
        normalized_cmap: Use a normalized colormap threshold or not. Defaults to True
    Returns:
        The plot axis.
    r6   r+   rO   )r   r   Tsizer/   )r  r   rH   r  annot	annot_kws)r  r   rH   r  rS  r  r   r   Fgffffff?z<1gffffffz>-11z-1gg?r|  )r:   r   r   heatmapr\   tick_bottomr`   get_majorticklabelsr   ra   r  patchrb   textsr   get_textset_textr  )r  r  r)   r   r   r  r   r  r  rH   r   	norm_argstextrQ  s                 r!   missing_heatmapr  
  s`   < LAw///EAr+:BQ'''I S		
x!|,		
 		
 		
 		
 		
 		
 	HR4drRR	RRR H
$$&&	     rx3355HUUU			BH ' '$--//""1====q=====MM$!____u_____MM%    !VVMM#"WWMM$QMM"MM%1++&&&&Ir#   )r$   )r$   FF)F)rO   )r   r   )r4  )r$   F)ry   )rh  )Nr   N)r  r  )Nr   Nr  r  )r  )r  r  Tr  rs   )r  r  r  Trs   )r  r  Trs   r  TTN)`__doc__r   typingr   r   r   r   r   r   r  numpyrX   pandasr  seabornr   r	   r:   matplotlib.collectionsr
   matplotlib.colorsr   r   r   r   matplotlib.datesr   r   matplotlib.patchesr   matplotlib.tickerr   r   statsmodels.graphics.tsaplotsr   r   	typeguardr   rG   r   ydata_profiling.configr   ydata_profiling.utils.commonr   %ydata_profiling.visualisation.contextr   #ydata_profiling.visualisation.utilsr   r  r   r"   r  tupleFigurerI   ndarrayboolrk   rn   r^   r   r   r   	DataFramer   r   r   r   Axesr   Legendr   r  r%  r-  rC   Axisr3  r;   rK  rZ  r9   r\  r   ra  rg  rz  r  r  r  r  r  r  r  r  r  r   r#   r!   <module>r     s   . .  > > > > > > > > > > > > > > > >                 $ $ $ $ $ $ 1 1 1 1 1 1 X X X X X X X X X X X X B B B B B B B B $ $ $ $ $ $ 8 8 8 8 8 8 8 8 = = = = = = = = ! ! ! ! ! !       + + + + + + F F F F F F K K K K K K @ @ @ @ @ @Q Qs Qs Q Q Q Q  ")T")_,-  	Z	   < I IIJI RZ
 I 	I
 I I 	ZI I I IX $H $29 $ $ $ $ $
 
 	$ $$J$ RZ
 $ 	$
 	$ $ $ $0 
 	$ $$J$ RZ
 $ 	$
 	$ $ $ $:B
1>A
BBB B B B*     , *$ *$x *$r| *$3 *$PS *$ *$ *$ *$Z $H $bi $C $ $ $ $8 PX$ $$ i$25$JM$$ $ $ $> $$!y$359$GJ$UX$$ $ $ $F 8=8 8
)8!8048
38Z&--.8 8 8 8x 8=* *
)*!*04*
38Z&--.* * * *Z ::
): 	: : : :z	 	d3i 	 	 	 		I	
/
	 _	 	 	 	 
 	+$ +$+$")T")_,-+$ 	4	?*
++$ 	+$
 +$ +$ +$ +$\  	+$ +$+$+$ +$ 	+$
 +$ +$ +$ +$b  $	/"  	   <  $-$ $$$	/"$ 5%< $ 		$ $ $ $4' '") ' ' ' ' ' ;B $  $ $ i $27 $ $  $  $  $H AH&$ &$&$"29o&$8=&$&$ &$ &$ &$R GN7 77#D")O47?D77 7 7 7 *.-1, ,|,, U39%&, 	,
  S	*, \, , , ,b  ' 
38_  	X	   &  *.-1& | U39%& 	
  S	* 38_  	X   > 28 
/
+._    $,18 8I88 5%< 8 	8
 8 8 8 _8 8 8 8~ $,17 77#Y7 7 5%< 	7
 7 7 7 7 _7 7 7 7z $, #J JJ
J 5%< J 	J
 J J J J J 	J _J J J J J Jr#   