
    Q/Ph                         d dl mZmZmZmZmZ d dlZd dlm	Z	 d dl
mZ  ej        e	ej        e          de	dej        dedeeeef         eej                 f         fd	            ZdS )
    )AnyDictOptionalSequenceTupleN)Settings)get_duplicatesconfigdfsupported_columnsreturnc                 @   | j         j        }i }|dk    r|rt          |          dk    r| j         j        }||j        v rt          d| d          |                    |d          }||                             d                               |dd	          	                                
                    |
          }t          ||                   |d<   |d         t          |          z  |d<   ||                    ||          fS d|d<   d|d<   |dfS |dfS )a  Obtain the most occurring duplicate rows in the DataFrame.

    Args:
        config: report Settings object
        df: the Pandas DataFrame.
        supported_columns: the columns to consider

    Returns:
        A subset of the DataFrame, ordered by occurrence.
    r   zDuplicates key (z}) may not be part of the DataFrame. Either change the  column name in the DataFrame or change the 'duplicates.key' parameter.F)subsetkeepc                     d S )N )_s    n/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/ydata_profiling/model/pandas/duplicates_pandas.py<lambda>z'pandas_get_duplicates.<locals>.<lambda>&   s    T     )indexT)dropnaobserved)namen_duplicatesp_duplicatesg        N)
duplicatesheadlenkeycolumns
ValueError
duplicatedrename_axisgroupbysizereset_indexnlargest)r
   r   r   n_headmetricsduplicates_keyduplicated_rowss          r   pandas_get_duplicatesr-   	   sX    #F Gzz 	!R1#.2N++ _~ _ _ _  
 !mm3D5mQQO?#>>22*54HH.11  '*/.*I&J&JGN#&-n&=B&GGN# ((@@ 
 '(GN#&)GN#D= }r   )typingr   r   r   r   r   pandaspdydata_profiling.configr    ydata_profiling.model.duplicatesr	   register	DataFramestrr-   r   r   r   <module>r6      s    7 7 7 7 7 7 7 7 7 7 7 7 7 7     + + + + + + ; ; ; ; ; ; 2<::..,.;C.
4S>8BL112. . . ;:. . .r   