
    -Ph%                         d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	m
Z
 ddlmZ ddlmZ dd	lmZ  G d
 d          Zd ZdS )zkRecommendation system generator.

Generate recommendations based on TF-IDF representation and a KNN model.
    N)defaultdict)Path   )THUMBNAIL_PARENT_DIVTHUMBNAIL_PARENT_DIV_CLOSE_thumbnail_div)extract_intro_and_title)split_code_and_text_blocks)_replace_md5c                   ^    e Zd ZdZdddddZd Zed             Zd	 Zd
 Z	ddZ
d Zd ZdS )ExampleRecommenderac  Compute content-based KNN-TF-IFD recommendation system.

    Parameters
    ----------
    n_examples : int, default=5
        Number of most relevant examples to display.
    min_df : float in range [0.0, 1.0] or int, default=1
        When building the vocabulary ignore terms that have a document frequency
        strictly lower than the given threshold. If float, the parameter
        represents a proportion of documents, integer represents absolute
        counts. This value is also called cut-off in the literature.
    max_df : float in range [0.0, 1.0] or int, default=1.0
        When building the vocabulary ignore terms that have a document frequency
        strictly higher than the given threshold. If float, the parameter
        represents a proportion of documents, integer represents absolute
        counts.

    Attributes
    ----------
    file_names_ : list of str
        The list of file names used for computing the similarity matrix.
        The recommended examples are chosen among this list.

    similarity_matrix_ : dense matrix
        Fitted matrix of pairwise cosine similarities.
          g?
n_examplesmin_dfmax_dfc                0    || _         || _        || _        d S Nr   )selfr   r   r   s       Z/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/sphinx_gallery/recommender.py__init__zExampleRecommender.__init__4   s    $    c                 j    d t          j        d|          D             }|                     |          S )z@Extract a dict mapping raw tokens from doc to their occurrences.c              3   >   K   | ]}|                                 V  d S r   )lower).0toks     r   	<genexpr>z1ExampleRecommender.token_freqs.<locals>.<genexpr>;   s*      JJ3399;;JJJJJJr   z\w+)refindall
dict_freqs)r   doctoken_generators      r   token_freqszExampleRecommender.token_freqs9   s3    JJ"*VS2I2IJJJ///r   c                 X    t          t                    }| D ]}||xx         dz  cc<   |S )z;Extract a dict mapping list of tokens to their occurrences.r   )r   int)r#   freqr   s      r   r"   zExampleRecommender.dict_freqs>   s<     3 	 	CIIINIIIIr   c                    ddl }g }t          t                    }|D ]L}|                                D ]5\  }}|                    |           ||                             |           6Md t          t          |                    D             }|                    t          |          t          |          f          }	t          |          D ],\  }
}|                                D ]\  }}||	|
||         f<   -|	S )a  Convert a dictionary of feature occurrence frequencies into a matrix.

        Parameters
        ----------
        data : list of dict
            Each dictionary represents a document where tokens are keys and
            values are their occurrence frequencies.

        Returns
        -------
        X : ndarray of shape (n_samples, n_features)
            A matrix of occurrences where n_samples is the number of samples in
            the dataset and n_features is the total number of features across
            all samples.
        r   Nc                     i | ]\  }}||	S  r+   )r   	token_idxtokens      r   
<dictcomp>z6ExampleRecommender.dict_vectorizer.<locals>.<dictcomp>_   s+     
 
 
!1EE9
 
 
r   )	numpyr   listitemsappend	enumeratesortedzeroslen)r   datanptokens
all_valuesdict_of_freqsr-   r(   
token_dictXdict_of_freqs_idxs              r   dict_vectorizerz"ExampleRecommender.dict_vectorizerF   s1     	 &&
! 	/ 	/M,2244 / /te$$$5!((..../
 
5>vj?Q?Q5R5R
 
 

 HHc$iiZ12209$ 	? 	?,},2244 ? ?t:>#Z%6677?r   c                     ddl }|j        d         dz   }|                    |d          dz   }|                    ||z            dz   }||z  }|j        |j                            |d          z  j        }|S )aQ  Transform a term frequency matrix into a TF-IDF matrix.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            A term frequency matrix.

        Returns
        -------
        X_tfidf : ndarray of shape (n_samples, n_features)
            A tf-idf matrix of the same shape as X.
        r   Nr   axis)r/   shapecount_nonzerologTlinalgnorm)r   r=   r8   	n_samplesdfidfX_tfidfs          r   compute_tf_idfz!ExampleRecommender.compute_tf_idfh   s     	GAJN	aa((1,ffY^$$q(c'9ry~~gA~>>>Ar   Nc                     ddl }||u s||}||j                            |          z  }||u r|}n||j                            |          z  }||j        z  }|S )a  Compute the cosine similarity between two vectors X and Y.

        Parameters
        ----------
        X : ndarray of shape (n_samples_X, n_features)
            Input data.

        Y : ndarray of shape (n_samples_Y, n_features), default=None
            Input data. If `None`, the output will be the pairwise
            similarities between all samples in `X`.

        Returns
        -------
        cosine_similarity : ndarray of shape (n_samples_X, n_samples_Y)
            Cosine similarity matrix.
        r   N)r/   rG   rH   rF   )r   r=   Yr8   X_normalizedY_normalized
similaritys          r   cosine_similarityz$ExampleRecommender.cosine_similarity   st    " 	66QYA29>>!,,,66'LLry~~a000L!LN2
r   c                 *  	 ddl }| j        }| j        }| j        }t	          |t
          j                  st          d          |dk     rt          d          t	          |t
          j                  r|dk    s3t	          |t                    rd|cxk    rdk    sn t          d          t	          |t
          j                  r|dk    s3t	          |t                    rd|cxk    rdk    sn t          d	          | j	        	| 
                    	fd
|D                       }t	          |t                    r0t          |                    ||j        d         z                      }t	          |t                    r0t          |                    ||j        d         z                      }|                    |d          }||k    ||k    z  }|                     |                     |dd|f                             | _        || _        | S )zCompute the similarity matrix of a group of documents.

        Parameters
        ----------
        file_names : list or generator of file names.

        Returns
        -------
        self : object
            Fitted recommender.
        r   Nzn_examples must be an integerr   z$n_examples must be strictly positiveg        g      ?z/min_df must be float in range [0.0, 1.0] or intz/max_df must be float in range [0.0, 1.0] or intc                 h    g | ].} t          |                              d                     /S )utf-8encoding)r   	read_text)r   fname	freq_funcs     r   
<listcomp>z*ExampleRecommender.fit.<locals>.<listcomp>   s:    XXXEYYtE{{,,g,>>??XXXr   rA   )r/   r   r   r   
isinstancenumbersIntegral
ValueErrorfloatr%   r?   r'   ceilrC   floorsumrS   rM   similarity_matrix_file_names_)
r   
file_namesr8   r   r   r   counts_matrixdoc_appearancesmaskr[   s
            @r   fitzExampleRecommender.fit   s9    	_
*g&677 	E<===!^^CDDD 011	P6<kk65)) 7B.1V.B.B.B.Bs.B.B.B.BNOOO 011	P6<qjj65)) 7A.1V.B.B.B.Bs.B.B.B.BNOOO$	,,XXXXZXXX
 
 fe$$ 	C-*=a*@!@AABBFfe$$ 	D&=+>q+A"ABBCCF&&Q&776)o.GH"&"8"8aaag 677#
 #
 &r   c                      j                             |          }t          t           j        |                             }t          |d d          }d |d j        dz            D             } fd|D             }|S )aN  Compute the `n_examples` most similar documents to the query.

        Parameters
        ----------
        file_name : str
            Name of the file corresponding to the query index `item_id`.

        Returns
        -------
        recommendations : list of str
            Name of the files most similar to the query.
        c                     | d         S )Nr   r+   )xs    r   <lambda>z,ExampleRecommender.predict.<locals>.<lambda>   s
    1Q4 r   T)keyreversec                     g | ]\  }}|S r+   r+   )r   idx_s      r   r\   z.ExampleRecommender.predict.<locals>.<listcomp>   s    OOOvsAsOOOr   r   c                 *    g | ]}j         |         S r+   )rf   )r   rs   r   s     r   r\   z.ExampleRecommender.predict.<locals>.<listcomp>   s!    HHHS4+C0HHHr   )rf   indexr0   r3   re   r4   r   )r   	file_nameitem_idsimilar_itemssorted_itemstop_k_itemsrecommendationss   `      r   predictzExampleRecommender.predict   s     "((33Yt'>w'GHHIImNNN
 POa$/A:M6M)NOOOHHHHKHHHr   r   )__name__
__module____qualname____doc__r   r%   staticmethodr"   r?   rM   rS   rk   r}   r+   r   r   r   r      s         6 &'q     
0 0 0
   \     D  0   >. . .`    r   r   c                    t          |          }|j        |j        z   d}|                     |          }d}|d                             d|          }t          |dd          5 }|                    d| d	           |                    t                     |D ]{}	t          |	          }
t          |	d
          \  }}t          ||d         j
                  \  }}|                    t          |
j        |d         |
j        ||d                     ||                    t                     ddd           n# 1 swxY w Y   t          |d           dS )aC  Generate `.recommendations` reST file for a given example.

    Parameters
    ----------
    recommender : ExampleRecommender
        Instance of a fitted ExampleRecommender.

    fname : str
        Path to the example file.

    gallery_conf : dict
        Configuration dictionary for the sphinx-gallery extension.
    z.recommendations.newzRelated examplesrecommenderrubric_headerwrV   rW   z

.. rubric:: 
F)return_noder   src_dirT)
is_backrefNt)mode)r   parentstemr}   getopenwriter   r
   r	   contentr   namer   r   )r   rZ   gallery_conf
path_fnamerecommend_fnamerecommended_examplesdefault_rubric_headerr   ex_fileexample_fnameexample_pathrt   script_blocksintrotitles                  r   _write_recommendationsr      s    eJ#*Z_<RRRO&..u55. /33. M 
osW	5	5	5 2::::;;;*+++1 	 	M..L95     A} 35-:J:RSSLE5MM ' + %#  	 	 	 	 	0111'2 2 2 2 2 2 2 2 2 2 2 2 2 2 2( s++++++s   'CD??EE)r   r^   r    collectionsr   pathlibr   backreferencesr   r   r   gen_rstr	   py_source_parserr
   utilsr   r   r   r+   r   r   <module>r      s      				 # # # # # #               
 - , , , , , 8 8 8 8 8 8      M M M M M M M M`+, +, +, +, +,r   