
    Q/Ph                     d    d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	 d Z
d Zd ZddZddZdS )    )division)tee)
itemgetter)defaultdict)logc                     t          t          |d                    | z  t          t          d|z
  d                    || z
  z  z   S )Ng|=   )r   max)knxs      V/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/wordcloud/tokenization.pylr      sC     s1e}}!CAE5(9(9$:$:a!e$DDD    c                    ||k    s||k    rdS |}| }|}|}||z  }||z  }	||z
  ||z
  z  }
t          |||          t          ||z
  ||z
  |          z   t          |||	          z
  t          ||z
  ||z
  |
          z
  }d|z  S )zCollocation scorer   )r   )count_bigramcount1count2n_wordsNc12c1c2pp1p2scores               r   r   r      s    &Gv--qA
C	B	B
QA	rB
s(q2v	BsB]]QrCxR333b"~~ !"s(AFB 7 78E:r   c                 f    t          |           \  }}t          |d            t          ||          S N)r   nextzip)iterableabs      r   pairwiser&      s.     x==DAqDMMMq!99r   T   c                 F   t          fdt          |           D                       }t          fd| D                       }t          |          }t          ||          \  }}t          d |D             |          \  }	}
|                                }|	                                D ]\  }}t          |                    d                    }||d                                                  }||d                                                  }t          |||         ||         |          }||k    r7||xx         |	|         z  cc<   ||xx         |	|         z  cc<   |	|         ||<   t          |                                          D ]\  }}|dk    r||= |S )Nc              3   R   K   | ]!}t          fd |D                       |V  "dS )c              3   D   K   | ]}|                                 v V  d S r    lower.0w	stopwordss     r   	<genexpr>z1unigrams_and_bigrams.<locals>.<genexpr>.<genexpr>+   s1      8[8[TUi9O8[8[8[8[8[8[r   N)any)r.   r   r0   s     r   r1   z'unigrams_and_bigrams.<locals>.<genexpr>+   sF      \\S8[8[8[8[YZ8[8[8[5[5[\1\\\\\\r   c              3   H   K   | ]}|                                 v|V  d S r    r+   r-   s     r   r1   z'unigrams_and_bigrams.<locals>.<genexpr>,   s5      CC!		(B(BA(B(B(B(BCCr   )normalize_pluralsc                 8    g | ]}d                      |          S ) )join)r.   bigrams     r   
<listcomp>z(unigrams_and_bigrams.<locals>.<listcomp>1   s$    000f&		000r   r6   r   r	   )
listr&   lenprocess_tokenscopyitemstuplesplitr,   r   )wordsr0   r4   collocation_thresholdbigramsunigramsr   counts_unigramsstandard_formcounts_bigramsstandard_form_bigramsorig_countsbigram_stringcountr8   word1word2collocation_scorewords    `                 r   unigrams_and_bigramsrP   '   s    \\\\huoo\\\\\GCCCCuCCCCCH(mmG%3$5&7 &7 &7"O],:00000+-- -- --)N) "&&((K !/ 4 4 6 6 K Ku}**3//00fQioo//0fQioo//0!%U);[=OQXYY444
 E"""n]&CC"""E"""n]&CC"""-;M-JOM*O113344 & &eA::%r   c                    t          t                    }| D ]:}|                                }||         }|                    |d          dz   ||<   ;|ri }t	          |                                          D ]}|                    d          r{|                    d          sf|dd         }||v rX||         }	||         }
|	                                D ]+\  }}|dd         }|
                    |d          |z   |
|<   ,|||<   ||= i }i }t          d          }|                                D ]W\  }}t          |                                |          d         }t          |                                          ||<   |||<   X|r7|                                D ]"\  }}||                                         ||<   #||fS )a  Normalize cases and remove plurals.

    Each word is represented by the most common case.
    If a word appears with an "s" on the end and without an "s" on the end,
    the version with "s" is assumed to be a plural and merged with the
    version without "s" (except if the word ends with "ss").

    Parameters
    ----------
    words : iterable of strings
        Words to count.

    normalize_plurals : bool, default=True
        Whether to try and detect plurals and remove trailing "s".

    Returns
    -------
    counts : dict from string to int
        Counts for each unique word, with cases represented by the most common
        case, and plurals removed.

    standard_forms : dict from string to string
        For each lower-case word the standard capitalization.
    r   r	   sssN)key)r   dictr,   getr:   keysendswithr>   r   r
   sumvalues)rA   r4   drO   
word_lower	case_dictmerged_pluralsrU   key_singulardict_pluraldict_singularrK   singularfused_casesstandard_casesitem1firstplurals                     r   r<   r<   K   s   : 	DA 5 5ZZ\\
jM	#--a0014	$ >> 	 	C||C   
d);); 
"3B3x1$$"#C&K$%lOM'2'8'8':': D De#'9)--h::UB &h//*6N3'#KNqMME!" + +
IIOO%%5111!4 !1!1!3!344E%*z"" F . 4 4 6 6 	F 	FFH%3HNN4D4D%EN6""&&r   N)Tr'   )T)
__future__r   	itertoolsr   operatorr   collectionsr   mathr   r   r   r&   rP   r<    r   r   <module>ro      s                      # # # # # #      E E E  "  ! ! ! !H?' ?' ?' ?' ?' ?'r   