
    -Ph\                    J   d Z ddlmZ ddlZddlmZ ddlmZ ddlZddl	m
Z
 erddlmZ 	 ddlZddlmZ dd	lmZ  eej        d
ej                                                                                  Z[n# e$ r dZddZdddZY nw xY wh dZdZ G d de
          ZdS )z9Chinese search language: includes routine to split words.    )annotationsN)Path)TYPE_CHECKING)SearchLanguage)Iterator)cut_for_search)load_userdictz.. fstrreturnNonec                    d S N )r   s    P/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/sphinx/search/zh.pyjieba_load_userdictr      s        TsentenceHMMboolIterator[str]c              #     K   dE d {V  d S )Nr   r   )r   r   s     r   r   r      s      r   >!   aasatbebyifinisitnoofonortoandarebutfornotthewasintonearsuchthatthentheythiswillwiththeirtherethesea  
/**
 * Porter Stemmer
 */
var Stemmer = function() {

  var step2list = {
    ational: 'ate',
    tional: 'tion',
    enci: 'ence',
    anci: 'ance',
    izer: 'ize',
    bli: 'ble',
    alli: 'al',
    entli: 'ent',
    eli: 'e',
    ousli: 'ous',
    ization: 'ize',
    ation: 'ate',
    ator: 'ate',
    alism: 'al',
    iveness: 'ive',
    fulness: 'ful',
    ousness: 'ous',
    aliti: 'al',
    iviti: 'ive',
    biliti: 'ble',
    logi: 'log'
  };

  var step3list = {
    icate: 'ic',
    ative: '',
    alize: 'al',
    iciti: 'ic',
    ical: 'ic',
    ful: '',
    ness: ''
  };

  var c = "[^aeiou]";          // consonant
  var v = "[aeiouy]";          // vowel
  var C = c + "[^aeiouy]*";    // consonant sequence
  var V = v + "[aeiou]*";      // vowel sequence

  var mgr0 = "^(" + C + ")?" + V + C;                      // [C]VC... is m>0
  var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$";    // [C]VC[V] is m=1
  var mgr1 = "^(" + C + ")?" + V + C + V + C;              // [C]VCVC... is m>1
  var s_v   = "^(" + C + ")?" + v;                         // vowel in stem

  this.stemWord = function (w) {
    var stem;
    var suffix;
    var firstch;
    var origword = w;

    if (w.length < 3)
      return w;

    var re;
    var re2;
    var re3;
    var re4;

    firstch = w.substr(0,1);
    if (firstch == "y")
      w = firstch.toUpperCase() + w.substr(1);

    // Step 1a
    re = /^(.+?)(ss|i)es$/;
    re2 = /^(.+?)([^s])s$/;

    if (re.test(w))
      w = w.replace(re,"$1$2");
    else if (re2.test(w))
      w = w.replace(re2,"$1$2");

    // Step 1b
    re = /^(.+?)eed$/;
    re2 = /^(.+?)(ed|ing)$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      re = new RegExp(mgr0);
      if (re.test(fp[1])) {
        re = /.$/;
        w = w.replace(re,"");
      }
    }
    else if (re2.test(w)) {
      var fp = re2.exec(w);
      stem = fp[1];
      re2 = new RegExp(s_v);
      if (re2.test(stem)) {
        w = stem;
        re2 = /(at|bl|iz)$/;
        re3 = new RegExp("([^aeiouylsz])\\1$");
        re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
        if (re2.test(w))
          w = w + "e";
        else if (re3.test(w)) {
          re = /.$/;
          w = w.replace(re,"");
        }
        else if (re4.test(w))
          w = w + "e";
      }
    }

    // Step 1c
    re = /^(.+?)y$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      re = new RegExp(s_v);
      if (re.test(stem))
        w = stem + "i";
    }

    // Step 2
    re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      suffix = fp[2];
      re = new RegExp(mgr0);
      if (re.test(stem))
        w = stem + step2list[suffix];
    }

    // Step 3
    re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      suffix = fp[2];
      re = new RegExp(mgr0);
      if (re.test(stem))
        w = stem + step3list[suffix];
    }

    // Step 4
    re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
    re2 = /^(.+?)(s|t)(ion)$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      re = new RegExp(mgr1);
      if (re.test(stem))
        w = stem;
    }
    else if (re2.test(w)) {
      var fp = re2.exec(w);
      stem = fp[1] + fp[2];
      re2 = new RegExp(mgr1);
      if (re2.test(stem))
        w = stem;
    }

    // Step 5
    re = /^(.+?)e$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      re = new RegExp(mgr1);
      re2 = new RegExp(meq1);
      re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
      if (re.test(stem) || (re2.test(stem) && !(re3.test(stem))))
        w = stem;
    }
    re = /ll$/;
    re2 = new RegExp(mgr1);
    if (re.test(w) && re2.test(w)) {
      re = /.$/;
      w = w.replace(re,"");
    }

    // and turn initial Y back to y
    if (firstch == "y")
      w = firstch.toLowerCase() + w.substr(1);
    return w;
  }
}
c                  t     e Zd ZdZdZdZeZeZ	 e
j        d          Zd fd	Zdd
ZddZddZddZ xZS )SearchChinesezChinese search implementationzhChinesez[a-zA-Z0-9_]+optionsdict[str, str]r   r   c                p    t                                          |           t                      | _        d S r   )super__init__setlatin_terms)selfr?   	__class__s     r   rC   zSearchChinese.__init__   s-    !!!%(UUr   c                    |                     dt                    }|r=t          |                                          rt	          t          |                     t          j        d          | _        d S )Ndictenglish)getJIEBA_DEFAULT_DICTr   is_filer   r   snowballstemmerstemmer)rF   r?   	dict_paths      r   initzSearchChinese.init   sa    KK(:;;	 	0i0022 	0I///&.y99r   inputr   	list[str]c                    t          t          |                    }d | j                            |          D             }| j                            |           ||z   S )Nc                6    g | ]}|                                 S r   )strip).0terms     r   
<listcomp>z'SearchChinese.split.<locals>.<listcomp>  s     NNN4$**,,NNNr   )listr   latin1_lettersfindallrE   update)rF   rR   chineselatin1s       r   splitzSearchChinese.split   s]    !."7"788NN4+>+F+Fu+M+MNNN'''r   stemmed_wordr   c                (    t          |          dk    S )N   )len)rF   ra   s     r   word_filterzSearchChinese.word_filter  s    <  1$$r   wordc                    | j                             |                                          }t          |          dcxk    ot          |          k    nc o|| j        v }|r|                                S |S )N   )rO   stemWordlowerrd   rE   )rF   rf   stemmedshould_not_be_stemmeds       r   stemzSearchChinese.stem  s{     ,''

55II))))S\\))))Fdd6F.F 	 ! 	 ::<<r   )r?   r@   r   r   )rR   r   r   rS   )ra   r   r   r   )rf   r   r   r   )__name__
__module____qualname____doc__langlanguage_namejs_porter_stemmerjs_stemmer_codeenglish_stopwords	stopwordsrecompiler[   rC   rQ   r`   re   rm   __classcell__)rG   s   @r   r<   r<      s        ''DM'O!IRZ 011N+ + + + + +: : : :       % % % %
 
 
 
 
 
 
 
r   r<   )r   r   r   r   )T)r   r   r   r   r   r   )rq   
__future__r   rx   pathlibr   typingr   rN   sphinx.searchr   collections.abcr   jiebar   r	   r   __file__DEFAULT_DICT_NAMEresolveas_posixrL   ImportErrorrv   rt   r<   r   r   r   <module>r      s   ? ? " " " " " " 				                       ( ( ( ( ( ( )((((((LLL$$$$$$:::::: 	U^T5#:;;CCEENNPP  	             
 
 
 x v( ( ( ( (N ( ( ( ( (s   A9 9BB