
    bMh_g                        d dl m Z  d dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
mZmZ d Zd Zd Zd Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zej                            dddg          d             Zd Zd Zd Z d Z!ej                            ddddgfdd dgfg          d             Z"d Z#d  Z$ej                            d!g d"fg d#fg d$fg d%fg d&d"fg d&d%fg d&d$fg d&d%fg          d'             Z%d( Z&d) Z'd* Z(d+ Z)d, Z*d- Z+dS ).    )datetimeN)
ArrowDtype)	DataFrameIndex
MultiIndexSeries_testingc                     t          dt          j        dg|           }t          j        t
          d          5  |j                            dd            d d d            d S # 1 swxY w Y   d S )NfooBAD__barBADfoodtypezexpand must be True or Falsematch.*(BAD[_]+).*(BAD)expand)r   npnanpytestraises
ValueErrorstrextract)any_string_dtypevaluess     a/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/pandas/tests/strings/test_extract.py+test_extract_expand_kwarg_wrong_type_raisesr      s    %rvu5=MNNNF	z)G	H	H	H > >
/===> > > > > > > > > > > > > > > > > >s   A$$A(+A(c                 8   t          dt          j        dg|           }t          dt          j        t          j        g|           }|j                            d          }t          j        ||           |j                            dd          }t          j        ||           t          ddgt          j        t          j        gt          j        t          j        gg|           }|j                            d	d
          }t          j        ||           d S )Nr   r   r   BAD__z.*(BAD[_]+).*Tr   BADr   F)r   r   r   r   r   r   tmassert_frame_equal)r   sexpectedresults       r   test_extract_expand_kwargr'      s     "&%08HIIIA'26262:JKKKHU]]?++F&(+++U]]?4]88F&(+++
5	BFBF+bfbf-=>FV  H U]]/]>>F&(+++++    c                  \   t          dt          j        ddt          j                    dd ddg	          } | j                            dd	          }t          j        t          j        g}t          d
dg|d
dg||||||g	t                    }t          j
        ||           | j                            dd	          }t          d
t          j        d
t          j        t          j        t          j        d t          j        t          j        g	t                    }t          j        ||           d S )NaBAD_BAD	BAD_b_BADTr             @r   Fr   BAD_r!   r   z.*(BAD[_]+).*BAD)r   r   r   r   todayr   r   r   objectr"   r#   assert_series_equal)serr&   err%   s       r   &test_extract_expand_False_mixed_objectr4   *   s   
	RV[$0@0@%qRUV C
 W__1%_@@F
&"&	B
%"vuor2r2r2Ff  H &(+++ W__/_>>F	rvrvN  H 68,,,,,r(   c                      t          g d          } d}t          j        t          |          5  | j                            dd           d d d            d S # 1 swxY w Y   d S )N)A1A2A3A4B5z,only one regex group is supported with Indexr   ([AB])([123])Fr   )r   r   r   r   r   r   )idxmsgs     r    test_extract_expand_index_raisesr>   @   s     ...
/
/C
8C	z	-	-	- 7 76667 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7s   AA Ac                 f    | g d|          }d}t          j        t          |          5  |j                            dd           d d d            n# 1 swxY w Y   t          j        t          |          5  |j                            dd           d d d            d S # 1 swxY w Y   d S )	Nr6   B2C3r   "pattern contains no capture groupsr   
[ABC][123]Fr   
(?:[AB]).*r   r   r   r   r   index_or_seriesr   s_or_idxr=   s       r   ,test_extract_expand_no_capture_groups_raisesrJ   J   s@   1119IJJJH
.C 
z	-	-	- 9 9\%8889 9 9 9 9 9 9 9 9 9 9 9 9 9 9 
z	-	-	- 9 9\%8889 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9#   AAA<B&&B*-B*c                      | ddg|          }|j                             dd          } | ddgd|	          }| t          k    rt          j        ||           d S t          j        ||           d S )
Nr6   r7   r   (?P<uno>A)\dFr   Aunonamer   )r   r   r   r"   r1   assert_index_equalrH   r   rI   r&   r%   s        r   (test_extract_expand_single_capture_grouprT   W   s    d|3CDDDH\!!/%!@@FSz=MNNNH&  
vx00000
fh/////r(   c                    t          g d|           }|j                            dd          }t          t          j        t          j        t          j        g|           }t          j        ||           |j                            dd          }t          t          j        t          j        gt          j        t          j        gt          j        t          j        gg|           }t          j        ||           |j                            dd          }t          dd	t          j        g|           }t          j        ||           |j                            d
d          }t          ddgd	dgt          j        t          j        gg|           }t          j        ||           |j                            dd          }t          dd	t          j        gd|           }t          j        ||           |j                            dd          }t          ddgd	dgt          j        t          j        ggddg|           }t          j        ||           |j                            dd          }t          ddgd	dgt          j        t          j        ggddg|           }t          j        ||           |j                            dd          }t          dd	t          j        g|           }t          j        ||           t          g d|           }|j                            dd          }t          ddgd	dgt          j        t          j        gg|           }t          j        ||           t          g d|           }|j                            dd          }t          ddgd	dgt          j        dggddg|           }t          j        ||           t          g d|           }|j                            dd          }t          ddgd	dgdt          j        ggddg|           }t          j        ||           d S )Nr@   r   (_)Fr   (_)(_)([AB])[123]rN   Br;   12(?P<letter>[AB])letterrP   !(?P<letter>[AB])(?P<number>[123])numbercolumnsr   ([AB])(?P<number>[123])r   ([AB])(?:[123])A11B22C33([AB])([123])(?:[123])r6   rA   3"(?P<letter>[AB])?(?P<number>[123])rj   r6   rA   C#(?P<letter>[ABC])(?P<number>[123])?rm   )	r   r   r   r   r   r"   r1   r   r#   r   r$   r&   r%   s       r   "test_extract_expand_capture_groupsrp   c   s^   !!!)9:::AU]]5]//Frvrvrv.6FGGGH68,,, U]]8E]22F
&"&	BFBF+bfbf-=>FV  H &(+++ U]]=]77FsC(0@AAAH68,,, U]]?5]99F
sc3Z"&"&!12:J  H &(+++ U]]-e]<<FsC(x?OPPPH68,,, U]]>u]MMF
sc3Z"&"&!128$  H
 &(+++ U]]4U]CCF
sc3Z"&"&!12H  H
 &(+++ U]],U];;FsC(0@AAAH68,,, 	$$$,<===AU]]3E]BBF
sc3Z"&"&!12:J  H &(+++ 	   (8999AU]]?]NNF
sc3Z"&#/8$  H
 &(+++ 	   (8999AU]]@]OOF
sc3Z#rv/8$  H
 &(+++++r(   c                    g d}t          |           dk    rt          j        d           t          |           t          |          k     r5|                     d          } t          |           t          |          k     5| d t          |                   } t	          || |          }|j                            dd          }t	          d	d
t          j        g| |          }t          j
        ||           |j                            dd          }t          dd	gdd
gdt          j        ggddg| |          }t          j        ||           d S )Nrl   r   zTest requires len(index) > 0   indexr   (\d)Fr   rZ   r[   (?P<letter>\D)(?P<number>\d)?rN   rY   rm   r]   r_   ra   rt   r   )lenr   skiprepeatr   r   r   r   r   r"   r1   r   r#   )rt   r   datar2   r&   r%   s         r   (test_extract_expand_capture_groups_indexr|      sN    D
5zzQ2333
e**s4yy
 
 Q e**s4yy
 
  +CII+E
U*:
;
;
;CW__WU_33FsC(=MNNNH68,,,W__=e_LLF
sc3Z#rv/8$	  H &(+++++r(   c                     t          g dd|           }|j                            dd          }t          g dd|           }t          j        ||           d S )	Na3b3c2bobrP   z(?P<sue>[a-z])Fr   abcsue)r   r   r   r"   r1   ro   s       r   ,test_extract_single_series_name_is_preservedr      sh    !!!5EFFFAU]],U];;FoooE9IJJJH68,,,,,r(   c                 (   t          dt          j        dg|           }|j                            dd          }t          ddgt          j        t          j        gt          j        t          j        gg|           }t          j        ||           d S )	Nr   r   r   r   Tr   r    r!   )r   r   r   r   r   r   r"   r#   ro   s       r   test_extract_expand_Truer      s     "&%08HIIIAU]]/]==F
5	BFBF+bfbf-=>FV  H &(+++++r(   c                  D   t           j        t           j        g} t          dt           j        ddt          j                    dd ddg	          }|j                            dd          }t          d	d
g| d	d
g| | | | | | g	t                    }t          j
        ||           d S )Nr*   r+   Tr   r,   r-   r   r   r.   r!   r   )r   r   r   r   r/   r   r   r   r0   r"   r#   )r3   mixedr&   r%   s       r   %test_extract_expand_True_mixed_objectr      s    
&"&	BFN
	
 E Y3DAAF
%"vuor2r2r2Ff  H &(+++++r(   c                 f    | g d|          }d}t          j        t          |          5  |j                            dd           d d d            n# 1 swxY w Y   t          j        t          |          5  |j                            dd           d d d            d S # 1 swxY w Y   d S )	Nr@   r   rC   r   rD   Tr   rE   rF   rG   s       r   4test_extract_expand_True_single_capture_group_raisesr      s@   
 1119IJJJH
.C	z	-	-	- 8 8\$7778 8 8 8 8 8 8 8 8 8 8 8 8 8 8 
z	-	-	- 8 8\$7778 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8rK   c                      | ddg|          }|j                             dd          }t          dddgi|          }t          j        ||           d S )	Nr6   r7   r   rM   Tr   rO   rN   )r   r   r   r"   r#   rS   s        r   -test_extract_expand_True_single_capture_groupr   	  sj    d|3CDDDH\!!/$!??F%#s,4DEEEH&(+++++r(   rQ   series_namec                 &   t          g d| |          }|j                            dd          }t          t          j        t          j        t          j        g|          }t          j        ||           |j                            dd          }t          t          j        t          j        gt          j        t          j        gt          j        t          j        gg|          }t          j        ||           |j                            dd          }t          d	d
t          j        g|          }t          j        ||           |j                            dd          }t          d	dgd
dgt          j        t          j        gg|          }t          j        ||           |j                            dd          }t          dd	d
t          j        gi|          }t          j        ||           |j                            dd          }t          d	dgd
dgt          j        t          j        ggddg|          }t          j        ||           |j                            dd          }t          d	dgd
dgt          j        t          j        ggddg|          }t          j        ||           |j                            dd          }t          d	d
t          j        g|          }t          j        ||           d S )Nr@   rP   rV   Tr   r   rW   rX   rN   rY   r;   rZ   r[   r\   r]   r^   r_   r`   rb   r   rc   r   r   r   r   r   r   r"   r#   )rQ   r   r$   r&   r%   s        r   test_extract_seriesr     s    	!!!4DEEEA U]]5]..F"&"&"&19IJJJH&(+++ U]]8D]11F
&"&	BFBF+bfbf-=>FV  H &(+++ U]]=]66F#sBF+3CDDDH&(+++ U]]?4]88F
sc3Z"&"&!12:J  H &(+++ U]]-d];;F(S#rv$67?OPPPH&(+++ U]]>t]LLF
sc3Z"&"&!128$  H
 &(+++ U]]4T]BBF
sc3Z"&"&!12H  H
 &(+++ U]],T]::F#sBF+3CDDDH&(+++++r(   c                    t          g d|           }|j                            dd          }t          ddgdd	gt          j        t          j        gg|           }t          j        ||           t          g d
|           }|j                            dd          }t          ddgdd	gt          j        dggddg|           }t          j        ||           t          g d|           }|j                            dd          }t          ddgdd	gdt          j        ggddg|           }t          j        ||           d S )Nrd   r   rh   Tr   rN   rZ   rY   r[   ri   rk   rj   r]   r_   r`   rl   rn   rm   r   ro   s       r   test_extract_optional_groupsr   K  s   $$$,<===AU]]3D]AAF
sc3Z"&"&!12:J  H &(+++ 	   (8999AU]]?]MMF
sc3Z"&#/8$  H
 &(+++ 	   (8999AU]]@]NNF
sc3Z#rv/8$  H
 &(+++++r(   c                 <   g d}t          |           t          |          k     r%t          j        dt          |           d           | d t          |                   } t          || |          }|j                            dd          }t          dd	t          j        g| |          }t          j
        ||           |j                            d
d          }t          ddgdd	gdt          j        ggddg| |          }t          j
        ||           d S )Nrl   zIndex needs more than z valuesrs   ru   Tr   rZ   r[   rv   rN   rY   rm   r]   r_   rw   )rx   r   ry   r   r   r   r   r   r   r"   r#   )rt   r   r{   r$   r&   r%   s         r   +test_extract_dataframe_capture_groups_indexr   i  s&    D
5zzCII?SYY???@@@+CII+Et5(8999AU]]74]00F#sBF+5@PQQQH&(+++U]];D]IIF
sc3Z#rv/8$	  H &(+++++r(   c                     t          g dd|           }|j                            dd          }t          dg di| 	          }t	          j        ||           d S )
Nr~   r   rP   (?P<letter>[a-z])Tr   r]   r   r   )r   r   r   r   r"   r#   ro   s       r   'test_extract_single_group_returns_framer     sm     	!!!=MNNNAU]]/]==F(OOO4<LMMMH&(+++++r(   c                 0   dddddt           j        dg}g d}d}g d	}t          || 
          }t          j        g dd          }t          |||| 
          }|j                            |t          j	                  }t          j        ||           t          j        g d          }	t          ||	|           }t          j        g dd          }t          |||| 
          }|j                            |t          j	                  }t          j        ||           t          ||	|           }d|j        _        d|_        t          |||| 
          }|j                            |t          j	                  }t          j        ||           d S )Nzdave@google.comztdhock5@gmail.comzmaudelaperriere@gmail.comz'rob@gmail.com some text steve@gmail.comz%a@b.com some text c@d.com and e@f.com ))davegooglecom)tdhock5gmailr   )maudelaperrierer   r   )robr   r   )stever   r   )r   r   r   )r   dr   )efr   zY
    (?P<user>[a-z0-9]+)
    @
    (?P<domain>[a-z]+)
    \.
    (?P<tld>[a-z]{2,4})
    )userdomaintldr   )r   r   r,   r   rr   r   )   r   )r   r,   )   r   )r   r,   )r   rr   Nr   names)flags))singleDave)r   Toby)r   Maude)multiplerobAndSteve)r   abcdef)nonemissing)r   emptyrs   ))r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r,   )r   r   r   )r   r   r,   )r   r   rr   )NNr   )matchesdescription)r   r   r   )r   r   r   r   from_tuplesr   r   
extractallreVERBOSEr"   r#   rt   r   )
r   r{   expected_tuplespatexpected_columnsr$   expected_indexr%   r&   mis
             r   test_extractallr     s   #1/

D	 	 	OC 100t+,,,A  +HHH  N )9AQ  H Uc44F&(+++ 
		
 	
 	


 

B 	t2%5666A+		
 		
 		
 $  N )9AQ  H Uc44F&(+++ 	t2%5666A.AGM>N)9AQ  H Uc44F&(+++++r(   zpat,expected_namesrk   r]   r_   z([AB])?(?P<number>[123])c                    t          g d|          }|j                            |           }t          dt          j        dft          j        dfgt          j        g dd          ||	          }t          j	        ||           d S )
N)r   r6   32r   )rN   rZ   rj   r[   )r   r   rr   r,   r   r   )rt   ra   r   )
r   r   r   r   r   r   r   r   r"   r#   )r   expected_namesr   r$   r&   r%   s         r   test_extractall_column_namesr     s     	'7888AUc""F	bfc]RVSM2$%=%=%=_UUU	  H &(+++++r(   c                 n   t          g dd|           }t          j        g dd          }|j                            d          }t          dg d	i|| 
          }t          j        ||           |j                            d          }t          g d	|| 
          }t          j        ||           d S )Nr   r   d4c2r   rP   r   r   r   r   r   r   r   r]   )r   r   r   r   rs   ([a-z]))r   r   r   r   r   r   r"   r#   )r   r$   r   r&   r%   s        r   test_extractall_single_groupr     s    ###-?OPPPA+(((  N
 U233F	'''(FV  H &(+++ Uj))FN:J  H &(+++++r(   c                     t          g dd|           }|j                            d          }t          g dt	          j        g dd          | 	          }t          j        ||           d S )
N)ab3abc3d4cd2r   rP   z([a-z]+))ababcr   cdr   r   r   rs   )r   r   r   r   r   r   r"   r#   ro   s       r   ,test_extractall_single_group_with_quantifierr     s     	'''mCSTTTAUk**F   $,,,O
 
 
   H &(+++++r(   zdata, names)N)i1)Ni2)r   r   r   c                   	 t          |           	t          |          dk    r%t          t          	          |d                   }n1	fdt          	          D             }t          j        ||          }t          | d||          }t          j        g |dz             }|j                            d	          }t          dg||
          }t          j
        ||           |j                            d          }t          ddg||
          }t          j
        ||           |j                            d          }t          dg||
          }t          j
        ||           |j                            d          }t          ddg||
          }t          j
        ||           |j                            d          }t          ddg||
          }t          j
        ||           d S )Nr,   r   rQ   c              3   D   K   | ]}t          |gd z
  z            V  dS )r,   N)tuple).0ins     r   	<genexpr>z-test_extractall_no_matches.<locals>.<genexpr>0  s5      991%q1u&&999999r(   r   r   rQ   rt   r   r   z(z)rw   z(z)(z)z(?P<first>z)firstz(?P<first>z)(?P<second>z)secondz(z)(?P<second>z))rx   r   ranger   r   r   r   r   r   r"   r#   )
r{   r   r   rt   tuplesr$   r   r&   r%   r   s
            @r   test_extractall_no_matchesr     s    	D		A
5zzQeAhhU1X...9999a999&vU;;;t-u<LMMMA+Buz7IKKKN Ue$$F!NBRSSSH&(+++ Uh''F!Q~EUVVVH&(+++ Un--F	7G  H &(+++ U9::F(#>AQ  H &(+++ U011FH^;K  H &(+++++r(   c                    t          g dd|           }|j                            d          }t          dg dit	          j        g dd dg	          | 
          }t          j        ||           | dk    rct          g dt                    t          g ddt                    fD ]1}|j                            d          }t          j        ||           2t          g ddt          g dd          |           }|j                            d          }t          dg dit	          j        g dddg	          | 
          }t          j        ||           d S )N)a1a2b1c1xxxrP   z[ab](?P<digit>\d)digit)rZ   r[   rZ   )r   )r   r,   r   r   r   rs   r0   r   s_name)XXyyzzidx_namer   r   ))r   r   )r   r,   )r   r   )
r   r   r   r   r   r   r"   r#   r   r0   )r   r$   r&   r%   r<   s        r   test_extractall_stringindexr   U  s   ###%7GHHHAU233F	///"$%=%=%=dG_UUU  H
 &(+++ 8##&&&f555&&&U&AAA
 	4 	4C W''(<==F!&(3333&&&Z888		 	 	A U233F	///"$---j'5J
 
 
   H &(+++++r(   c                     t          g dd|           }t          j        t          d          5  |j                            d           d d d            d S # 1 swxY w Y   d S )Nr   r   rP   zno capture groupsr   z[a-z])r   r   r   r   r   r   )r   r$   s     r   (test_extractall_no_capture_groups_raisesr   z  s     	###-?OPPPA	z)<	=	=	= # #	"""# # # # # # # # # # # # # # # # # #s   AAAc                  Z   t          g dg dd          } | j        j                            dd          }t	          g d          }t          j        ||           | j        j                            d	d          }g d
}t	          |ddg          }t          j        ||           d S )Nr   )r8   B3D4r   )rt   rQ   z([A-Z])Tr   )rN   rY   Dz!(?P<letter>[A-Z])(?P<digit>[0-9])))rN   rj   )rY   rj   )r   4r]   r   )ra   )r   rt   r   r   r   r"   r#   )r$   rr   e_lists       r   !test_extract_index_one_two_groupsr    s    ###+=+=+=MRRRA	Jt44A///""A!Q
 	
@NNA111F&8W"5666A!Qr(   c                 N   t          g dd|           }d}|j                            |d          }|j                            |          }|                    dd	          }t          j        ||           d
}|j                            |d          }|j                            |          }|                    dd	          }t          j        ||           d}|j                            |d          }	|j                            |          }|                    dd	          }t          j        |	|           d}
|j                            |
d          }|j                            |
          }|                    dd	          }t          j        ||           d S )Nr~   r   rP   ([a-z])([0-9])Tr   r   r   level!(?P<letter>[a-z])(?P<digit>[0-9])(?P<group_name>[a-z])r   )r   r   r   r   xsr"   r#   )r   r$   pattern_two_nonameextract_two_nonamehas_multi_indexno_multi_indexpattern_two_namedextract_two_namedpattern_one_namedextract_one_namedpattern_one_nonameextract_one_nonames               r   test_extractall_same_as_extractr    s   !!!=MNNNA*'9$GGe&&'9::O$'''99N,n===<&7EEe&&'899O$'''99N+^<<<0&7EEe&&'899O$'''99N+^<<<#'9$GGe&&'9::O$'''99N,n=====r(   c                    t          j        g dd          }t          g d|d|           }d}|j                            |d	          }|j                            |          }|                    d
d          }t          j        ||           d}|j                            |d	          }|j                            |          }|                    d
d          }t          j        ||           d}	|j                            |	d	          }
|j                            |	          }|                    d
d          }t          j        |
|           d}|j                            |d	          }|j                            |          }|                    d
d          }t          j        ||           d S )N))rN   r   )rY   r   )rm   third)capitalordinalr   r~   r   )rt   rQ   r   r  Tr   r   r   r  r	  r
  r   )	r   r   r   r   r   r   r  r"   r#   )r   r   r$   r  r  has_match_indexno_match_indexr  r  r  r  r  r  s                r   -test_extractall_same_as_extract_subject_indexr    s   		999$
 
 
B 	!!!-GWXXXA*'9$GGe&&'9::O$'''99N,n===<&7EEe&&'899O$'''99N+^<<<0&7EEe&&'899O$'''99N+^<<<#'9$GGe&&'9::O$'''99N,n=====r(   c                      t          j        d          } t          ddgt          |                                                     j                            d          }|j        d         dk    sJ d S )Npyarrowr   r   r   z(ab)r   zstring[pyarrow])r   importorskipr   r   stringr   r   dtypes)par&   s     r   test_extractall_preserves_dtyper$    sk     
	Y	'	'BUDMBIIKK)@)@AAAEPPQWXXF=0000000r(   ),r   r   numpyr   r   pandas.core.dtypes.dtypesr   pandasr   r   r   r   r	   r"   r   r'   r4   r>   rJ   rT   rp   r|   r   r   r   r   r   markparametrizer   r   r   r   r   r   r   r   r   r   r   r  r  r  r$   r(   r   <module>r+     s.         				      0 0 0 0 0 0             > > >, , ,"- - -,7 7 7
9 
9 
9	0 	0 	0P, P, P,f, , ,6- - -, , ,, , ,.8 8 8, , , $!6776, 6, 876,r, , ,<, , ,4, , ,R, R, R,j  
.(/CD	#a]3	 
, 
, 
,, , ,*, , ,  	W	W	\	\			w'			|,			|,			|,	 (, (, (,V", ", ",J# # #     > > >8> > >B1 1 1 1 1r(   