
    i+#                     :   S r SSKrSSKrSSKr \R                  R                  r\" S5      r0 rS rSS jrS rS rS rSS	 jrSS
 jrS rS rS S jrS S jrS S jrS!S jrS S jrS"S jrS#S jrS S jrS"S jrS rS rS$S jr S%S jr!S&S jr"S'S jr#S(S jr$S r%\RL                  RN                  r'\RL                  RP                  r(\RL                  RR                  r)\RT                  r*\RV                  r+g! \	 a    \R                  R                  r Nf = f))z1Collection of functions that work on strings/text    Nz<[^>]+>c                 b     [         U    $ ! [         a    [        U 5      =n[         U '   Us $ f = f)z$Compile a regular expression pattern)PATTERN_CACHEKeyError
re_compile)patternps     </app/mltbenv/lib/python3.13/site-packages/gallery_dl/text.pyrer
      s:    W%% %/%88M'"s     ..c                      [         R                  X5      n U(       a  UR                  U R	                  5       5      $ U R                  5       $ ! [         a     gf = f)zRemove html-tags from a string )HTML_REsub	TypeErrorjoinsplitstrip)txtreplseps      r	   remove_htmlr   !   sO    kk$$ xx		$$99;	  s   A 
AAc                      [         R                  U 5       Vs/ s H<  nU(       d  M  UR                  5       (       a  M#  [        U5      R	                  5       PM>     sn$ s  snf ! [
         a    / s $ f = f)zSplit input string by HTML tags)r   r   isspaceunescaper   r   )r   xs     r	   
split_htmlr   ,   sm     ]]3'
'    HQK'
 	
 

  	s,   A$ AAAA$ A$ $A32A3c                     [        S5      R                  S[        U 5      R                  5       5      n [        S5      R                  SU 5      R	                  S5      $ )zpConvert a string to a URL slug

Adapted from:
https://github.com/django/django/blob/master/django/utils/text.py
z[^\w\s-]r   z[-\s]+-z-_)r
   r   strlowerr   values    r	   slugifyr"   8   sI     {OCJ$4$4$67Ei=S%(..t44    c                 T    [        S5      R                  SU R                  5       5      $ )z5Replace all whitespace characters with a single spacez\s+ )r
   r   r   r    s    r	   sanitize_whitespacer&   B   s    f:>>#u{{}--r#   c                 f    U (       a)  U R                  S5      (       d  XR                  S5      -   $ U $ )z0Prepend 'scheme' to 'url' if it doesn't have onehttps://zhttp://z/:)
startswithlstripurlschemes     r	   ensure_http_schemer/   G   s+    
3>>"9::

4(((Jr#   c                     U R                  S5      (       d   XSU R                  S5       -   $  U SU R                  SS5       $ ! [         a    X-   s $ f = f! [         a    U s $ f = f)z$Extract scheme and domain from a URLr(   N/   )r*   index
ValueErrorr,   s     r	   root_from_urlr5   N   s{    >>122	 3000%CIIc1%&&  	 <	   
s"   A A AAA'&A'c                 p     U R                  S5      S   R                  S5      S   $ ! [         a     gf = f)z4Extract the last part of an URL to use as a filename?r   r1      r   )	partition
rpartition	Exception)r-   s    r	   filename_from_urlr<   [   s=    }}S!!$//4Q77 s   %( 
55c                 n    [        U 5      R                  S5      u  pnU(       a  UR                  5       $ S$ )z(Extract the filename extension of an URL.r   )r<   r:   r   )r-   name_exts       r	   ext_from_urlrB   c   s.    $S)44S9LDS399;&B&r#   c                     Uc  0 n[        [        U 5      5      nUR                  S5      u  p4nU(       a(  [        U5      S::  a  X1S'   UR	                  5       US'   U$ X!S'   SUS'   U$ )z;Extract the last part of an URL and fill 'data' accordinglyr>      filename	extensionr   )unquoter<   r:   lenr   )r-   datarE   r?   r@   rA   s         r	   nameext_from_urlrJ   i   su    |(-.H&&s+LDSCBZIIK[
 K $Z[Kr#   c                     Uc  0 nU R                  S5      u  p#nU(       a(  [        U5      S::  a  X!S'   UR                  5       US'   U$ XS'   SUS'   U$ )z@Extract the last part of a file name and fill 'data' accordinglyr>   rD   rE   rF   r   )r:   rH   r   )rE   rI   r?   r@   rA   s        r	   nameext_from_namerL   z   se    |&&s+LDSCBZIIK[
 K $Z[Kr#   c                      U R                  X5      [        U5      -   nU R                  X$5      nXU U[        U5      -   4$ ! [         a    SUc  SOU4s $ f = f)aY  Extract the text between 'begin' and 'end' from 'txt'

Args:
    txt: String to search in
    begin: First string to be searched for
    end: Second string to be searched for after 'begin'
    pos: Starting position for searches in 'txt'

Returns:
    The string between the two search-strings 'begin' and 'end' beginning
    with position 'pos' in 'txt' as well as the position after 'end'.

    If at least one of 'begin' or 'end' is not found, None and the original
    value of 'pos' is returned

Examples:
    extract("abcde", "b", "d")    -> "c" , 4
    extract("abcde", "b", "d", 3) -> None, 3
Nr   r3   rH   r;   )r   beginendposfirstlasts         r	   extractrT      sc    (/		%%E
2yy$SX-- /#+Q3../s   ?A AAc                      U R                  U5      [        U5      -   nXU R                  X$5       $ ! [         a    Us $ f = f)z$Stripped-down version of 'extract()'rN   )r   rO   rP   defaultrR   s        r	   extrrW      sF    		% 3u:-3.// s   /2 A Ac                      [        U5      nU R                  US U5      nU R                  X%U-   5      nXU-   U U4$ ! [         a    S Uc  SOU4s $ f = f)N)rH   rindexr3   r;   )r   rO   rP   rQ   lbegrR   rS   s          r	   rextractr\      sh    05z

5$,yydl+4<%u,, 03;RC//0s   := AAc                      U R                  USU5      [        U5      -   nXU R                  X%5       $ ! [         a    Us $ f = f)z%Stripped-down version of 'rextract()'N)rZ   rH   r3   r;   )r   rO   rP   rQ   rV   rR   s         r	   rextrr^      sJ    

5$,s5z93.// s   14 AAc                 l    Uc  0 nU H!  u  pEn[        XXb5      u  prU(       d  M  XsU'   M#     X2c  S4$ U4$ )z<Calls extract for each rule and returns the result in a dictr   )rT   )r   rulesrQ   valueskeyrO   rP   results           r	   extract_allrd      sK    ~ Cc#33 3K ! 1,,,,r#   c              #      #     U R                   n[        U5      n[        U5      n U" X5      U-   nU" X'5      nX-   nXU v   M  ! [         a     gf = f7f)zBYield values that would be returned by repeated calls of extract()NrN   )	r   rO   rP   rQ   r3   r[   lendrR   rS   s	            r	   extract_iterrg      sj     
		5z3x%%,E$D+CD/!	 
  s#   AAA 
AAAAc                 4   ^^ U R                   U 4UU4S jjnU$ )z2Returns a function object that extracts from 'txt'c                    >  U" U T5      [        U 5      -   nU" X5      nU[        U5      -   mX4U $ ! [         a    Ts $ f = fN)rH   r;   )rO   rP   r3   r   rR   rS   rV   rQ   s         r	   rW   extract_from.<locals>.extr   sR    	%%E
2E$DS/CT?" 	N	s   .2 A A)r3   )r   rQ   rV   rW   s    `` r	   extract_fromrl      s    "yyc   Kr#   c                 P    SU ;   a  [        S5      R                  [        U 5      $ U $ )z<Convert JSON Unicode escapes in 'txt' into actual charactersz\uz\\u([0-9a-fA-F]{4}))r
   r   _hex_to_char)r   s    r	   parse_unicode_escapesro      s'    |()--lC@@Jr#   c                 2    [        [        U S   S5      5      $ )N   rD   )chrint)matchs    r	   rn   rn      s    s58R !!r#   c                    U (       d  U$ [        U 5      R                  5       n U S   R                  5       nX2;   a  SUR                  U5      -  nU SS n OSn [	        [        U 5      U-  5      $ ! [         a    Us $ f = f)z3Convert a bytes-amount ("500k", "2.5M", ...) to intrY   i   Nrq   )r   r   r   r3   roundfloatr4   )r!   rV   suffixesrS   muls        r	   parse_bytesrz      s    JE9??DhnnT**cr
U5\C'(( s   A/ /A>=A>c                 P    U (       d  U$  [        U 5      $ ! [         a    Us $ f = f)zConvert 'value' to int)rs   r;   r!   rV   s     r	   	parse_intr}   	  s-    5z    
 %%c                 P    U (       d  U$  [        U 5      $ ! [         a    Us $ f = f)zConvert 'value' to float)rw   r;   r|   s     r	   parse_floatr     s-    U| r~   c                 6   U (       d  0 $ 0 n U R                  S5       Hf  nUR                  S5      u  pEnU(       d	  U(       d  M'  [        UR                  SS5      5      nXB;  d  MI  [        UR                  SS5      5      X$'   Mh     U$ ! [         a     U$ f = f)zZParse a query string into name-value pairs

Ignore values whose name has been seen before
&=+r%   )r   r9   rG   replacer;   )qsemptyrc   
name_valuer?   eqr!   s          r	   parse_queryr     s    
 	F((3-J(2237ODeUUt||C56%#*5==c+B#CFL ( M  Ms   4B
 B
 '!B
 

BBc                 x   U (       d  0 $ 0 n U R                  S5       H  nUR                  S5      u  pEnU(       d  M   [        UR                  SS5      5      n[        UR                  SS5      5      nXA;   a!  XB;   a  X$   R	                  U5        Mu  U/X$'   M|  XB;  d  M  XbU'   M     U$ ! [
         a     U$ f = f)z\Parse a query string into name-value pairs

Combine values of names in 'as_list' into lists
r   r   r   r%   )r   r9   rG   r   appendr;   )r   as_listrc   r   r?   r   r!   s          r	   parse_query_listr   2  s    
 	F((3-J(2237ODert||C56c3 78?~++E2(-w'#(4L ( M  Ms   -B+ AB+ !B+ +
B98B9c                     SR                  U R                  5        VVs/ s H  u  p[        U5       S[        U5       3PM      snn5      $ s  snnf )Nr   r   )r   itemsquote)paramsr?   r!   s      r	   build_queryr   M  sN    88!<<>)KD ;-qu')   s   %A
)r%   r%   )r)   rj   )r   )Nr   )NN)r   bkmgtp)r   )g        )F) ),__doc__htmlurllib.parseurllibr
   	re_module	_compilercompiler   AttributeErrorsre_compiler   r   r   r   r"   r&   r/   r5   r<   rB   rJ   rL   rT   rW   r\   r^   rd   rg   rl   ro   rn   rz   r}   r   r   r   r   parseurljoinr   rG   escaper   r   r#   r	   <module>r      s    8   /$$,,J Z
 	5.

'" /80-"(*6 ,,


,,

	==S
  /&&..J/s   C; ;DD