
    i                         S SK r S SKrS SKrS SKJr  S SKJr  S SKJr  SSK	J
r
Jr  \R                  " S\R                  5      r1 Skr1 S	kr1 S
kr " S S\5      rS rS r " S S\5      rS rg)    N)
HTMLParser)name2codepoint)escape   )NotAllowedTagInvalidHTMLz(\s+)>   abipsubremh3h4hrliolulimgprecodeasidevideofigureiframestrong
blockquote
figcaption>   r   r   colr   wbrareabaselinkmetaembedinputparamtrackkeygensourcemenuitem>$   r   dddldth1h2r   r   h5h6r   r   r   r   divnavr   formmainr   tabletfootr   canvasr   footerheaderhgroupoutputaddressarticlesectionfieldsetnoscriptr   r    c                   D    \ rS rSrS rS rS rS rS rS r	S r
S	 rS
rg)HtmlToNodesParser#   c                     [         R                  " U 5        / U l        U R                  U l        / U l        S U l        / U l        g N)r   __init__nodescurrent_nodesparent_nodeslast_text_node	tags_pathselfs    </app/mltbenv/lib/python3.13/site-packages/telegraph/utils.pyrJ   HtmlToNodesParser.__init__$   s:    D!
!ZZ"    c                    U(       d  g SU R                   ;  ai  [        R                  SU5      nU R                  b   U R                  R	                  S5      (       a  UR                  S5      nU(       d  S U l        g Xl        U R                  (       a:  [        U R                  S   [        5      (       a  U R                  S==   U-  ss'   g U R                  R                  U5        g )Nr    )
rO   RE_WHITESPACEsubrN   endswithlstriprL   
isinstancestrappend)rQ   r   s     rR   add_str_nodeHtmlToNodesParser.add_str_node0   s    &!!#q)A""*d.A.A.J.J3.O.OHHSM&*#"#*T-?-?-CS"I"Ir"a'"%%a(rT   c                 |   U[         ;  a  [        U< S35      eU[        ;   a  S U l        SU0nU R                  R                  U5        U R                  R                  U5        U(       a  0 nXCS'   U H	  u  pVXdU'   M     U[        ;  a2  U R                  R                  U R                  5        / =U l        US'   g g )Nz tag is not allowedtagattrschildren)	ALLOWED_TAGSr   BLOCK_ELEMENTSrN   rO   r^   rL   VOID_ELEMENTSrM   )rQ   rb   
attrs_listnoderc   attrvalues          rR   handle_starttag!HtmlToNodesParser.handle_starttagE   s    l"3')< =>>. "&Ds|c"!!$'E!M)#d  * m#$$T%7%78466Dj!1 $rT   c                 p   U[         ;   a  g [        U R                  5      (       d  [        U< S35      eU R                  R	                  5       U l        U R
                  S   nUS   U:w  a  [        U< SUS   < 35      eU R                  R	                  5         US   (       d  UR	                  S5        g g )Nz missing start tagrW   rb   z tag closed instead of rd   )rg   lenrM   r   poprL   rO   )rQ   rb   	last_nodes      rR   handle_endtagHtmlToNodesParser.handle_endtag[   s    -4$$%%'9:;;!..224&&r*	Us"'>y?O>RSTT$MM*% %rT   c                 &    U R                  U5        g rI   )r_   )rQ   datas     rR   handle_dataHtmlToNodesParser.handle_datan   s    $rT   c                 F    U R                  [        [        U   5      5        g rI   )r_   chrr   )rQ   names     rR   handle_entityref"HtmlToNodesParser.handle_entityrefq   s    #nT234rT   c                     UR                  S5      (       a  [        [        USS  S5      5      nO[        [        U5      5      nU R                  U5        g )Nxr      )
startswithry   intr_   )rQ   rz   cs      rR   handle_charref HtmlToNodesParser.handle_charreft   sC    ??3CQR"%&ACIA!rT   c                     U R                   (       a$  U R                   S   S   S   n[        U< S35      eU R                  $ )NrW   rb   z tag is not closed)rM   r   rK   )rQ   not_closed_tags     rR   	get_nodesHtmlToNodesParser.get_nodes|   sC    !..r226u=N 22DEFFzzrT   )rL   rN   rK   rM   rO   N)__name__
__module____qualname____firstlineno__rJ   r_   rl   rr   rv   r{   r   r   __static_attributes__ rT   rR   rF   rF   #   s*    
)*7,&& 5rT   rF   c                 X    [        5       nUR                  U 5        UR                  5       $ rI   )rF   feedr   )html_contentparsers     rR   html_to_nodesr      s%     F
KKrT   c           	         / nUR                   n/ nU nSn US-  nU[        U5      :  a.  U(       d  GO	UR                  5       u  pEU" SXE   S    S35        MC  XE   n[        U[        5      (       a  U" [        U5      5        Mo  U" SUS    35        UR                  S5      (       a4  US   R                  5        H  u  pxU" SU S	[        U5       S
35        M     UR                  S5      (       a"  U" S5        UR                  XE45        US   SpTM  US   [        ;   a	  U" S5        OU" SUS    S35        GM&  SR                  U5      $ )NrW   r   z</rb   ><rc   rV   z=""rd   z/>z></ )
r^   ro   rp   r\   r]   r   getitemsrg   join)	rK   outr^   stackcurrr   ri   rj   rk   s	            rR   nodes_to_htmlr      sP   
CZZFED
A
	QD	>iikGDR'q)*wdC  6$< 4;- !88G#G}2244&6%=/34  5 88J3KLL$#:&!;-'4LSeQ'(? B 773<rT   c                   6    \ rS rSrS	S jrS rS rS rS rSr	g)
FilesOpener   c                 Z    [        U[        5      (       d  U/nXl        X l        / U l        g rI   )r\   listpaths
key_formatopened_files)rQ   r   r   s      rR   rJ   FilesOpener.__init__   s(    %&&GE
$rT   c                 "    U R                  5       $ rI   )
open_filesrP   s    rR   	__enter__FilesOpener.__enter__   s      rT   c                 $    U R                  5         g rI   )close_files)rQ   typerk   	tracebacks       rR   __exit__FilesOpener.__exit__   s    rT   c                 H   U R                  5         / n[        U R                  5       H  u  p#Sn[        U[        5      (       a  [        U5      S:  a
  US   nUS   n[        US5      (       a#  Un[        US5      (       a  UR                  nO,UnO)Un[        US5      nU R                  R                  U5        [        R                  " 5       R                  U5      S   nUR                  U R                  R                  U5      SR                  U5      XW445        M     U$ )	Nr      r   r   readrz   rbfile{})r   	enumerater   r\   tuplero   hasattrrz   openr   r^   	mimetypes	MimeTypes
guess_typer   format)rQ   filesr~   file_or_namerz   ffilenamemimetypes           rR   r   FilesOpener.open_files   s    (4OAD,..3|3D3I#A+A|V,, 1f%% vvH#H'4(!!((+ **,77A!DHLL''*X__Q-?,MN)  50 rT   c                 X    U R                    H  nUR                  5         M     / U l         g rI   )r   close)rQ   r   s     rR   r   FilesOpener.close_files   s%    ""AGGI # rT   )r   r   r   N)r   )
r   r   r   r   rJ   r   r   r   r   r   r   rT   rR   r   r      s    !>rT   r   c                  8    [         R                  " U 0 UDSSS.D6$ )N),:F)
separatorsensure_ascii)jsondumps)argskwargss     rR   
json_dumpsr      s    ::tQvQ*5QQrT   )r   rer   html.parserr   html.entitiesr   htmlr   
exceptionsr   r   compileUNICODErX   re   rg   rf   rF   r   r   objectr   r   r   rT   rR   <module>r      sr     	  " (  2 

8RZZ0
^
 ^B)X2& 2jRrT   