
    <e,                     H   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dlm
Z
 d dlmZmZ  G d d          Z G d d	eej                  Z G d
 deej                  Z G d deej                  Z G d deej                  Z G d de          Z G d deej                  Z G d deej                  Z G d de          Z G d deej                  Z G d deej                  Z G d deej                  Z G d deej                  Z G d  d!e          Z G d" d#eej                  Z G d$ d%eej                  Z G d& d'eej                  Z G d( d)eej                  Z  G d* d+eej                  Z! G d, d-eej                  Z" G d. d/e          Z# ej$        ej%        d0           G d1 d2ej                              Z& ej'                     G d3 d4ej                              Z(e)d5k    r ej*                     dS dS )6    N)support)socket_helper)threading_helper)BaseHTTPRequestHandler
HTTPServerc                   @    e Zd ZdZdZg Zg ZdZd Zd Z	d Z
d Zd ZdS )	BaseRobotTest test_robotparserNc                     t          j        | j                                                  }t          j                                        | _        | j                            |           d S N)	ioStringIO
robots_txt	readlinesurllibrobotparserRobotFileParserparserparse)selfliness     ,/usr/lib/python3.11/test/test_robotparser.pysetUpzBaseRobotTest.setUp   sQ    DO,,6688(88::%         c                 P    t          |t                    r	|\  }}||fS | j        |fS r   )
isinstancetupleagentr   urlr   s      r   get_agent_and_urlzBaseRobotTest.get_agent_and_url   s3    c5!! 	JE3#:z3r   c                    | j         D ]w}|                     |          \  }}|                     ||          5  |                     | j                            ||                     d d d            n# 1 swxY w Y   xd S N)r!   r   )goodr"   subTest
assertTruer   	can_fetchr    s      r   test_good_urlszBaseRobotTest.test_good_urls   s    9 	C 	CC//44JE3#U33 C C 5 5eS A ABBBC C C C C C C C C C C C C C C	C 	C   /A44A8	;A8	c                    | j         D ]w}|                     |          \  }}|                     ||          5  |                     | j                            ||                     d d d            n# 1 swxY w Y   xd S r$   )badr"   r&   assertFalser   r(   r    s      r   test_bad_urlszBaseRobotTest.test_bad_urls$   s    8 	D 	DC//44JE3#U33 D D  !6!6uc!B!BCCCD D D D D D D D D D D D D D D	D 	Dr*   c                 j    |                      | j                                        | j                   d S r   )assertEqualr   	site_mapsr   s    r   test_site_mapszBaseRobotTest.test_site_maps*   s.    ..00$.AAAAAr   )__name__
__module____qualname__r   r   r%   r,   r1   r   r"   r)   r.   r3    r   r   r	   r	      s        JED
CI! ! !
  C C CD D DB B B B Br   r	   c                   "    e Zd ZdZddgZg dZdS )UserAgentWildcardTestzUser-agent: *
Disallow: /cyberworld/map/ # This is an infinite virtual URL space
Disallow: /tmp/ # these will soon disappear
Disallow: /foo.html
    /
/test.html)/cyberworld/map/index.htmlz/tmp/xxx	/foo.htmlNr4   r5   r6   r   r%   r,   r7   r   r   r9   r9   .   s,        J D
A
A
ACCCr   r9   c                        e Zd ZdZg dZdgZdS )CrawlDelayAndCustomAgentTestz# robots.txt for http://www.example.com/

User-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

# Cybermapper knows where to go.
User-agent: cybermapper
Disallow:
    )r:   r;   )cybermapperr<   r<   Nr>   r7   r   r   r@   r@   9   s+        J NMMD'
(CCCr   r@   c                   (    e Zd ZdZddgZdgZddgZdS )SitemapTesta  # robots.txt for http://www.example.com/

User-agent: *
Sitemap: http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xml
Sitemap: http://www.google.com/hostednews/sitemap_index.xml
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

    r:   r;   r<   z7http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xmlz2http://www.google.com/hostednews/sitemap_index.xmlN)r4   r5   r6   r   r%   r,   r1   r7   r   r   rC   rC   J   s7        	J D'
(CJEGIIIr   rC   c                       e Zd ZdZg Zg dZdS )RejectAllRobotsTestz(# go away
User-agent: *
Disallow: /
    )r<   r:   /tmp/Nr>   r7   r   r   rE   rE   [   s'        J
 D
6
6
6CCCr   rE   c                       e Zd ZdZdZd ZdS )BaseRequestRateTestNc                 r   | j         }| j        | j        z   D ]}|                     |          \  }}|                     ||          5  |                     |                    |          | j                   |                    |          }|                     || j                   | j        o|                     |t          j
        j                   |                     |j        | j        j                   |                     |j        | j        j                   d d d            n# 1 swxY w Y    d S r$   )r   r%   r,   r"   r&   r0   crawl_delayrequest_rateassertIsInstancer   r   RequestRaterequestsseconds)r   r   r!   r   parsed_request_rates        r   test_request_ratez%BaseRequestRateTest.test_request_ratei   s   9tx' 	 	C//44JE3#U33    !3!3E!:!:D<LMMM&,&9&9%&@&@#  !4d6GHHH$0))+*6   $$+4)2   $$+3)1                	 	s   	CD**D.	1D.	)r4   r5   r6   rK   rJ   rQ   r7   r   r   rH   rH   e   s-        LK    r   rH   c                       e Zd ZdZdgZdS )EmptyFileTestr
   z/fooN)r4   r5   r6   r   r%   r7   r   r   rS   rS      s        J8DDDr   rS   c                   ^    e Zd ZdZdZej                            dd          ZdZ	dgZ
g dZdS )	CrawlDelayAndRequestRateTestzUser-agent: figtree
Crawl-delay: 3
Request-rate: 9/30
Disallow: /tmp
Disallow: /a%3cd.html
Disallow: /a%2fb.html
Disallow: /%7ejoe/index.html
    figtree	         )rV   r=   )/tmpz	/tmp.html/tmp/a.html/a%3cd.html/a%3Cd.htmlz/a%2fb.htmlz/~joe/index.htmlN)r4   r5   r6   r   r   r   r   rM   rK   rJ   r%   r,   r7   r   r   rU   rU      sR        J E%11!R88LK$%D. . .CCCr   rU   c                       e Zd ZdZdS )DifferentAgentTestzFigTree Robot libwww-perl/5.04Nr4   r5   r6   r   r7   r   r   r_   r_      s        ,EEEr   r_   c                   $    e Zd ZdZdgZg dZdZdS )InvalidRequestRateTestzUser-agent: *
Disallow: /tmp/
Disallow: /a%3Cd.html
Disallow: /a/b.html
Disallow: /%7ejoe/index.html
Crawl-delay: 3
Request-rate: 9/banana
    rZ   )rF   r[   r\   r]   z	/a/b.htmlz/%7Ejoe/index.htmlrY   N)r4   r5   r6   r   r%   r,   rJ   r7   r   r   rb   rb      s4        J 8D! ! !CKKKr   rb   c                       e Zd ZdZdgZg ZdS )InvalidCrawlDelayTestz2User-Agent: *
Disallow: /.
Crawl-delay: pears
    r=   Nr>   r7   r   r   rd   rd      s#        J
 =D
CCCr   rd   c                   "    e Zd ZdZdZdgZdgZdS )AnotherInvalidRequestRateTestzeUser-agent: Googlebot
Allow: /folder1/myfile.html
Disallow: /folder1/
Request-rate: whale/banana
    	Googlebot/folder1/myfile.html/folder1/anotherfile.htmlNr4   r5   r6   r   r   r%   r,   r7   r   r   rf   rf      s,        J E"#D&
'CCCr   rf   c                       e Zd ZdZdZdgZdS )UserAgentOrderingTestzMUser-agent: Googlebot
Disallow: /

User-agent: Googlebot-Mobile
Allow: /
    rg   z/something.jpgN)r4   r5   r6   r   r   r,   r7   r   r   rl   rl      s$        J E
CCCr   rl   c                       e Zd ZdZdS )UserAgentGoogleMobileTestzGooglebot-MobileNr`   r7   r   r   rn   rn      s        EEEr   rn   c                   "    e Zd ZdZdZdgZdgZdS )GoogleURLOrderingTestzJUser-agent: Googlebot
Allow: /folder1/myfile.html
Disallow: /folder1/
    	googlebotrh   ri   Nrj   r7   r   r   rp   rp      s,        J
 E"#D&
'CCCr   rp   c                       e Zd ZdZdgZdgZdS )DisallowQueryStringTestz2User-agent: *
Disallow: /some/path?name=value
    
/some/pathz/some/path?name=valueNr>   r7   r   r   rs   rs      s&        J >D"
#CCCr   rs   c                       e Zd ZdZdgZdgZdS )UseFirstUserAgentWildcardTestzNUser-agent: *
Disallow: /some/path

User-agent: *
Disallow: /another/path
    z/another/pathrt   Nr>   r7   r   r   rv   rv      s&        J D.CCCr   rv   c                       e Zd ZdZdgZdgZdS )EmptyQueryStringTestz>User-agent: *
Allow: /some/path?
Disallow: /another/path?
    z/some/path?z/another/path?Nr>   r7   r   r   rx   rx      s&        J
 ?D
CCCr   rx   c                   Z    e Zd ZdZej                            dd          ZdZddgZ	dgZ
dS )	DefaultEntryTestzOUser-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/
    rY         r:   r;   r<   N)r4   r5   r6   r   r   r   rM   rK   rJ   r%   r,   r7   r   r   rz   rz     sF        J %11!R88LKD'
(CCCr   rz   c                       e Zd ZdZdZd ZdS )StringFormattingTestzUser-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

# Cybermapper knows where to go.
User-agent: cybermapper
Disallow: /some/path
    zxUser-agent: cybermapper
Disallow: /some/path

User-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/c                 `    |                      t          | j                  | j                   d S r   )r0   strr   expected_outputr2   s    r   test_string_formattingz+StringFormattingTest.test_string_formatting*  s+    T[))4+?@@@@@r   N)r4   r5   r6   r   r   r   r7   r   r   r~   r~     s6        	JOA A A A Ar   r~   c                       e Zd Zd Zd ZdS )RobotHandlerc                 2    |                      dd           d S )Ni  zForbidden access)
send_errorr2   s    r   do_GETzRobotHandler.do_GET0  s    /00000r   c                     d S r   r7   )r   formatargss      r   log_messagezRobotHandler.log_message3  s    r   N)r4   r5   r6   r   r   r7   r   r   r   r   .  s2        1 1 1    r   r   z&Socket server requires working socket.c                   :    e Zd Zd Zd Zej        d             ZdS )PasswordProtectedSiteTestCasec                 4   |                      t          j        j                   t	          t
          j        dft                    | _        t          j
        d| j        j        ddi          | _        d| j        _        | j                                         d S )Nr   zHTTPServer servingpoll_intervalg{Gz?)nametargetkwargsT)
addCleanupr   request
urlcleanupr   r   HOSTr   server	threadingThreadserve_forevertdaemonstartr2   s    r   r   z#PasswordProtectedSiteTestCase.setUp=  s~    1222 -"4a!8,GG!%;, $D)+ + + r   c                     | j                                          | j                                         | j                                          d S r   )r   shutdownr   joinserver_closer2   s    r   tearDownz&PasswordProtectedSiteTestCase.tearDownM  s?      """""r   c                 X   | j         j        }dt          j        z   dz   t	          |d                   z   }|dz   }t
          j                                        }|                    |           |	                                 | 
                    |                    d|                     d S )Nzhttp://:r|   z/robots.txt*)r   server_addressr   r   r   r   r   r   set_urlreadr-   r(   )r   addrr!   
robots_urlr   s        r   testPasswordProtectedSitez7PasswordProtectedSiteTestCase.testPasswordProtectedSiteR  s    {)-,,s2Sa\\A=(
#3355s))#z::;;;;;r   N)r4   r5   r6   r   r   r   reap_threadsr   r7   r   r   r   r   7  sQ           # # #
 "< < #"< < <r   r   c                   j    e Zd ZdZd                    e          Zed             Zd Zd Z	d Z
d ZdS )	NetworkTestCasezhttp://www.pythontest.net/z{}elsewhere/robots.txtc                    t          j        d           t          j        | j                  5  t
          j                            | j                  | _	        | j	        
                                 d d d            d S # 1 swxY w Y   d S )Nnetwork)r   requiresr   transient_internetbase_urlr   r   r   r   r   r   )clss    r   
setUpClasszNetworkTestCase.setUpClassc  s    ###-cl;; 	 	+;;CNKKCJJOO	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   AA>>BBc                     d                     | j        |t          j                            |          d         sdnd          S )Nz{}{}{}r|   r:   r
   )r   r   ospathsplitext)r   r   s     r   r!   zNetworkTestCase.urlj  s?    M4BG,<,<T,B,B1,E!M2
 
 	
r   c                    |                      | j        j                   |                      | j        j                   |                     | j                                        d           |                      | j                            d                     |                      | j                            d                     d S )Nr   r   )r-   r   disallow_all	allow_allassertGreatermtimerJ   rK   r2   s    r   
test_basiczNetworkTestCase.test_basico  s    1222.///4;,,..222005566611#6677777r   c                    |                      | j                            d|                     d                               |                     | j                            d| j                             |                     | j                            d|                     d                               |                     | j                            d|                     d                               |                     | j                            d|                     d                               |                      | j                            d| j                             d S )Nr   	elsewhereNutchbrianwebstats)r'   r   r(   r!   r-   r   r2   s    r   test_can_fetchzNetworkTestCase.test_can_fetchv  s   --c488K3H3HIIJJJ..wFFGGG..w8I8IJJKKK..w8L8LMMNNN..sDHHZ4H4HIIJJJ--c4=AABBBBBr   c                    t           j                            |                     d                    }|                                 |                     |j                   |                     |j                   | 	                    |
                                d           |                     |                    d                     |                     |                    d                     d S )Nzi-robot.txtr   r   )r   r   r   r!   r   r'   r   r-   r   r0   r   assertIsNonerJ   rK   )r   r   s     r   test_read_404zNetworkTestCase.test_read_404~  s    #33DHH]4K4KLL())),---+++&,,S11222&--c2233333r   N)r4   r5   r6   r   r   r   classmethodr   r!   r   r   r   r7   r   r   r   r   ]  s         ,H)00::J  [
 
 

8 8 8C C C4 4 4 4 4r   r   __main__)+r   r   r   unittesturllib.robotparserr   testr   test.supportr   r   http.serverr   r   r	   TestCaser9   r@   rC   rE   rH   rS   rU   r_   rb   rd   rf   rl   rn   rp   rs   rv   rx   rz   r~   r   
skipUnlesshas_socket_supportr   requires_working_socketr   r4   mainr7   r   r   <module>r      s.   				 				                & & & & & & ) ) ) ) ) ) : : : : : : : :B B B B B B B BDB B B B BM8+< B B B) ) ) ) )=(2C ) ) )"G G G G G-!2 G G G"7 7 7 7 7-): 7 7 7    -   8    '):   
. . . . .#68I . . .$- - - - -5 - - -    ]H,=    	 	 	 	 	M8+< 	 	 	
( 
( 
( 
( 
(M83D 
( 
( 
(    M8+<        5   
( 
( 
( 
( 
(M8+< 
( 
( 
($ $ $ $ $mX-> $ $ $
 
 
 
 
M83D 
 
 
    =(*;   
) 
) 
) 
) 
)*H,= 
) 
) 
)A A A A A=(*; A A A4    )    , < < < < <H$5 < <	 <D ! ""'4 '4 '4 '4 '4h' '4 '4 #"'4R ZHMOOOOO r   