a
    äzeV+  ã                   @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZ G dd„ dƒZG dd„ deejƒZG d	d
„ d
eejƒZG dd„ deejƒZG dd„ deejƒZG dd„ deƒZG dd„ deejƒZG dd„ deejƒZG dd„ deƒZG dd„ deejƒZG dd„ deejƒZG dd„ deejƒZG dd„ deejƒZG dd „ d eƒZG d!d"„ d"eejƒZG d#d$„ d$eejƒZG d%d&„ d&eejƒZG d'd(„ d(eejƒZG d)d*„ d*eejƒZ G d+d,„ d,eejƒZ!G d-d.„ d.eƒZ"G d/d0„ d0ejƒZ#G d1d2„ d2ejƒZ$e%d3kre &¡  dS )4é    N)Úsupport)Úsocket_helper)ÚBaseHTTPRequestHandlerÚ
HTTPServerc                   @   sH   e Zd ZdZdZg Zg ZdZdd„ Zdd„ Z	dd	„ Z
d
d„ Zdd„ ZdS )ÚBaseRobotTestÚ Ztest_robotparserNc                 C   s,   t  | j¡ ¡ }tj ¡ | _| j |¡ d S ©N)	ÚioÚStringIOÚ
robots_txtÚ	readlinesÚurllibÚrobotparserÚRobotFileParserÚparserÚparse)ÚselfÚlines© r   ú+/usr/lib/python3.9/test/test_robotparser.pyÚsetUp   s    zBaseRobotTest.setUpc                 C   s$   t |tƒr|\}}||fS | j|fS r   )Ú
isinstanceÚtupleÚagent©r   Úurlr   r   r   r   Úget_agent_and_url   s    
zBaseRobotTest.get_agent_and_urlc              	   C   s`   | j D ]T}|  |¡\}}| j||d$ |  | j ||¡¡ W d   ƒ q1 sP0    Y  qd S ©N)r   r   )Úgoodr   ÚsubTestÚ
assertTruer   Ú	can_fetchr   r   r   r   Útest_good_urls   s    
zBaseRobotTest.test_good_urlsc              	   C   s`   | j D ]T}|  |¡\}}| j||d$ |  | j ||¡¡ W d   ƒ q1 sP0    Y  qd S r   )Úbadr   r   ÚassertFalser   r!   r   r   r   r   Útest_bad_urls#   s    
zBaseRobotTest.test_bad_urlsc                 C   s   |   | j ¡ | j¡ d S r   )ÚassertEqualr   Ú	site_maps©r   r   r   r   Útest_site_maps)   s    zBaseRobotTest.test_site_maps)Ú__name__Ú
__module__Ú__qualname__r   r   r   r#   r'   r   r   r"   r%   r)   r   r   r   r   r      s   r   c                   @   s    e Zd ZdZddgZg d¢ZdS )ÚUserAgentWildcardTestz•User-agent: *
Disallow: /cyberworld/map/ # This is an infinite virtual URL space
Disallow: /tmp/ # these will soon disappear
Disallow: /foo.html
    ú/ú
/test.html)ú/cyberworld/map/index.htmlz/tmp/xxxú	/foo.htmlN©r*   r+   r,   r   r   r#   r   r   r   r   r-   -   s   r-   c                   @   s   e Zd ZdZg d¢ZdgZdS )ÚCrawlDelayAndCustomAgentTestzå# robots.txt for http://www.example.com/

User-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

# Cybermapper knows where to go.
User-agent: cybermapper
Disallow:
    )r.   r/   )Zcybermapperr0   r0   Nr2   r   r   r   r   r3   8   s   r3   c                   @   s&   e Zd ZdZddgZdgZddgZdS )ÚSitemapTesta  # robots.txt for http://www.example.com/

User-agent: *
Sitemap: http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xml
Sitemap: http://www.google.com/hostednews/sitemap_index.xml
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

    r.   r/   r0   z7http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xmlz2http://www.google.com/hostednews/sitemap_index.xmlN)r*   r+   r,   r   r   r#   r'   r   r   r   r   r4   I   s   
ÿr4   c                   @   s   e Zd ZdZg Zg d¢ZdS )ÚRejectAllRobotsTestz(# go away
User-agent: *
Disallow: /
    )r0   r.   ú/tmp/Nr2   r   r   r   r   r5   Z   s   r5   c                   @   s   e Zd ZdZdZdd„ ZdS )ÚBaseRequestRateTestNc              	   C   sÂ   | j }| j| j D ]ª}|  |¡\}}| j||dz |  | |¡| j¡ | |¡}|  || j¡ | jd urž|  |t	j
j¡ |  |j| jj¡ |  |j| jj¡ W d   ƒ q1 s²0    Y  qd S r   )r   r   r#   r   r   r&   Úcrawl_delayÚrequest_rateZassertIsInstancer   r   ÚRequestRateZrequestsZseconds)r   r   r   r   Zparsed_request_rater   r   r   Útest_request_rateh   s(    

þþþz%BaseRequestRateTest.test_request_rate)r*   r+   r,   r9   r8   r;   r   r   r   r   r7   d   s   r7   c                   @   s   e Zd ZdZdgZdS )ÚEmptyFileTestr   z/fooN)r*   r+   r,   r   r   r   r   r   r   r<   €   s   r<   c                   @   s4   e Zd ZdZdZej dd¡ZdZ	dgZ
g d¢ZdS )	ÚCrawlDelayAndRequestRateTestz’User-agent: figtree
Crawl-delay: 3
Request-rate: 9/30
Disallow: /tmp
Disallow: /a%3cd.html
Disallow: /a%2fb.html
Disallow: /%7ejoe/index.html
    Úfigtreeé	   é   é   )r>   r1   )ú/tmpz	/tmp.htmlú/tmp/a.htmlú/a%3cd.htmlú/a%3Cd.htmlz/a%2fb.htmlz/~joe/index.htmlN)r*   r+   r,   r   r   r   r   r:   r9   r8   r   r#   r   r   r   r   r=   …   s   	r=   c                   @   s   e Zd ZdZdS )ÚDifferentAgentTestzFigTree Robot libwww-perl/5.04N©r*   r+   r,   r   r   r   r   r   rF   —   s   rF   c                   @   s"   e Zd ZdZdgZg d¢ZdZdS )ÚInvalidRequestRateTestzUser-agent: *
Disallow: /tmp/
Disallow: /a%3Cd.html
Disallow: /a/b.html
Disallow: /%7ejoe/index.html
Crawl-delay: 3
Request-rate: 9/banana
    rB   )r6   rC   rD   rE   z	/a/b.htmlz/%7Ejoe/index.htmlrA   N)r*   r+   r,   r   r   r#   r8   r   r   r   r   rH   ›   s   	rH   c                   @   s   e Zd ZdZdgZg ZdS )ÚInvalidCrawlDelayTestz2User-Agent: *
Disallow: /.
Crawl-delay: pears
    r1   Nr2   r   r   r   r   rI   «   s   rI   c                   @   s    e Zd ZdZdZdgZdgZdS )ÚAnotherInvalidRequestRateTestzeUser-agent: Googlebot
Allow: /folder1/myfile.html
Disallow: /folder1/
Request-rate: whale/banana
    Ú	Googlebotú/folder1/myfile.htmlú/folder1/anotherfile.htmlN©r*   r+   r,   r   r   r   r#   r   r   r   r   rJ   ·   s   rJ   c                   @   s   e Zd ZdZdZdgZdS )ÚUserAgentOrderingTestzMUser-agent: Googlebot
Disallow: /

User-agent: Googlebot-Mobile
Allow: /
    rK   z/something.jpgN)r*   r+   r,   r   r   r#   r   r   r   r   rO   Ä   s   rO   c                   @   s   e Zd ZdZdS )ÚUserAgentGoogleMobileTestzGooglebot-MobileNrG   r   r   r   r   rP   Ó   s   rP   c                   @   s    e Zd ZdZdZdgZdgZdS )ÚGoogleURLOrderingTestzJUser-agent: Googlebot
Allow: /folder1/myfile.html
Disallow: /folder1/
    Z	googlebotrL   rM   NrN   r   r   r   r   rQ   ×   s   rQ   c                   @   s   e Zd ZdZdgZdgZdS )ÚDisallowQueryStringTestz2User-agent: *
Disallow: /some/path?name=value
    ú
/some/pathz/some/path?name=valueNr2   r   r   r   r   rR   ä   s   rR   c                   @   s   e Zd ZdZdgZdgZdS )ÚUseFirstUserAgentWildcardTestzNUser-agent: *
Disallow: /some/path

User-agent: *
Disallow: /another/path
    z/another/pathrS   Nr2   r   r   r   r   rT   î   s   rT   c                   @   s   e Zd ZdZdgZdgZdS )ÚEmptyQueryStringTestz>User-agent: *
Allow: /some/path?
Disallow: /another/path?
    z/some/path?z/another/path?Nr2   r   r   r   r   rU   û   s   rU   c                   @   s0   e Zd ZdZej dd¡ZdZddgZ	dgZ
dS )	ÚDefaultEntryTestzOUser-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/
    rA   é   é   r.   r/   r0   N)r*   r+   r,   r   r   r   r:   r9   r8   r   r#   r   r   r   r   rV     s
   rV   c                   @   s   e Zd ZdZdZdd„ ZdS )ÚStringFormattingTestzÆUser-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

# Cybermapper knows where to go.
User-agent: cybermapper
Disallow: /some/path
    zxUser-agent: cybermapper
Disallow: /some/path

User-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/c                 C   s   |   t| jƒ| j¡ d S r   )r&   Ústrr   Úexpected_outputr(   r   r   r   Útest_string_formatting)  s    z+StringFormattingTest.test_string_formattingN)r*   r+   r,   r   r[   r\   r   r   r   r   rY     s   
rY   c                   @   s   e Zd Zdd„ Zdd„ ZdS )ÚRobotHandlerc                 C   s   |   dd¡ d S )Ni“  zForbidden access)Z
send_errorr(   r   r   r   Údo_GET/  s    zRobotHandler.do_GETc                 G   s   d S r   r   )r   ÚformatÚargsr   r   r   Úlog_message2  s    zRobotHandler.log_messageN)r*   r+   r,   r^   ra   r   r   r   r   r]   -  s   r]   c                   @   s*   e Zd Zdd„ Zdd„ Zejdd„ ƒZdS )ÚPasswordProtectedSiteTestCasec                 C   sP   |   tjj¡ ttjdftƒ| _t	j
d| jjddid| _d| j_| j ¡  d S )Nr   zHTTPServer servingZpoll_intervalg{®Gáz„?)ÚnameÚtargetÚkwargsT)Z
addCleanupr   ZrequestÚ
urlcleanupr   r   ÚHOSTr]   ÚserverÚ	threadingÚThreadZserve_foreverÚtÚdaemonÚstartr(   r   r   r   r   8  s    úz#PasswordProtectedSiteTestCase.setUpc                 C   s"   | j  ¡  | j ¡  | j  ¡  d S r   )rh   Úshutdownrk   ÚjoinZserver_closer(   r   r   r   ÚtearDownH  s    

z&PasswordProtectedSiteTestCase.tearDownc                 C   s\   | j j}dtj d t|d ƒ }|d }tj ¡ }| |¡ | 	¡  |  
| d|¡¡ d S )Nzhttp://ú:rX   z/robots.txtÚ*)rh   Zserver_addressr   rg   rZ   r   r   r   Zset_urlÚreadr$   r!   )r   Úaddrr   Z
robots_urlr   r   r   r   ÚtestPasswordProtectedSiteM  s    

z7PasswordProtectedSiteTestCase.testPasswordProtectedSiteN)r*   r+   r,   r   rp   r   Zreap_threadsru   r   r   r   r   rb   6  s   rb   c                   @   sF   e Zd ZdZd e¡Zedd„ ƒZdd„ Zdd„ Z	d	d
„ Z
dd„ ZdS )ÚNetworkTestCasezhttp://www.pythontest.net/z{}elsewhere/robots.txtc                 C   sT   t  d¡ t | j¡* tj | j¡| _	| j	 
¡  W d   ƒ n1 sF0    Y  d S )NZnetwork)r   Zrequiresr   Ztransient_internetÚbase_urlr   r   r   r   r   rs   )Úclsr   r   r   Ú
setUpClass]  s    
zNetworkTestCase.setUpClassc                 C   s$   d  | j|tj |¡d sdnd¡S )Nz{}{}{}rX   r.   r   )r_   rw   ÚosÚpathÚsplitext)r   r{   r   r   r   r   d  s    ÿzNetworkTestCase.urlc                 C   sV   |   | jj¡ |   | jj¡ |  | j ¡ d¡ |   | j d¡¡ |   | j d¡¡ d S )Nr   rr   )r$   r   Údisallow_allÚ	allow_allZassertGreaterÚmtimer8   r9   r(   r   r   r   Ú
test_basici  s
    zNetworkTestCase.test_basicc                 C   s˜   |   | j d|  d¡¡¡ |  | j d| j¡¡ |  | j d|  d¡¡¡ |  | j d|  d¡¡¡ |  | j d|  d¡¡¡ |   | j d| j¡¡ d S )Nrr   Z	elsewhereZNutchZbrianZwebstats)r    r   r!   r   r$   rw   r(   r   r   r   Útest_can_fetchp  s    zNetworkTestCase.test_can_fetchc                 C   sf   t j |  d¡¡}| ¡  |  |j¡ |  |j¡ |  	| 
¡ d¡ |  | d¡¡ |  | d¡¡ d S )Nzi-robot.txtr   rr   )r   r   r   r   rs   r    r~   r$   r}   r&   r   ZassertIsNoner8   r9   )r   r   r   r   r   Útest_read_404x  s    zNetworkTestCase.test_read_404N)r*   r+   r,   rw   r_   r   Úclassmethodry   r   r€   r   r‚   r   r   r   r   rv   X  s   

rv   Ú__main__)'r	   rz   ri   ZunittestZurllib.robotparserr   Útestr   Ztest.supportr   Zhttp.serverr   r   r   ZTestCaser-   r3   r4   r5   r7   r<   r=   rF   rH   rI   rJ   rO   rP   rQ   rR   rT   rU   rV   rY   r]   rb   rv   r*   Úmainr   r   r   r   Ú<module>   s@   "

	")
