B
    u9a(  ã               @   sø  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	m
Z
 G dd„ dƒZG dd„ deejƒZG dd	„ d	eejƒZG d
d„ deejƒZG dd„ deƒZG dd„ deejƒZG dd„ deejƒZG dd„ deƒZG dd„ deejƒZG dd„ deejƒZG dd„ deejƒZG dd„ deejƒZG dd„ deƒZG dd„ deejƒZG d d!„ d!eejƒZG d"d#„ d#eejƒZG d$d%„ d%eejƒZG d&d'„ d'eejƒZG d(d)„ d)eejƒZG d*d+„ d+e	ƒZG d,d-„ d-ejƒZ G d.d/„ d/ejƒZ!e"d0krôe #¡  dS )1é    N)Úsupport)ÚBaseHTTPRequestHandlerÚ
HTTPServerc               @   s<   e Zd ZdZdZg Zg Zdd„ Zdd„ Zdd„ Z	d	d
„ Z
dS )ÚBaseRobotTestÚ Ztest_robotparserc             C   s,   t  | j¡ ¡ }tj ¡ | _| j |¡ d S )N)	ÚioÚStringIOÚ
robots_txtÚ	readlinesÚurllibÚrobotparserÚRobotFileParserÚparserÚparse)ÚselfÚlines© r   ú&/usr/lib/python3.7/test_robotparser.pyÚsetUp   s    zBaseRobotTest.setUpc             C   s$   t |tƒr|\}}||fS | j|fS )N)Ú
isinstanceÚtupleÚagent)r   Úurlr   r   r   r   Úget_agent_and_url   s    
zBaseRobotTest.get_agent_and_urlc          
   C   sP   xJ| j D ]@}|  |¡\}}| j||d |  | j ||¡¡ W d Q R X qW d S )N)r   r   )Úgoodr   ÚsubTestÚ
assertTruer   Ú	can_fetch)r   r   r   r   r   r   Útest_good_urls   s    zBaseRobotTest.test_good_urlsc          
   C   sP   xJ| j D ]@}|  |¡\}}| j||d |  | j ||¡¡ W d Q R X qW d S )N)r   r   )Úbadr   r   ÚassertFalser   r   )r   r   r   r   r   r   Útest_bad_urls!   s    zBaseRobotTest.test_bad_urlsN)Ú__name__Ú
__module__Ú__qualname__r	   r   r   r   r   r   r   r!   r   r   r   r   r   
   s   r   c               @   s"   e Zd ZdZddgZdddgZdS )ÚUserAgentWildcardTestz•User-agent: *
Disallow: /cyberworld/map/ # This is an infinite virtual URL space
Disallow: /tmp/ # these will soon disappear
Disallow: /foo.html
    ú/z
/test.htmlz/cyberworld/map/index.htmlz/tmp/xxxz	/foo.htmlN)r"   r#   r$   r	   r   r   r   r   r   r   r%   (   s   r%   c               @   s    e Zd ZdZdddgZdgZdS )ÚCrawlDelayAndCustomAgentTestzå# robots.txt for http://www.example.com/

User-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

# Cybermapper knows where to go.
User-agent: cybermapper
Disallow:
    r&   z
/test.html)Zcybermapperz/cyberworld/map/index.htmlz/cyberworld/map/index.htmlN)r"   r#   r$   r	   r   r   r   r   r   r   r'   3   s   
r'   c               @   s   e Zd ZdZg ZdddgZdS )ÚRejectAllRobotsTestz(# go away
User-agent: *
Disallow: /
    z/cyberworld/map/index.htmlr&   z/tmp/N)r"   r#   r$   r	   r   r   r   r   r   r   r(   D   s   r(   c               @   s   e Zd ZdZdZdd„ ZdS )ÚBaseRequestRateTestNc          
   C   s²   | j }x¦| j| j D ]–}|  |¡\}}| j||dp |  | |¡| j¡ | |¡}|  || j¡ | jd k	r |  |t	j
j¡ |  |j| jj¡ |  |j| jj¡ W d Q R X qW d S )N)r   r   )r   r   r   r   r   ÚassertEqualÚcrawl_delayÚrequest_rateZassertIsInstancer   r   ÚRequestRateZrequestsZseconds)r   r   r   r   Zparsed_request_rater   r   r   Útest_request_rateR   s"    



z%BaseRequestRateTest.test_request_rate)r"   r#   r$   r,   r+   r.   r   r   r   r   r)   N   s   r)   c               @   s   e Zd ZdZdgZdS )ÚEmptyFileTestr   z/fooN)r"   r#   r$   r	   r   r   r   r   r   r/   j   s   r/   c               @   s>   e Zd ZdZdZej dd¡ZdZ	dgZ
ddd	d
dddgZdS )ÚCrawlDelayAndRequestRateTestz’User-agent: figtree
Crawl-delay: 3
Request-rate: 9/30
Disallow: /tmp
Disallow: /a%3cd.html
Disallow: /a%2fb.html
Disallow: /%7ejoe/index.html
    Úfigtreeé	   é   é   )r1   z	/foo.htmlz/tmpz	/tmp.htmlz/tmp/a.htmlz/a%3cd.htmlz/a%3Cd.htmlz/a%2fb.htmlz/~joe/index.htmlN)r"   r#   r$   r	   r   r   r   r-   r,   r+   r   r   r   r   r   r   r0   o   s   	
r0   c               @   s   e Zd ZdZdS )ÚDifferentAgentTestzFigTree Robot libwww-perl/5.04N)r"   r#   r$   r   r   r   r   r   r5      s   r5   c               @   s*   e Zd ZdZdgZddddddgZd	Zd
S )ÚInvalidRequestRateTestzUser-agent: *
Disallow: /tmp/
Disallow: /a%3Cd.html
Disallow: /a/b.html
Disallow: /%7ejoe/index.html
Crawl-delay: 3
Request-rate: 9/banana
    z/tmpz/tmp/z/tmp/a.htmlz/a%3cd.htmlz/a%3Cd.htmlz	/a/b.htmlz/%7Ejoe/index.htmlr4   N)r"   r#   r$   r	   r   r   r+   r   r   r   r   r6   …   s
   	
r6   c               @   s   e Zd ZdZdgZg ZdS )ÚInvalidCrawlDelayTestz2User-Agent: *
Disallow: /.
Crawl-delay: pears
    z	/foo.htmlN)r"   r#   r$   r	   r   r   r   r   r   r   r7   •   s   r7   c               @   s    e Zd ZdZdZdgZdgZdS )ÚAnotherInvalidRequestRateTestzeUser-agent: Googlebot
Allow: /folder1/myfile.html
Disallow: /folder1/
Request-rate: whale/banana
    Ú	Googlebotz/folder1/myfile.htmlz/folder1/anotherfile.htmlN)r"   r#   r$   r	   r   r   r   r   r   r   r   r8   ¡   s   r8   c               @   s   e Zd ZdZdZdgZdS )ÚUserAgentOrderingTestzMUser-agent: Googlebot
Disallow: /

User-agent: Googlebot-Mobile
Allow: /
    r9   z/something.jpgN)r"   r#   r$   r	   r   r   r   r   r   r   r:   ®   s   
r:   c               @   s   e Zd ZdZdS )ÚUserAgentGoogleMobileTestzGooglebot-MobileN)r"   r#   r$   r   r   r   r   r   r;   ½   s   r;   c               @   s    e Zd ZdZdZdgZdgZdS )ÚGoogleURLOrderingTestzJUser-agent: Googlebot
Allow: /folder1/myfile.html
Disallow: /folder1/
    Z	googlebotz/folder1/myfile.htmlz/folder1/anotherfile.htmlN)r"   r#   r$   r	   r   r   r   r   r   r   r   r<   Á   s   r<   c               @   s   e Zd ZdZdgZdgZdS )ÚDisallowQueryStringTestz2User-agent: *
Disallow: /some/path?name=value
    z
/some/pathz/some/path?name=valueN)r"   r#   r$   r	   r   r   r   r   r   r   r=   Î   s   r=   c               @   s   e Zd ZdZdgZdgZdS )ÚUseFirstUserAgentWildcardTestzNUser-agent: *
Disallow: /some/path

User-agent: *
Disallow: /another/path
    z/another/pathz
/some/pathN)r"   r#   r$   r	   r   r   r   r   r   r   r>   Ø   s   r>   c               @   s   e Zd ZdZdgZdgZdS )ÚEmptyQueryStringTestz>User-agent: *
Allow: /some/path?
Disallow: /another/path?
    z/some/path?z/another/path?N)r"   r#   r$   r	   r   r   r   r   r   r   r?   å   s   r?   c               @   s0   e Zd ZdZej dd¡ZdZddgZ	dgZ
dS )	ÚDefaultEntryTestzOUser-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/
    r4   é   é   r&   z
/test.htmlz/cyberworld/map/index.htmlN)r"   r#   r$   r	   r   r   r-   r,   r+   r   r   r   r   r   r   r@   ð   s
   r@   c               @   s   e Zd ZdZdZdd„ ZdS )ÚStringFormattingTestzÆUser-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

# Cybermapper knows where to go.
User-agent: cybermapper
Disallow: /some/path
    zzUser-agent: cybermapper
Disallow: /some/path

User-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/

c             C   s   |   t| jƒ| j¡ d S )N)r*   Ústrr   Úexpected_output)r   r   r   r   Útest_string_formatting  s    z+StringFormattingTest.test_string_formattingN)r"   r#   r$   r	   rE   rF   r   r   r   r   rC   ý   s   
rC   c               @   s   e Zd Zdd„ Zdd„ ZdS )ÚRobotHandlerc             C   s   |   dd¡ d S )Ni“  zForbidden access)Z
send_error)r   r   r   r   Údo_GET  s    zRobotHandler.do_GETc             G   s   d S )Nr   )r   ÚformatÚargsr   r   r   Úlog_message  s    zRobotHandler.log_messageN)r"   r#   r$   rH   rK   r   r   r   r   rG     s   rG   c               @   s*   e Zd Zdd„ Zdd„ Zejdd„ ƒZdS )ÚPasswordProtectedSiteTestCasec             C   sB   t tjdftƒ| _tjd| jjddid| _d| j_	| j 
¡  d S )Nr   zHTTPServer servingZpoll_intervalg{®Gáz„?)ÚnameÚtargetÚkwargsT)r   r   ÚHOSTrG   ÚserverÚ	threadingZThreadZserve_foreverÚtZdaemonÚstart)r   r   r   r   r   #  s    z#PasswordProtectedSiteTestCase.setUpc             C   s"   | j  ¡  | j ¡  | j  ¡  d S )N)rQ   ZshutdownrS   ÚjoinZserver_close)r   r   r   r   ÚtearDown0  s    

z&PasswordProtectedSiteTestCase.tearDownc             C   s\   | j j}dtj d t|d ƒ }|d }tj ¡ }| |¡ | 	¡  |  
| d|¡¡ d S )Nzhttp://ú:rB   z/robots.txtÚ*)rQ   Zserver_addressr   rP   rD   r   r   r   Zset_urlÚreadr    r   )r   Zaddrr   Z
robots_urlr   r   r   r   ÚtestPasswordProtectedSite5  s    

z7PasswordProtectedSiteTestCase.testPasswordProtectedSiteN)r"   r#   r$   r   rV   r   Zreap_threadsrZ   r   r   r   r   rL   !  s   rL   c               @   sF   e Zd ZdZd e¡Zedd„ ƒZdd„ Zdd„ Z	d	d
„ Z
dd„ ZdS )ÚNetworkTestCasezhttp://www.pythontest.net/z{}elsewhere/robots.txtc          	   C   s@   t  d¡ t  | j¡  tj | j¡| _| j 	¡  W d Q R X d S )NZnetwork)
r   ZrequiresZtransient_internetÚbase_urlr   r   r   r	   r   rY   )Úclsr   r   r   Ú
setUpClassE  s    
zNetworkTestCase.setUpClassc             C   s$   d  | j|tj |¡d sdnd¡S )Nz{}{}{}rB   r&   r   )rI   r\   ÚosÚpathÚsplitext)r   r`   r   r   r   r   L  s    zNetworkTestCase.urlc             C   sV   |   | jj¡ |   | jj¡ |  | j ¡ d¡ |   | j d¡¡ |   | j d¡¡ d S )Nr   rX   )r    r   Údisallow_allÚ	allow_allZassertGreaterÚmtimer+   r,   )r   r   r   r   Ú
test_basicQ  s
    zNetworkTestCase.test_basicc             C   s˜   |   | j d|  d¡¡¡ |  | j d| j¡¡ |  | j d|  d¡¡¡ |  | j d|  d¡¡¡ |  | j d|  d¡¡¡ |   | j d| j¡¡ d S )NrX   Z	elsewhereZNutchZbrianZwebstats)r   r   r   r   r    r\   )r   r   r   r   Útest_can_fetchX  s    zNetworkTestCase.test_can_fetchc             C   sf   t j |  d¡¡}| ¡  |  |j¡ |  |j¡ |  	| 
¡ d¡ |  | d¡¡ |  | d¡¡ d S )Nzi-robot.txtr   rX   )r   r   r   r   rY   r   rc   r    rb   r*   rd   ZassertIsNoner+   r,   )r   r   r   r   r   Útest_read_404`  s    zNetworkTestCase.test_read_404N)r"   r#   r$   r\   rI   r	   Úclassmethodr^   r   re   rf   rg   r   r   r   r   r[   @  s   
r[   Ú__main__)$r   r_   rR   ZunittestZurllib.robotparserr   Ztestr   Zhttp.serverr   r   r   ZTestCaser%   r'   r(   r)   r/   r0   r5   r6   r7   r8   r:   r;   r<   r=   r>   r?   r@   rC   rG   rL   r[   r"   Úmainr   r   r   r   Ú<module>   s<   

	)
