pychallenge之四
<!-- urllib may help. DON'T TRY ALL NOTHINGS, since it will never
end. 400 times is more than enough. -->
点击图片后页面显示:and the next nothing is 44827,看起来是一串链式的网页(每一页给出下一页的 nothing 值),需要逐页跟进
# -*- coding: utf-8 -*-
import requests
import re
import time
class NothingException(Exception):
    """Signal that a page did not yield exactly one "next nothing" phrase.

    Attributes:
        length: number of matches found (the raise site in this file passes
            the length of the match list).
        rtvalue: extra value supplied by the raiser; the raise site in this
            file always passes 1.
    """

    def __init__(self, length, rtvalue):
        # The base class is initialised without arguments, so ``args`` is ().
        super(NothingException, self).__init__()
        self.rtvalue = rtvalue
        self.length = length
def findnothing(start_noth):
    """
    Follow the linkedlist.php chain starting from ``start_noth``.

    Each request fetches ``linkedlist.php?nothing=<value>``. A page is
    expected to contain exactly one "and the next nothing is <digits>"
    phrase; those digits become the next value to request. At most 400
    requests are made per call, with a one second pause after each.

    When a page does not contain exactly one such phrase, the page body is
    printed as a hint and the user is asked to type the next nothing by
    hand. The walk then restarts from that value (with a fresh 400-request
    budget). Entering an empty line stops the walk instead.

    :type start_noth: str
    :param start_noth: nothing value to start from
    :rtype: str
    :return: the most recently determined nothing value: the one whose page
             failed to load or did not match (when the user stops), or the
             next unvisited one when the request budget runs out
    """
    url_prefix = 'http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing='
    # The capture group makes findall() return just the digits, so no second
    # regex pass is needed. Raw string avoids the invalid "\d" escape.
    next_pattern = re.compile(r'and the next nothing is (\d+)')
    # Always holds a single nothing value as a string (never the match list),
    # so the return value has the documented type.
    nothing = start_noth
    try:
        for i in range(400):
            url = url_prefix + nothing
            r = requests.get(url)
            print("now it's %d time for requesting for page:%s" % (i, url))
            time.sleep(1)
            if r.status_code != 200:
                print("get page %s failed" % url)
                break
            found = next_pattern.findall(r.content)
            if len(found) != 1:
                # Zero or several matches: the page wording has changed.
                raise NothingException(len(found), 1)
            # Normal case: continue with the extracted nothing.
            nothing = found[0]
    except NothingException:
        print("now it's necessary for you to determine <nothing>.")
        print("hint is: %s" % r.content)
        # Unexpected wording: the next nothing must be decided by hand.
        m_noth = raw_input("please input <nothing>:").strip()
        if not m_noth:
            # Empty input means "stop here" (e.g. the page was the final
            # answer rather than another link in the chain).
            return nothing
        # Propagate the result of the restarted walk instead of dropping it.
        return findnothing(m_noth)
    return nothing
if __name__ == '__main__':
    # Script entry point: walk the chain starting at nothing=12345 and print
    # whatever findnothing returns.
    result = findnothing('12345')
    print(result)
最后结果是peak.html