import urllib

try:  # Python 3 exposes urlopen from urllib.request
    from urllib.request import urlopen
except ImportError:  # Python 2 keeps it on the top-level urllib module
    from urllib import urlopen
# Print every address ending in ".html" found in the href attribute of the
# <a> tags on a page.

PAGE_URL = 'http://www.hoopchina.com'


def _first_html_href(tag):
    """Return the href value of *tag* cut after its first '.html', or None.

    *tag* is the text of one opening tag without the closing '>'.
    None is returned when the tag has no ``href=`` or when the href value
    does not contain '.html'.
    """
    href_at = tag.find('href=')
    if href_at == -1:
        return None

    value = tag[href_at + len('href='):]
    quote = value[:1]
    if quote in ('"', "'"):
        # Quoted value: stop at the matching quote so that later attributes
        # (e.g. title="x.html") are never mistaken for part of the address.
        value = value[1:].split(quote, 1)[0]
    else:
        # Unquoted value: it ends at the first whitespace.
        parts = value.split(None, 1)
        value = parts[0] if parts else ''

    suffix_at = value.find('.html')
    if suffix_at == -1:
        return None
    # Keep the address up to and including '.html'; anything after it
    # (query string, fragment) is dropped.
    return value[:suffix_at + len('.html')]


def extract_html_links(content):
    """Return the '.html' addresses referenced by the ``<a`` tags in *content*.

    *content* is the page markup as text. The result is a list of
    addresses in document order, each truncated right after its first
    '.html'. Matching is case-sensitive and done by plain string search,
    not by a full HTML parser.
    """
    links = []
    pos = 0
    while True:
        tag_start = content.find('<a', pos)
        if tag_start == -1:
            # No more anchors: stop explicitly instead of relying on a
            # later find() happening to return -1.
            break

        tag_end = content.find('>', tag_start)
        if tag_end == -1:
            # Unterminated last tag: treat the rest of the page as the tag.
            tag_end = len(content)
        # tag_end is always past tag_start, so the scan strictly advances.
        pos = tag_end

        # '<a' is also a prefix of '<abbr', '<area', ...; an anchor that
        # carries attributes has whitespace right after the tag name.
        if not content[tag_start + 2:tag_start + 3].isspace():
            continue

        url = _first_html_href(content[tag_start:tag_end])
        if url is not None:
            links.append(url)
    return links


def main():
    """Fetch PAGE_URL and print each '.html' link of its anchors, one per line."""
    response = urlopen(PAGE_URL)
    try:
        raw = response.read()
    finally:
        response.close()

    # Python 2 read() already yields str; Python 3 yields bytes, which must
    # be decoded before searching with text patterns. Undecodable bytes are
    # replaced rather than raising, since only the link text is needed.
    if not isinstance(raw, str):
        raw = raw.decode('utf-8', 'replace')

    for url in extract_html_links(raw):
        print(url)


if __name__ == '__main__':
    main()
# This snippet comes from http://www.codesnippet.cn/detail/231220138178.html
# Source: http://www.codesnippet.cn/detail/231220138178.html