import urllib

try:  # Python 3 module layout
    from html.parser import HTMLParser
    from urllib.request import urlopen
except ImportError:  # Python 2 module layout
    from HTMLParser import HTMLParser
    from urllib import urlopen
class linkParser(HTMLParser):
    """HTML parser that collects the ``href`` target of every ``<a>`` tag.

    Feed markup with ``feed()``; afterwards ``links`` holds the collected
    targets in the order the anchors were encountered.
    """

    def __init__(self):
        # Call the base initializer directly: Python 2's HTMLParser is an
        # old-style class, so super() is not usable there.
        HTMLParser.__init__(self)
        self.links = []

    def handle_starttag(self, tag, attrs):
        """Record the ``href`` of an anchor start tag.

        Args:
            tag: Tag name as passed in by the parser.
            attrs: List of ``(name, value)`` attribute pairs for the tag.
        """
        if tag != 'a':
            return
        # Anchors without an href (e.g. <a name="top">) or with a valueless
        # href carry no link target, so they are skipped instead of raising
        # KeyError or recording None.
        href = dict(attrs).get('href')
        if href is not None:
            self.links.append(href)
# Download the page, collect the target of every anchor, and print one per line.
response = urlopen("http://www.codeSnippet.cn")
try:
    # Read at most 200000 bytes. The cap can cut a multi-byte character in
    # half, so undecodable bytes are replaced rather than raising.
    # NOTE(review): assumes the page is UTF-8 - confirm against the server's
    # declared charset.
    htmlSource = response.read(200000).decode("utf-8", "replace")
finally:
    response.close()

p = linkParser()
p.feed(htmlSource)
for link in p.links:
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(link)
# This snippet comes from http://www.codesnippet.cn/detail/100120131474.html
# Source: http://www.codesnippet.cn/detail/100120131474.html