"""Print the title and link of every entry in a page's ``news_sort`` list.

The page is fetched over HTTP, decoded as GB2312 and parsed with
BeautifulSoup 3. For each direct child tag of the first ``<ul>`` inside the
element whose id is ``news_sort``, the first ``<a>`` is looked up and one
line of the form ``<title> <href>`` is printed.

NOTE: this is a Python 2 script (``urllib2``, ``unicode``, BeautifulSoup 3).

Source: http://www.codesnippet.cn/detail/050820134956.html
"""
from contextlib import closing
import urllib2

from BeautifulSoup import BeautifulSoup

# closing() releases the HTTP response even if read() raises.
with closing(urllib2.urlopen('http://www.xxx')) as response:
    content = response.read()

# Decode once (dropping undecodable bytes) and hand BeautifulSoup the unicode
# text directly; re-encoding to UTF-8 bytes first would only make the parser
# detect and decode the encoding a second time.
html = unicode(content, 'gb2312', 'ignore')
soup = BeautifulSoup(html)

container = soup.find(id='news_sort')
news_list = container.find('ul') if container is not None else None
if news_list is None:
    # Fail with a readable message instead of an AttributeError on None.
    raise SystemExit("no <ul> found inside the element with id 'news_sort'")

# Visit direct child *tags* only. Iterating over .contents also yields the
# whitespace text nodes between the items; on those, .find('a') is the string
# method and returns -1, which is truthy and then fails on ['href'].
for entry in news_list.findAll(True, recursive=False):
    link = entry.find('a')
    if link is None:
        continue
    href = link.get('href')
    if href is None:
        # An anchor without an href has nothing to print.
        continue
    # .string is None when the anchor wraps nested markup; fall back to the
    # concatenation of all text nodes inside it.
    title = link.string or ''.join(link.findAll(text=True))
    print(title + ' ' + href)

# TODO: get the database set up first, build a template, then fill the data
# into it.