- # -*- coding: utf-8 -*-
- from bs4 import BeautifulSoup
- import urllib2,re,time
- keys=["玩具","儿童玩具","少儿玩具","智力玩具","益智玩具","安全玩具"]
- urls={}#放排名总合#小数部分存放找到次数整数部分存放排名总数
- urls1={}#放排名次数
- urls2={}#放平均排名
- urls3={}#计算关键词覆盖率
- for key in keys:
- key=urllib2.quote(key)#汉字转码
- html=urllib2.urlopen("http://www.baidu.com/s?word=%s" %key)
- soup=BeautifulSoup(html)
- n=1
- for tables in soup.find_all("table",class_=re.compile("result"),id=re.compile("\\d{1,2}")):
- if tables.get("mu") and not tables.find("span","g"):#百度产品
- #mu="baidu.com/link?url=为百度知道baidu.com/s?tn=baidurt&rtt=1是新闻
- print n,
- if tables.get("mu").find("link?url=")+1:
- print "zhidao.baidu.com"
- url="zhidao.baidu.com"
- elif tables.get("mu").find("s?tn=baidurt")+1:
- print "news.baidu.com"
- url="news.baidu.com"
- else:
- print tables.get("mu").split("http://")[1].split("/")[0]
- url=tables.get("mu").split("http://")[1].split("/")[0]
- else:
- print n,
- if tables.find("span","g").get_text().find("wenku.baidu")+1:
- print "wenku.baidu.com"
- url="wenku.baidu.com"
- else:
- print tables.find("span","g").get_text().split(" ")[2].split("/")[0]
- url=tables.find("span","g").get_text().split(" ")[2].split("/")[0]
- try:
- urls[url]+=n
- urls1[url]+=1
- except:
- urls[url]=n
- urls1[url]=1
- n+=1
- print urls
- print "\\n"*3
- for each in urls:
- urls2[each]=urls[each]/urls1[each]
- print each
- print "total sum position is "+str(urls[each])
- print "Ranked "+str(urls1[each])+" Keywords"
- print "Keywords coverage is "+str(float(urls1[each])/len(keys)*100).split(".")[0]+"%"
- print "average position is "+str(urls2[each])
- print "\\n"*4
- #该片段来自于http://www.codesnippet.cn/detail/190720134692.html
Source: http://www.codesnippet.cn/detail/190720134692.html