#!/usr/bin/python
#coding = GBK
import os
import re
import sys
import urllib
from contextlib import closing

import MySQLdb
# Scrape the trend table from the 163.com lottery page, store one row per
# table line in the MySQL table `500wan`, and append the raw parsed record
# to a local text file.
#
# The script targets Python 2 (urllib.urlopen, MySQLdb). It is written so
# that the module still parses and imports under Python 3, but main() itself
# has only ever been meant to run on Python 2.

if sys.version_info[0] == 2:
    # Python 2 only: keep the original process-wide default-encoding switch so
    # implicit unicode -> str conversions (print, file.write) use UTF-8.
    reload(sys)
    sys.setdefaultencoding("utf-8")

TREND_URL = 'http://zx.caipiao.163.com/trend/ssq_basic.html?beginPeriod=2004001&endPeriod=2012149&historyPeriod=2012150&year='
OUTPUT_PATH = '/home/liukai/python/data.txt'

# The original loop stopped at fragment index 1591.
# NOTE(review): presumably sized to the period range hard-coded in TREND_URL
# -- confirm before changing the URL.
MAX_ROWS = 1591

# Fragments at index 5, 11, 17, ... are skipped.
# NOTE(review): presumably non-data spacer rows in the chart -- verify
# against the page markup.
ROW_GROUP = 6

# Everything between `<tbody id="cpdata"?` and `</tbody`.
TBODY_RE = re.compile('(?<=<tbody id="cpdata".).+?(?=</tbody)', re.I | re.S)
# `title="<text>` up to (not including) the closing `">` of the tag.
TITLE_RE = re.compile('(?=title=).+?(?=">)', re.I | re.S)

# Fixed character offsets into a parsed record: the date first, then the
# seven two-character ball values, in column order of INSERT_SQL.
FIELD_SLICES = (
    (7, 16),
    (19, 21),
    (21, 23),
    (23, 25),
    (25, 27),
    (27, 29),
    (29, 31),
    (31, 33),
)

# Parameterized statement: the values come from a remote page and must never
# be spliced into the SQL text. Column name `fouth` matches the statement the
# original script issued.
INSERT_SQL = (
    "insert into 500wan "
    "(data_time,first,second,third,fouth,five,six,seven) "
    "values(%s,%s,%s,%s,%s,%s,%s,%s)"
)


def fetch_rows(url=TREND_URL):
    """Download the trend page and return its table body split on '</tr>'.

    The tbody section is located in the raw response first and only that
    section is decoded as UTF-8, exactly as the original script did.

    Raises:
        ValueError: if the page contains no `<tbody id="cpdata">` section.
    """
    with closing(urllib.urlopen(url)) as pager:
        page = pager.read()
    match = TBODY_RE.search(page)
    if match is None:
        raise ValueError('no <tbody id="cpdata"> section found in %s' % url)
    return match.group().decode('utf-8').split('</tr>')


def parse_row(row_html):
    """Return the record string for one table-row fragment.

    The record is the row's title text followed by the text of every cell
    (after the first) whose markup mentions 'chartBall'. The title is taken
    as empty when the matched attribute does not split into exactly two
    parts on '"'.

    Returns:
        The record string, or None when the fragment has no title attribute
        (e.g. the trailing fragment after the last '</tr>').
    """
    match = TITLE_RE.search(row_html)
    if match is None:
        return None
    parts = match.group().split('"')
    title = parts[1] if len(parts) == 2 else ""
    cells = row_html.split('</td>')
    # The first cell is skipped; for the others, the text after the first '>'
    # of the cell is the displayed ball number.
    balls = [cell.split(">")[1] for cell in cells[1:] if 'chartBall' in cell]
    return title + "".join(balls)


def split_record(record):
    """Slice a record into (date, first, second, ..., seven).

    Slicing never raises; a record shorter than expected yields empty or
    truncated fields, as in the original script.
    """
    return tuple(record[start:stop] for start, stop in FIELD_SLICES)


def iter_records(rows, max_rows=MAX_ROWS):
    """Yield the parsed record of each usable fragment in `rows`.

    Only the first `max_rows` fragments are considered, every ROW_GROUP-th
    fragment (index 5, 11, ...) is skipped, and fragments without a title
    are ignored instead of crashing the run.
    """
    for index, row_html in enumerate(rows[:max_rows]):
        if index % ROW_GROUP == ROW_GROUP - 1:
            continue
        record = parse_row(row_html)
        if record is not None:
            yield record


def main():
    """Fetch the page, insert every parsed row into MySQL and log it."""
    rows = fetch_rows()
    # One connection for the whole run; closing() guarantees the connection,
    # cursor and file are released even if a row fails.
    with closing(MySQLdb.connect(host='localhost', user='root', passwd='', db='caipiao')) as con:
        with closing(con.cursor()) as cursor:
            with open(OUTPUT_PATH, 'a') as output:
                for record in iter_records(rows):
                    fields = split_record(record)
                    cursor.execute(INSERT_SQL, fields)
                    # Same console format as before: the seven balls, then the date.
                    print("_".join(fields[1:] + fields[:1]))
                    # One record per line (the original appended a literal
                    # backslash-n, which left the file as a single line).
                    output.write(record + '\n')
        # DB-API connections do not autocommit; without this the inserts are
        # discarded on transactional storage engines.
        con.commit()


if __name__ == "__main__":
    main()
# This snippet comes from http://www.codesnippet.cn/detail/121020136358.html
# Source: http://www.codesnippet.cn/detail/121020136358.html