- #!/usr/bin/python
- # -*- coding:utf8 -*-
- import requests
- import re
- import os
- import time
- # from urllib
- import json
- from bs4 import BeautifulSoup
- from datetime import date
def getTimeExpire(time_play, time_gap):
    """Shift a ``YYYY-MM-DD HH:MM:SS`` timestamp by ``time_gap`` seconds.

    The timestamp is interpreted as local time (via ``time.mktime``), the
    offset is added, and the result is formatted back in the same layout.

    Args:
        time_play: Timestamp string in ``%Y-%m-%d %H:%M:%S`` format.
        time_gap: Offset in seconds (not milliseconds); may be negative.

    Returns:
        The shifted timestamp string, or ``''`` when ``time_play`` cannot be
        parsed (a failure message is printed in that case).
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    try:
        parsed = time.strptime(time_play, time_format)
    except (ValueError, TypeError):
        # Malformed text raises ValueError; a non-string argument raises
        # TypeError. Anything else (e.g. KeyboardInterrupt) must propagate.
        print('时间转化失败')
        return ''
    shifted = time.localtime(time.mktime(parsed) + time_gap)
    return time.strftime(time_format, shifted)
def gethtml():
    """Extract the World Cup schedule from a saved HTML page into a CSV file.

    Reads the hard-coded ``worldcup.html`` file, walks every schedule group
    under ``.b-pull-refresh-content`` and appends one CSV row per match to
    the hard-coded ``worldcup.csv`` file. Each row holds the two team names,
    the expiry time (kick-off minus ten minutes, via ``getTimeExpire``) and
    the kick-off time.

    The CSV is opened in append mode, so every call adds another header line
    followed by the rows found in the page.

    Raises:
        IndexError: If an expected element (group title, team name or status
            text) is missing from the page.
    """
    # TODO: fetch the page directly from the website instead of a saved copy;
    # the site paginates its results, which still has to be handled.
    with open('F:\\test\\python\\worldcup.html', 'r', encoding='utf-8') as f:
        content = f.read()
    soup = BeautifulSoup(content, 'lxml')
    nodes = soup.select('.b-pull-refresh-content> div')

    filename = "F:\\test\\python\\worldcup.csv"
    # One handle for the header and all rows instead of reopening per row.
    with open(filename, 'a') as out:
        # Header of the CSV file.
        out.write('team1,team2,time_expire,time_play \n')
        for node in nodes:
            day_title = node.select('.wa-match-schedule-list-title')[0].get_text().strip()
            matches = node.select('.sfc-contacts-list .wa-match-schedule-list-item')
            for match in matches:
                names = match.select('.wa-tiyu-schedule-item-name.c-line-clamp1')
                team1 = names[0].get_text().strip()
                team2 = names[1].get_text().strip()
                kickoff = match.select('.status-text')[0].get_text().strip()
                # Characters 2..7 of the group title are used as the date part.
                # NOTE(review): presumably that slice is "MM-DD " including the
                # separating space -- confirm against the real page markup.
                time_play = '2018-' + day_title[2:8] + kickoff + ':00'
                time_expire = getTimeExpire(time_play, -10 * 60)
                out.write(','.join((team1, team2, time_expire, time_play)) + '\n')
# gethtml()  # alternative: build the CSV from the saved HTML page
def getFromAPI():
    """Fetch World Cup fixtures from the tiyu.baidu.com match API into a CSV.

    Requests 15 dates spaced two days apart, starting at 2018-6-13 and
    rolling over to the 1st of the next month once the day exceeds 30, and
    sleeps one second before each request. For every returned match whose
    ``startTime`` is later than the current local time, a row
    ``left name,right name,expire time,start time`` is appended to the
    hard-coded ``worldcupFromAPI.csv`` file; the expire time is the start
    time minus ten minutes (via ``getTimeExpire``).

    Network and JSON decoding errors are not handled here and propagate to
    the caller.
    """
    filename = "F:\\test\\python\\worldcupFromAPI.csv"
    month = 6
    day = 11
    # Original author's note: covers 2018-06-14 through 07-15.
    for _ in range(15):
        day += 2
        if day > 30:
            month += 1
            day = 1
        url = ("http://tiyu.baidu.com/api/match/世界杯/live/date/2018-"
               + str(month) + '-' + str(day) + "/direction/after?from=self")
        time.sleep(1)  # pause one second before each request
        data = json.loads(requests.get(url, timeout=3).text)

        # NOTE(review): the pasted source lost its indentation, so it is
        # ambiguous whether matches were processed only when status == '0'.
        # This reading (process only on '0') is the conservative one --
        # confirm against the API's status semantics.
        if data['status'] != '0':
            continue
        print('为 0')

        # Plain string comparison is used to pick future matches; this assumes
        # startTime uses the same zero-padded "%Y-%m-%d %H:%M:%S" layout.
        now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        rows = []
        for matches in data['data']:
            for m in matches['list']:
                start = m['startTime']
                if start > now:
                    rows.append(m['leftLogo']['name'] + ',' + m['rightLogo']['name'] + ','
                                + getTimeExpire(start, -10 * 60) + ',' + start + '\n')

        # Touch the file only when there is something to append.
        if rows:
            with open(filename, 'a') as out:
                out.writelines(rows)
# Script entry point: run the API scrape. The stray leading "- " list marker
# from the paste is removed; it made this line a unary negation of the call's
# None result, which raises TypeError once the scrape has finished.
getFromAPI()
# Source: http://www.bubuko.com/infodetail-2654981.html