# coding:utf-8
import contextlib
import datetime
import json
import sys
import time

import pymysql
import requests
from bs4 import BeautifulSoup
# Python 2 only: make UTF-8 the implicit str<->unicode codec so that scraped
# Chinese text can be mixed with byte strings.  ``reload(sys)`` is needed
# because site.py removes ``sys.setdefaultencoding`` at interpreter start-up.
# Python 3 has no ``reload`` builtin and already defaults to UTF-8, so the
# whole step is skipped there.
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding('utf-8')
# Fetch the content of an article page.
def getartinfo(url, timeout=10):
    """Download an article page and extract the fields stored for it.

    Args:
        url: Address of the article page.
        timeout: Seconds to wait for the HTTP response before giving up.
            Optional; added so a stalled server cannot hang the script.

    Returns:
        dict with the keys
        ``curr``     - text of ``div.comilla-cerrar``,
        ``title``    - text of ``h2.articulo-titulo``,
        ``auchor``   - text of ``p.articulo-autor`` (the misspelt key is kept
                       on purpose: callers index the result by it),
        ``contents`` - the ``div.articulo-contenido`` element itself, or
                       ``None`` when the page has no such element,
        ``add_time`` - current Unix time in whole seconds.

    Raises:
        requests.HTTPError: the server answered with an error status.
        ValueError: one of the three required text elements is missing.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'lxml')

    def text_of(node, description):
        # ``.string`` is None when the element has nested markup, which made
        # the old ``.string.strip()`` blow up; get_text() handles both cases.
        if node is None:
            raise ValueError('%s not found in %s' % (description, url))
        return node.get_text(strip=True)

    return {
        'curr': text_of(soup.find('div', class_="comilla-cerrar"),
                        'div.comilla-cerrar'),
        'title': text_of(soup.find('h2', class_="articulo-titulo"),
                         'h2.articulo-titulo'),
        'auchor': text_of(soup.find('p', class_="articulo-autor"),
                          'p.articulo-autor'),
        'contents': soup.find('div', class_="articulo-contenido"),
        'add_time': int(time.time()),
    }
# Fetch the content of a question-and-answer page.
def getqueinfo(url, timeout=10):
    """Download a Q&A page and extract the fields stored for it.

    Args:
        url: Address of the Q&A page.
        timeout: Seconds to wait for the HTTP response before giving up.
            Optional; added so a stalled server cannot hang the script.

    Returns:
        dict with the keys
        ``title``    - text of the first ``h4``,
        ``curr``     - text of the first ``div.cuestion-contenido``,
        ``auchor``   - text of ``p.cuestion-editor`` (the misspelt key is kept
                       on purpose: callers index the result by it),
        ``contents`` - the second ``div.cuestion-contenido`` element itself,
        ``add_time`` - current Unix time in whole seconds.

    Raises:
        requests.HTTPError: the server answered with an error status.
        ValueError: a required element is missing, or the page has fewer
            than two ``div.cuestion-contenido`` blocks.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'lxml')

    def text_of(node, description):
        # ``.string`` is None when the element has nested markup, which made
        # the old ``.string.strip()`` blow up; get_text() handles both cases.
        if node is None:
            raise ValueError('%s not found in %s' % (description, url))
        return node.get_text(strip=True)

    # One search serves both fields: block 0 supplies ``curr`` (it is what
    # ``soup.find`` would return) and block 1 supplies ``contents``.
    blocks = soup.find_all('div', class_="cuestion-contenido")
    if len(blocks) < 2:
        raise ValueError(
            'expected at least 2 div.cuestion-contenido in %s, found %d'
            % (url, len(blocks)))

    return {
        'title': text_of(soup.find('h4'), 'h4'),
        'curr': text_of(blocks[0], 'div.cuestion-contenido'),
        'auchor': text_of(soup.find('p', class_="cuestion-editor"),
                          'p.cuestion-editor'),
        'contents': blocks[1],
        'add_time': int(time.time()),
    }
# Scrape one daily article and one daily Q&A from the home page and store them.
url = "http://wufazhuce.com/"
response = requests.get(url, timeout=10)
# Fail here on 4xx/5xx instead of trying to parse an error page.
response.raise_for_status()
page = response.content
soup = BeautifulSoup(page, 'lxml')

# Daily article: follow the first article link on the home page.
art_list = soup.find_all("p", class_="one-articulo-titulo")
art_url = art_list[0].a.get('href')
artinfo = getartinfo(art_url)

# Daily Q&A: follow the first question link on the home page.
que_list = soup.find_all("p", class_="one-cuestion-titulo")
que_url = que_list[0].a.get('href')
queinfo = getqueinfo(que_url)

# Values are bound by the driver (%s placeholders) rather than pasted into
# the statement: scraped text contains quotes and markup, which would both
# break the SQL and allow injection.
_INSERT_SQL = ("INSERT INTO day_art(title,curr,author,contents,add_time)"
               "VALUES(%s,%s,%s,%s,%s);")


def _as_row(info):
    # ``contents`` is a parsed element; store its markup as text, or NULL
    # when the element was not found.
    contents = info['contents']
    return (
        info['title'],
        info['curr'],
        info['auchor'],
        None if contents is None else str(contents),
        info['add_time'],
    )


# NOTE(review): credentials are hard-coded as in the original script -
# consider moving them to configuration.
conn = pymysql.connect(host='localhost', port=3306, user='root',
                       password='root', db='one', charset='utf8')
# closing() guarantees the cursor and the connection are released even when
# an insert fails; nothing is committed unless both inserts succeed.
with contextlib.closing(conn):
    with contextlib.closing(conn.cursor()) as cursor:
        # NOTE(review): the Q&A row is written to day_art as well, exactly as
        # the original did - confirm whether a separate table was intended.
        for info in (artinfo, queinfo):
            cursor.execute(_INSERT_SQL, _as_row(info))
    conn.commit()
print("ok")
# Source: http://www.bubuko.com/infodetail-2511113.html