# coding=utf-8
import contextlib
import os
import re
import urllib

import MySQLdb
import requests
# Configuration parameters.
path = 'img/'  # directory where downloaded images are stored
tableName0 = 'imgTable'  # name of the database table holding image records

# Create the image directory if it does not exist yet.
if not os.path.exists(path):
    os.makedirs(path)
# Connect to the MySQL database.
# NOTE(review): credentials are hard-coded here; consider loading them from
# configuration or the environment instead.
conn = MySQLdb.connect(
    host='localhost',
    port=3306,
    user='root',
    passwd='123',
    db='test',
    charset='utf8',
)
cur = conn.cursor()
print("连接成功")

# Start from a clean table on every run: drop it if it already exists.
cur.execute("DROP TABLE IF EXISTS %s" % tableName0)

# Create the table with three text columns: num, name, url.
# NOTE(review): num is varchar(2), so sequence numbers above 99 will not fit.
cur.execute("create table %s(num varchar(2) ,name varchar(200),url varchar(200))" % tableName0)
# Fetch the HTML of a web page.
def getHtml(url):
    """Download *url* and return the response body.

    Args:
        url: Address of the page to fetch.

    Returns:
        Whatever ``read()`` on the opened response yields (the page content).
    """
    # closing() releases the connection even if read() raises; the object
    # returned by urllib.urlopen is not itself a context manager on Python 2.
    with contextlib.closing(urllib.urlopen(url)) as page:
        return page.read()
# Extract the list of image URLs from the page content.
def getImg(html):
    """Return every image URL found in *html*.

    A URL is captured when it sits between ``" src="`` and ``" style="`` and
    ends with ``=550x0``.

    Args:
        html: Page content to scan.

    Returns:
        A list of the captured URL strings, in document order (empty when
        nothing matches). The list is also printed.
    """
    reg = r'" src="(.+?\=550x0)" style="'
    imgre = re.compile(reg)
    imglist = imgre.findall(html)
    print(imglist)
    return imglist
# Save a single image.
def save_img(url, path):
    """Download *url* and write it to ``path + basename(url)``.

    Args:
        url: Address of the image to download.
        path: Prefix prepended to the file name (directory and/or name prefix).

    Returns:
        The base name of *url* on success, or the error text if downloading
        or writing failed.
    """
    print(path)
    filename = os.path.basename(url)
    try:
        # Download first so that a failed request does not leave an empty
        # file behind; closing() releases the connection in every case.
        with contextlib.closing(urllib.urlopen(url)) as response:
            data = response.read()
        # The with-block closes the file even if write() raises, and nothing
        # is referenced in cleanup when open() itself fails.
        with open(path + filename, 'wb') as out:
            out.write(data)
    except Exception as e:
        # Deliberately best-effort: report the failure and let the caller
        # carry on with the remaining images.
        message = str(e)
        print(message)
        return message
    return filename
# Insert the image records into the database.
def insertIntoDb(list):
    """Insert one row (num, name, url) per image URL in *list*.

    Each URL has its ``?imageView&thumbnail=550x0`` suffix removed via
    ``dealURL``; the row stores a 1-based sequence number, the base name of
    the cleaned URL, and the cleaned URL itself.

    Args:
        list: Iterable of image URLs. (The name shadows the builtin but is
            kept so existing callers are unaffected.)
    """
    # The table name cannot be a bound parameter, so it is interpolated from
    # the module constant; the row values are passed as parameters so quotes
    # or other special characters in scraped URLs cannot break the statement.
    sql = 'insert into %s values (%%s, %%s, %%s)' % tableName0
    for num, imgurl in enumerate(list, 1):
        urlR = dealURL(imgurl, '?imageView&thumbnail=550x0')
        name = os.path.basename(urlR)
        cur.execute(sql, (str(num), name, urlR))
# Clean up a URL so that it ends with the image file name (.jpg / .png).
def dealURL(url, str):
    """Return *url* with every occurrence of *str* removed.

    Args:
        url: The URL to clean.
        str: The substring to strip out. (The name shadows the builtin but is
            kept so existing callers are unaffected.)

    Returns:
        The cleaned URL; *url* unchanged if *str* does not occur in it.
    """
    # An alternative considered in the original was slicing off a fixed
    # number of trailing characters (url[:-30:1]); replacing the substring
    # does not depend on the suffix length.
    return url.replace(str, '')
# Save images to local disk (not used by this program).
def saveImge(imgList):
    """Download every URL in *imgList* into the image directory.

    Files are named ``1.jpg``, ``2.jpg``, ... in list order under the
    module-level ``path`` prefix.

    Args:
        imgList: Iterable of image URLs.
    """
    for index, imgurl in enumerate(imgList, 1):
        # Alternative 1: urllib.urlretrieve(imgurl, '%s.jpg' % index)
        # Alternative 3: save_img(imgurl, path + str(index) + '--') to name
        # the file after the tail of the URL, with a numeric prefix.
        # Approach used here: fetch with requests so the target location can
        # be chosen freely.
        pic = requests.get(imgurl, timeout=10)
        target = path + str(index) + '.jpg'
        # The with-block closes the file even if the write fails.
        with open(target, 'wb') as fp:
            fp.write(pic.content)
# Method four: read the image URLs back from the database, then download them.
def saveImageFromDb():
    """Download every image recorded in the table.

    Selects all rows from the table, prints the value returned by
    ``cur.execute`` (used by the original as the row count), then saves each
    row's URL through ``save_img`` with a ``<n>--`` prefix on the file name,
    where n is the 1-based row position.
    """
    row_count = cur.execute("select * from %s" % tableName0)
    print(row_count)
    rows = cur.fetchall()
    for index, row in enumerate(rows, 1):
        # Each row has three items [0, 1, 2] corresponding to [num, name, url].
        url = row[2]
        print(url)
        save_img(url, path + str(index) + '--')
    print("保存完成")
# Main flow: fetch the page, record its image URLs, then download them.
try:
    html = getHtml("http://news.163.com/17/0831/07/CT5B1SJB000181BT.html")
    imgList123 = getImg(html)
    insertIntoDb(imgList123)
    saveImageFromDb()
    # saveImge(imgList123)
    conn.commit()
finally:
    # Close the database resources even if one of the steps above failed.
    cur.close()
    conn.close()

# Scratch notes kept from the original: string reversal test.
# sStr1 = 'abcdefg'
# sStr1 = sStr1[::-1]
# print sStr1
# Scratch notes kept from the original: slicing tests.
# str = '0123456789'
# print "1:",str[:-1:2]
# print "2:",str[:0:-3]
# print "3:",str[-8:8:2]
# print "4:",str[-2:2:-2]
# print "5:",str[:-1:]
# print "6:",str[::]
# print "7:",str[::-1][:3]
# Source: http://www.bubuko.com/infodetail-2294526.html