# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
import pymysql
class HousePipeline(object):
    """Item pipeline that persists scraped house listings into MySQL.

    For every item the province, city and area are resolved against the
    ``home_province``, ``home_city`` and ``home_area`` tables (a row is
    created the first time a name is seen), and a ``home_house`` row is
    inserted unless one with the same ``house_name`` already exists.
    """

    def open_spider(self, spider):
        """Open the MySQL connection and a dict-returning cursor.

        Args:
            spider: The spider being opened; not otherwise used.

        Returns:
            The ``spider`` argument, unchanged.
        """
        # NOTE(review): connection parameters are hard-coded; consider
        # reading them from project settings instead.
        self.con = pymysql.connect(
            user='root', passwd='123', db='test',
            host='localhost', port=3306, charset='utf8',
        )
        # DictCursor so that fetched rows can be indexed by column name.
        self.cursor = self.con.cursor(pymysql.cursors.DictCursor)
        return spider

    def _get_or_create_id(self, select_sql, select_args, insert_sql, insert_args):
        """Return the ``id`` of the row matched by *select_sql*, inserting it if absent."""
        # execute() returns the number of rows the statement produced.
        if self.cursor.execute(select_sql, select_args):
            return self.cursor.fetchone()['id']
        self.cursor.execute(insert_sql, insert_args)
        return self.cursor.lastrowid

    def process_item(self, item, spider):
        """Store one scraped item, creating lookup rows as needed.

        All statements for an item are committed together; if any of them
        fails the transaction is rolled back and the error is re-raised.

        Args:
            item: Mapping providing ``province_name``, ``city_name``,
                ``area_name``, ``title``, ``house_type``, ``floor``,
                ``oritenation``, ``build_time``, ``house_name``,
                ``house_area``, ``per_price`` and ``house_url``.
            spider: The spider that produced the item; not otherwise used.

        Returns:
            The ``item`` argument, unchanged.

        Raises:
            Exception: Whatever the lookups or inserts raised, after rollback.
        """
        try:
            # Province table.
            province_id = self._get_or_create_id(
                'select * from home_province where province_name=%s',
                (item['province_name'],),
                'insert into home_province(province_name) values(%s)',
                (item['province_name'],),
            )

            # City table; keyed by province too, because cities in
            # different provinces can share a name.
            city_id = self._get_or_create_id(
                'select * from home_city where city_name=%s and province_id=%s',
                (item['city_name'], province_id),
                'insert into home_city(city_name,province_id) values(%s,%s)',
                (item['city_name'], province_id),
            )

            # Area table; keyed by city too, because areas in different
            # cities can share a name.
            area_id = self._get_or_create_id(
                'select * from home_area where area_name=%s and city_id=%s',
                (item['area_name'], city_id),
                'insert into home_area (area_name,city_id,province_id)value(%s,%s,%s)',
                (item['area_name'], city_id, province_id),
            )

            # House table; a listing whose house_name is already stored
            # is skipped.
            house_exists = self.cursor.execute(
                'select house_name from home_house where house_name=%s',
                (item['house_name'],),
            )
            if not house_exists:
                sql = 'insert into home_house(title,house_type,floor,oritenation,build_time,house_name,house_area,per_price,house_url,area_id,city_id,province_id) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
                self.cursor.execute(sql, (
                    item['title'], item['house_type'], item['floor'],
                    item['oritenation'], item['build_time'],
                    item['house_name'], item['house_area'],
                    item['per_price'], item['house_url'],
                    area_id, city_id, province_id,
                ))
            self.con.commit()
        except Exception:
            # Discard the partial work so the shared connection is clean
            # for the next item, then let the caller see the failure.
            self.con.rollback()
            raise
        return item

    def close_spider(self, spider):
        """Close the cursor and the connection.

        Args:
            spider: The spider being closed; not otherwise used.

        Returns:
            The ``spider`` argument, unchanged.
        """
        try:
            self.cursor.close()
        finally:
            # Release the connection even if closing the cursor failed.
            self.con.close()
        return spider
# Source: https://www.cnblogs.com/gentleman-shao/p/8994141.html