一. 进程和线程的区别?
第一:
进程是操作系统进行资源分配的最小单元.
线程是 cpu 调度 (执行) 的最小单元.
第二:
一个进程中可以有多个线程.
第三:
对于 Python (CPython 解释器) 来说, 它的进程和线程与其他语言有差异, 因为存在 GIL 锁 (全局解释器锁).
GIL 锁保证一个进程中同一时刻只有一个线程被 cpu 调度.
IO 密集型操作可以使用多线程 (线程在等待 IO 时会释放 GIL); 计算密集型可以使用多进程 (每个进程有各自的 GIL, 可以利用多核).
二. 面向对象补充:
- class Foo(object):
- def __init__(self):
- object.__setattr__(self, 'info', {}) # 直接调用父类 object 的 __setattr__ 来设置 info, 绕过本类重写的 __setattr__ (此时 self.info 还不存在)
- def __setattr__(self, key, value): # 会拦截所有属性的赋值语句
- self.info[key] = value
- def __getattr__(self, item): # 拦截点号运算. 当对未定义的属性名称和实例进行点号
- # 运算时, 就会用属性名作为字符串调用这个方法. 如果继承树可以找到该属性, 则不调用此方法
- print(item) # name
- return self.info[item]
- obj = Foo()
- obj.name = 'nacho'
- print(obj.name) # nacho
- print(obj.info) # {'name': 'nacho'}
三. 进程
- 进程间数据不共享
- data_list = []
- def task(arg):
- data_list.append(arg)
- print(data_list)
- def run():
- for i in range(10):
- p = multiprocessing.Process(target=task,args=(i,))
- # p = threading.Thread(target=task,args=(i,))
- p.start()
- if __name__ == '__main__': # Windows 下 (spawn 方式启动子进程) 必须加这个判断; Linux 默认的 fork 方式下可以不加, 但建议始终加上
- run()
- 常用功能:
- - join
- - daemon
- - name
- - multiprocessing.current_process()
- - multiprocessing.current_process().ident/pid
- 类继承方式创建进程
- class MyProcess(multiprocessing.Process):
- def run(self):
- print('当前进程',multiprocessing.current_process())
- def run():
- p1 = MyProcess()
- p1.start()
- p2 = MyProcess()
- p2.start()
- if __name__ == '__main__':
- run()
四. 进程间数据共享
- Queue:
- linux:
- q = multiprocessing.Queue()
- def task(arg,q):
- q.put(arg)
- def run():
- for i in range(10):
- p = multiprocessing.Process(target=task, args=(i, q,))
- p.start()
- while True:
- v = q.get()
- print(v)
- run()
- windows:
- def task(arg,q):
- q.put(arg)
- if __name__ == '__main__':
- q = multiprocessing.Queue()
- for i in range(10):
- p = multiprocessing.Process(target=task,args=(i,q,))
- p.start()
- while True:
- v = q.get()
- print(v)
- Manager:(*)
- Linux:
- m = multiprocessing.Manager()
- dic = m.dict()
- def task(arg):
- dic[arg] = 100
- def run():
- for i in range(10):
- p = multiprocessing.Process(target=task, args=(i,))
- p.start()
- input('>>>')
- print(dic.values())
- if __name__ == '__main__':
- run()
- windows:
- def task(arg,dic):
- time.sleep(2)
- dic[arg] = 100
- if __name__ == '__main__':
- m = multiprocessing.Manager()
- dic = m.dict()
- process_list = []
- for i in range(10):
- p = multiprocessing.Process(target=task, args=(i,dic,))
- p.start()
- process_list.append(p)
- while True:
- count = 0
- for p in process_list:
- if not p.is_alive():
- count += 1
- if count == len(process_list):
- break
- print(dic)
五. 进程锁
- import time
- import threading
- import multiprocessing
- lock = multiprocessing.RLock() # 注意: 模块级的锁只有在 fork 方式 (Linux) 下才会被子进程共享; Windows (spawn) 下每个子进程会重新创建一把锁, 需要把 lock 通过 args 传给子进程
- def task(arg):
- print('鬼子来了')
- lock.acquire()
- time.sleep(2)
- print(arg)
- lock.release()
- if __name__ == '__main__':
- p1 = multiprocessing.Process(target=task,args=(1,))
- p1.start()
- p2 = multiprocessing.Process(target=task, args=(2,))
- p2.start()
六. 进程池
- import time
- from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
- def task(arg):
- time.sleep(2)
- print(arg)
- if __name__ == '__main__':
- pool = ProcessPoolExecutor(6) # 取决于 CPU 的核心数
- for i in range(10):
- pool.submit(task,i)
七. 爬虫:
示例:
- import requests
- from bs4 import BeautifulSoup
- from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
- # 模拟浏览器发送请求
- # 内部创建 sk = socket.socket()
- # 和抽屉进行 socket 连接 sk.connect(...)
- # sk.sendall('...')
- # sk.recv(...)
- def task(url):
- print(url)
- r1 = requests.get(
- url=url,
- headers={
- 'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) ApplewebKit/537.36 (Khtml, like Gecko) Chrome/69.0.3497.92 Safari/537.36'
- }
- )
- # 查看下载下来的文本信息
- soup = BeautifulSoup(r1.text,'html.parser')
- print(soup.text)
- # content_list = soup.find('div',attrs={'id':'content-list'})
- # for item in content_list.find_all('div',attrs={'class':'item'}):
- # title = item.find('a').text.strip()
- # target_url = item.find('a').get('href')
- # print(title,target_url)
- def run():
- pool = ThreadPoolExecutor(5)
- for i in range(1,50):
- pool.submit(task,'https://dig.chouti.com/all/hot/recent/%s' %i)
- if __name__ == '__main__':
- run()
来源: http://www.bubuko.com/infodetail-2765353.html