#!python3
# multidownloadXkcd.py - Download XKCD comics using multiple threads.

import os
import threading
from urllib.parse import urljoin

import bs4
import requests

# Store comics in ./xkcd.
# os.mkdir() takes no exist_ok argument, so attempt the creation and handle
# the "already there" case, rather than testing with os.path.exists() first:
# a check-then-create sequence can race with another process creating the
# same directory in between.
try:
    os.mkdir('xkcd')
except FileExistsError:
    print("xkcd is existed!")

def downloadXkcd(startComic, endComic):
    """Download a range of XKCD comics into the ./xkcd directory.

    For each comic number in range(startComic, endComic) the comic page is
    fetched, the first image inside the element with id "comic" is located,
    and that image is saved under ./xkcd using the base name of its URL.

    Args:
        startComic: Number of the first comic to download (inclusive).
        endComic: Number at which to stop (exclusive); an empty range
            performs no requests at all.

    Raises:
        requests.exceptions.HTTPError: Propagated from raise_for_status()
            when a page or image request returns an error status.
        OSError: If the image file cannot be written, e.g. because the
            ./xkcd directory does not exist.
    """
    for urlNumber in range(startComic, endComic):
        # Download the page.
        pageUrl = 'http://xkcd.com/%s' % urlNumber
        print("Downloading page %s..." % pageUrl)
        res = requests.get(pageUrl)
        res.raise_for_status()

        # Name the parser explicitly so parsing does not depend on which
        # optional parser libraries happen to be installed.
        soup = bs4.BeautifulSoup(res.text, 'html.parser')

        # Find the URL of the comic image.
        comicElem = soup.select('#comic img')
        imageSrc = comicElem[0].get('src') if comicElem else None
        if not imageSrc:
            print('Could not find comic images.')
            continue

        # The src attribute may be relative or scheme-less ('//host/path');
        # requests needs an absolute URL, so resolve it against the page URL.
        comicUrl = urljoin(pageUrl, imageSrc)

        # Download the image.
        print('Downloading image %s...' % (comicUrl))
        res = requests.get(comicUrl)
        res.raise_for_status()

        # Save the image to ./xkcd; the with-block closes the file even if
        # a write fails part-way through.
        imagePath = os.path.join('xkcd', os.path.basename(comicUrl))
        with open(imagePath, 'wb') as imageFile:
            for chunk in res.iter_content(100000):
                imageFile.write(chunk)


# Hand Thread the callable and its arguments separately.  Writing
# target=downloadXkcd(555, 557) would run the whole download in the main
# thread first and then give the Thread a target of None, so the thread
# itself would do nothing.
downloadThread = threading.Thread(target=downloadXkcd, args=(555, 557))
downloadThread.start()

# Wait for the worker to finish before the script continues.
downloadThread.join()

# TODO: Fan out over several threads, e.g. one Thread per block of ten
# comics (for i in range(500, 600, 10): args=(i, i + 10)), keep them in a
# list, join() each of them, then print "Done.".  Note that args=(i, i + 9)
# would skip the last comic of every block because the end is exclusive.