#!/usr/bin/python3.2
import os
import re
import socket
import sys
import threading
import time
import urllib
import urllib.request
# Run-time configuration read from the command line.  These are ordinary
# module-level names that the functions below access as globals; a ``global``
# statement at module level has no effect, so none is used here.
_USAGE = "usage: downloadmanhua weburl floder chapterbegin=0 threadnum=6"

weburl = ''             # URL opened by the script entry point (argv[1])
floder = ''             # output folder prefix (argv[2]); the name is kept because other code refers to it
chapterbegin = 0        # first chapter index handled by the script entry point (argv[3])
currentthreadnum = 0    # number of download threads currently running
threadcount = 6         # upper bound on concurrently running download threads (argv[4])

if len(sys.argv) < 3:
    print(_USAGE)
    # A usage error is a failure, so report it with a non-zero status.
    sys.exit(1)

weburl = sys.argv[1]
floder = sys.argv[2]

try:
    if len(sys.argv) >= 4:
        chapterbegin = int(sys.argv[3])
    if len(sys.argv) >= 5:
        threadcount = int(sys.argv[4])
except ValueError:
    print(_USAGE)
    sys.exit(1)

# A thread limit below 1 would make the dispatcher wait forever for a free
# slot, and a negative start index cannot address a chapter.
if chapterbegin < 0 or threadcount < 1:
    print(_USAGE)
    sys.exit(1)
def jin(i, jinzhi):
    """Return the non-negative integer ``i`` written in base ``jinzhi``.

    Digit values 0-9 are written as ``'0'``-``'9'``; larger digit values are
    written as consecutive characters starting at ``'a'`` (10 -> ``'a'``).

    Args:
        i: Non-negative integer to convert.
        jinzhi: Radix, at least 2.

    Returns:
        The textual representation, most significant digit first
        (``'0'`` for zero).

    Raises:
        ValueError: If ``i`` is negative or ``jinzhi`` is smaller than 2.
    """
    if jinzhi < 2:
        raise ValueError("radix must be at least 2, got %r" % (jinzhi,))
    if i < 0:
        raise ValueError("cannot convert a negative number: %r" % (i,))

    digits = []
    while True:
        # divmod keeps everything in exact integer arithmetic; going through
        # float division would silently corrupt large values.
        i, remainder = divmod(i, jinzhi)
        if remainder > 9:
            digits.append(chr(ord('a') + (remainder - 10)))
        else:
            digits.append(str(remainder))
        if i == 0:
            break
    # Digits were produced least significant first.
    return ''.join(reversed(digits))
def urlparse(p, a, c, k):
    """Expand the base-36 word tokens in ``p`` using the keyword list ``k``.

    A lookup table is built for the indices ``0 .. c-1``: the key is the index
    written in base 36 (via ``jin``) and the value is ``k[index]``, or the key
    itself when that keyword is empty or missing.  Every whole word in ``p``
    that is a key of the table is then replaced by its value; all other text
    is copied unchanged.

    NOTE(review): the parameter names and table construction look like the
    unpack step of a "p,a,c,k,e,d"-style JavaScript packer - confirm against
    the pages this script targets.

    Args:
        p: Packed text.
        a: Unused; kept so existing callers keep working.
        c: Number of table entries to build.
        k: Sequence of replacement words indexed by token number.

    Returns:
        ``p`` with every known token replaced.
    """
    table = {}
    for index in range(c):
        key = jin(index, 36)
        word = k[index] if index < len(k) else ''
        # An empty keyword means the token stands for itself.
        table[key] = word or key

    def expand(match):
        token = match.group(0)
        # Words that are not tokens are left untouched instead of raising.
        return table.get(token, token)

    # Replace whole words in one pass, so multi-character tokens and tokens
    # beyond 'f' are handled; re.ASCII keeps \w and \b limited to ASCII.
    return re.compile(r'\b\w+\b', re.ASCII).sub(expand, p)
def meispower(s):
    """Extract the ``imgpath`` value from a packed ``eval(...)`` script line.

    The text from the first ``}(`` onwards is taken as the argument list
    ``}('<payload>',<a>,<c>,'<w0|w1|...>'.split('|'),0,{}))``.  The payload is
    expanded with ``urlparse`` and the text matched by ``imgpath=[^;]*`` is
    returned without its first 10 and last 2 characters.

    NOTE(review): the 10/2 trimming presumably strips ``imgpath=\\'`` and a
    closing ``\\'`` - confirm against a real page.

    Args:
        s: Script text containing the packed call.

    Returns:
        The trimmed ``imgpath`` value.

    Raises:
        ValueError: If the argument list or the ``imgpath`` assignment cannot
            be found, or the numeric arguments are not integers.
    """
    tail_match = re.search(r"(?=\}\().*", s, re.IGNORECASE)
    if tail_match is None:
        raise ValueError("packed script argument list not found")
    tail = tail_match.group(0)

    # Cut at the closing quote of the keyword string.  Looking for the
    # ".split('|')" call tolerates trailing text after the script; the fixed
    # 19-character trim is kept as a fallback.
    end = tail.rfind("'.split('|')")
    if end == -1:
        end = len(tail) - 19
    tail = tail[0:end]

    # Split from the right: the payload may itself contain commas, while the
    # two numbers and the keyword list cannot.
    par = tail.rsplit(',', 3)
    if len(par) != 4:
        raise ValueError("unexpected packed script argument list")
    payload, radix, count, words = par

    # Drop the opening quote of the keyword string before splitting it.
    answer = words[1:].split('|')
    chapterpath = urlparse(payload, int(radix), int(count), answer)

    found = re.search('imgpath=[^;]*', chapterpath)
    if found is None:
        raise ValueError("imgpath not found in unpacked script")
    allurl = found.group(0)
    return allurl[10:(len(allurl) - 2)]
def pictofile(weburl, filename, loop=100):
    """Download ``weburl`` into ``filename``, retrying on failure.

    Nothing is downloaded when ``filename`` already exists.  A response
    shorter than 2048 bytes is treated as a failed download and retried, as
    is any error raised while downloading or writing.  After ``loop + 1``
    unsuccessful attempts a message is printed and the function gives up.

    Args:
        weburl: URL of the picture.
        filename: Path of the file to create.
        loop: Number of retries allowed after the first attempt; a negative
            value means no attempt is made at all.

    Returns:
        None.  Failures are reported on stdout, never raised.
    """
    attempts_left = loop
    # Iterate instead of recursing so a long retry sequence cannot pile up
    # stack frames and open responses.
    while attempts_left >= 0:
        attempts_left -= 1
        if os.path.exists(filename):
            return
        try:
            # The context manager closes the response even if read() fails.
            with urllib.request.urlopen(weburl) as response:
                data = response.read()
            if len(data) < 2048:
                # Too small to be a real picture; try again.
                continue
            print('download from %s name is %s\n' % (weburl, filename))
            with open(filename, 'wb') as myfile:
                myfile.write(data)
            return
        except socket.timeout:
            print('timeout')
        except Exception as e:
            # Best effort: report the problem and fall through to the retry.
            print('error', e)
    print('can\'t download the picture %s' % weburl)
def downloadpic(url, loadpicdir, num):
    """Thread body: download one picture from ``url`` into ``loadpicdir``.

    The folder is created when missing.  The picture is stored as
    ``<num>-manhua<name>``, where ``<name>`` is the trailing run of
    ``[0-9a-z.]`` characters of ``url``.  On exit the global
    ``currentthreadnum`` is decremented under ``mutex``, whether or not the
    download succeeded.

    Args:
        url: Picture URL.
        loadpicdir: Target folder.
        num: Value used as the file-name prefix.
    """
    global currentthreadnum, mutex, mutex2
    try:
        # Create the folder under mutex2 so that concurrent workers do not
        # race each other.  The process-wide working directory is left alone:
        # changing it from worker threads makes relative folders nest.
        with mutex2:
            if not os.path.isdir(loadpicdir):
                print("can't open the floder %s will be create" % loadpicdir)
                try:
                    os.makedirs(loadpicdir, exist_ok=True)
                except OSError:
                    print("can't create floder %s" % loadpicdir)
                    return
                print('create floder succeed')

        # The pattern can always match (at worst the empty string at the end).
        name = re.search(r'[0-9a-z.]*\Z', url).group(0)
        filename = 'manhua' + name
        pictofile(url, os.path.join(loadpicdir, str(num) + '-' + filename))
    finally:
        # Always free the slot, otherwise the dispatcher waits forever.
        with mutex:
            currentthreadnum = currentthreadnum - 1
def downloadchapter(url, loadpicdir, num, begin=0):
    """Start download threads for pictures ``begin`` .. ``num - 1`` of a chapter.

    The chapter page ``manhuaweb + url`` is fetched and decoded as UTF-8.  The
    chapter name is the text after ``<title>`` up to the first ``_``, and the
    picture base path comes from ``meispower`` applied to the first
    ``eval...`` script found.  Pictures are requested as
    ``http://mhimg.ali213.net<path><i>.jpg`` and stored by ``downloadpic`` in
    ``loadpicdir + chaptername``.  That is a plain string concatenation, so
    ``loadpicdir`` needs its own trailing separator to name a parent folder.

    At most ``threadcount`` worker threads run at a time; the function polls
    ``currentthreadnum`` until a slot is free.  It returns once all threads
    are started and does not wait for them to finish.

    Args:
        url: Chapter path appended to ``manhuaweb``.
        loadpicdir: Prefix of the output folder.
        num: One past the last picture index; also passed to ``downloadpic``.
        begin: First picture index.

    Raises:
        ValueError: If the page has no title or no packed script.
    """
    global manhuaweb, threadcount, currentthreadnum, mutex
    chapterpage = manhuaweb + url
    print(chapterpage)
    # Close the connection as soon as the page has been read.
    with urllib.request.urlopen(chapterpage) as response:
        webdata = response.read().decode('UTF-8')

    titles = re.findall(r'<title>[^_]*', webdata)
    if not titles:
        raise ValueError('no <title> found in %s' % chapterpage)
    chaptername = titles[0][7:]  # drop the leading '<title>'

    webscrip = re.findall(r'eval.*[^<>]', webdata)
    if not webscrip:
        raise ValueError('no packed script found in %s' % chapterpage)
    chapterurl = 'http://mhimg.ali213.net' + meispower(webscrip[0])

    targetdir = loadpicdir + chaptername
    for i in range(begin, num):
        # Wait for a free slot; workers decrement the counter when they end.
        while currentthreadnum >= threadcount:
            time.sleep(0.5)
        with mutex:
            currentthreadnum = currentthreadnum + 1
        worker = threading.Thread(
            target=downloadpic,
            args=(r'%s%d.jpg' % (chapterurl, i), targetdir, num),
        )
        try:
            worker.start()
        except Exception as error:
            # The worker never ran, so give its slot back before stopping.
            with mutex:
                currentthreadnum = currentthreadnum - 1
            print(error, 'break')
            print('download chapter %d of picture make a error' % i)
            break
if __name__ == '__main__':
    # Script entry point: read the page at ``weburl``, collect the chapter
    # links and their page counts, and download the chapters one by one.
    # The names bound here (manhuaweb, mutex, mutex2) are module globals that
    # the download functions rely on.
    manhuaweb = r'http://manhua.ali213.net'
    socket.setdefaulttimeout(60.0)
    mutex = threading.Lock()    # guards currentthreadnum
    mutex2 = threading.Lock()   # used by downloadpic around folder handling

    # Close the connection as soon as the page has been read.
    with urllib.request.urlopen(weburl) as webfile:
        webdata = webfile.read().decode('UTF-8')

    meshmatch = re.search(
        r'<div class="detail_body_right_sec_con">.*</div>', webdata)
    if meshmatch is None:
        print('no chapter list found in %s' % weburl)
        sys.exit(1)
    meshdata = meshmatch.group(0)

    # Page-count labels: digits followed by the character for "page(s)".
    indexdata = re.findall(r'([0-9]*页)', meshdata)
    # The dot is escaped so that only a literal ".html" suffix matches.
    picurldata = re.findall(r'/comic/[0-9/]*\.html', meshdata)
    chapterlength = len(picurldata)
    nummode = re.compile(r'[\d]+')

    # Chapter i is taken from the END of the collected lists, i.e. the lists
    # are walked backwards starting chapterbegin entries from the end.
    for i in range(max(chapterbegin, 0), chapterlength):
        position = chapterlength - i - 1
        manhuachapter = picurldata[position]
        pagecount = []
        if position < len(indexdata):
            pagecount = nummode.findall(indexdata[position])
        if not pagecount:
            # Without a page count there is nothing to request; skip the
            # chapter instead of failing with an IndexError.
            print('no page count found for chapter %s' % manhuachapter)
            continue
        downloadchapter(manhuachapter, floder, int(pagecount[0]))
# This snippet comes from http://www.codesnippet.cn/detail/040320148881.html
# Source: http://www.codesnippet.cn/detail/040320148881.html