# 批量下载腾讯图库图片 (batch-download the images of a Tencent photo gallery)

 
from optparse import OptionParser;
from urllib.parse import urlparse;
import json;
import os;
import urllib.request;
 
def vaildChange(script, key):
    """Return *script* with every ``key:`` wrapped in quote markers.

    Each occurrence of *key* immediately followed by a colon is replaced
    by: backslash, double quote, *key*, backslash, double quote, colon.
    Matching is plain substring replacement, so a longer identifier that
    merely ends with *key* is rewritten as well.

    NOTE(review): the literal backslash in front of each quote looks like
    an escaping artefact; it is kept because the other functions in this
    file search for exactly this marker -- confirm before changing.
    """
    bare = key + ':'
    wrapped = '\\"{0}\\":'.format(key)
    return script.replace(bare, wrapped)
 
def splitJavascript(script, key, end):
    """Extract the text stored under *key* in *script*, up to the *end* key.

    The span starts right after the key marker (backslash, double quote,
    *key*, space, backslash, double quote, colon) and stops at the next
    end marker (backslash, double quote, *end*) that follows it. Everything
    from the last comma of that span onwards is dropped, which removes the
    separator in front of the *end* key.

    Args:
        script: Text to search.
        key: Name whose value should be extracted.
        end: Name that follows the wanted value.

    Returns:
        The extracted text, or the string '[]' when either marker cannot
        be found.
    """
    realKey = '\\"' + key + ' \\":'
    try:
        start = script.index(realKey) + len(realKey)
        # Search for the end marker only after the key: an earlier
        # occurrence would otherwise produce an empty (reversed) slice.
        stop = script.index('\\"' + end, start)
    except ValueError:
        # str.index() signals "not found" with ValueError; anything else
        # (including KeyboardInterrupt) is no longer swallowed.
        return '[]'
    temp = script[start:stop]
    comma = temp.rfind(',')
    # rfind() returns -1 when there is no comma; slicing with -1 would
    # silently cut off the last character of the value.
    return temp if comma < 0 else temp[:comma]
 
def dowmloadFile(url, foldname, filename):
    """Download *url* and store the response body as *filename* in *foldname*.

    Args:
        url: Address passed to urllib.request.urlopen().
        foldname: Existing directory that receives the file.
        filename: Name of the file to create inside *foldname*.

    Returns:
        An empty string on success, otherwise a non-empty description of
        the exception raised while fetching or writing. Exceptions derived
        from Exception are reported this way instead of being raised.
    """
    try:
        # The context manager closes the response even if read() fails.
        with urllib.request.urlopen(url) as response:
            payload = response.read()
    except Exception as err:
        # Fall back to repr() so a message-less exception is never
        # mistaken for success (the empty string).
        return str(err) or repr(err)
    try:
        with open(os.path.join(foldname, filename), "wb") as target:
            target.write(payload)
    except Exception as err:
        return str(err) or repr(err)
    return ""
 
if __name__ == '__main__':
    # Command-line entry point: take a gallery URL from -t, fetch the
    # gallery's plist.js picture list, and download every listed picture
    # into a directory named after the gallery.
    parser = OptionParser(version="%prog 1.0")
    parser.add_option('-t', dest='targetUrl', help='要下载的图库网址')
    (options, args) = parser.parse_args()
    if not options.targetUrl:
        # parser.error() prints the usage text and exits the process.
        parser.error('没有指定网址！')

    o = urlparse(options.targetUrl)
    isGalleryUrl = (o.netloc == 'comic.qq.com'
                    and o.path == '/disppics.htm'
                    and o.scheme == 'http')
    # find() instead of index(): a fragment without '&tid=' is reported as
    # a bad URL rather than ending in an uncaught ValueError traceback.
    nPos = o.fragment.find('&tid=')
    if not isGalleryUrl or nPos < 0:
        parser.error('错误的网址，应该类似于http://comic.qq.com/disppics.htm#did=287&tid=25&pid=15169')

    print('开始运行')
    print('目标：' + options.targetUrl)
    # Skip the first four characters of the fragment (presumably the
    # 'did=' prefix shown in the example URL above) and stop at '&tid='.
    key = o.fragment[4:nPos]
    structUrl = 'http://comic.qq.com/d/pic/1/{key}/plist.js'.replace('{key}', key)

    print('正在获取信息')
    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(structUrl) as response:
        jsonData = response.read().decode('gb2312', 'ignore')
    # Drop the leading 'var oPiclib=' and cut one character before ';/*'.
    jsonData = jsonData[len('var oPiclib='):jsonData.index(';/*') - 1]

    # Wrap every known property name in quote markers.
    # NOTE(review): the markers carry a literal backslash (see vaildChange);
    # this looks like an escaping artefact -- confirm against real data.
    temp = [
        'nID', 'nDataID', 'nTypeID', 'sOriginalImgUrl', 'sZoomImgUrl',
        'sDesc', 'sTheD', 'arrPic24', 'arrPic23', 'arrPic25', 'brandid',
        'brandname', 'brandurlhead', 'oPicInfo', 'showinfo', 'dname',
        'durl', 'typelist', 'typepnum', 'typename', 'stpicnum', 'id',
        'name', 'arrPic23 ', 'arrPic24 ', 'arrPic25 ',
    ]
    for tem in temp:
        jsonData = vaildChange(jsonData, tem)

    # The picture arrays are read in order; each one ends where the next
    # listed name starts. 'arrPic25' is only included when present.
    if 'arrPic25' in jsonData:
        temp = ['arrPic23', 'arrPic24', 'arrPic25', 'oPicInfo']
    else:
        temp = ['arrPic23', 'arrPic24', 'oPicInfo']
    targetPics = []
    for startKey, endKey in zip(temp, temp[1:]):
        targetPics.extend(json.loads(splitJavascript(jsonData, startKey, endKey)))

    # The gallery name is the text between the name marker and the next
    # double quote; it is used as the output directory.
    nameMarker = '\\"name\\":\\"'
    namePos = jsonData.index(nameMarker) + len(nameMarker)
    jsonData = jsonData[namePos:]
    name = jsonData[:jsonData.index('"')]

    print('共有 %s 张图片。即将开始下载' % len(targetPics))
    if not os.path.isdir(name):
        os.mkdir(name)
    baseUrl = 'http://img1.gtimg.com'
    for index, item in enumerate(targetPics, 1):
        url = baseUrl + item['sOriginalImgUrl']
        print('正在处理第%s张' % index)
        sERR = dowmloadFile(url, name, str(index) + '.jpg')
        if sERR:
            # dowmloadFile reports failures through its return value;
            # surface them instead of dropping them silently.
            print('第%s张下载失败：%s' % (index, sERR))
    print('处理完成')
#该片段来自于http://www.codesnippet.cn/detail/250620134277.html
# 来源: http://www.codesnippet.cn/detail/250620134277.html
# 与本文相关文章

# 暂无,快来抢沙发吧！