Python 是我比较喜欢的语言, 莫名地喜欢. 对 Python 的学习, 起初可能是从敲错了网址开始的, 哈哈哈~
工作的任务是登录一个网站后台, 爬取数据, 再写入服务器的 Redis 中. 同事认为我会用 PHP 来写, 哼! 让你猜到那该多没意思, 于是乎有了如下 Python 的代码, 你看, 50 多行就搞定了.
#!/usr/bin/python3
"""Log in to the management site, total order volumes per product, and store them in Redis."""
import re

import redis
import requests
from pyquery import PyQuery as pq

# Login endpoint and credentials for the management back end.
loginUrl = 'https://manage.xxx.com.cn/home/login'
userName = 'xxx'
passWord = 'xxx'

# Redis instance that receives the aggregated volumes.
redisServer = '192.168.0.2'
redisPort = 6379
redisPass = ''

# Product name as it appears in the order table -> product code used in the Redis keys.
productList = {'椰油':'CL_Spot','咖啡':'COFFEE','工业铜':'COPPER'}
# Product code -> [buy volume, sell volume].
volumeList = {'CL_Spot':[0, 0], 'COFFEE':[0, 0], 'COPPER':[0, 0]}


def main():
    """Fetch the order table, total buy/sell volume per product, and write the totals to Redis.

    Each data row contributes int(column 7) * int(column 8) (presumably
    quantity times unit size -- confirm against the page) to the buy total
    when column 6 is '买' and to the sell total when it is '卖'. Rows with
    fewer than nine plain <td> cells, an unknown product name, or any other
    direction are skipped.

    The totals accumulate in the module-level ``volumeList`` and are stored
    under ``redis_order_count_u_<code>`` (buy) and
    ``redis_order_count_d_<code>`` (sell).

    Raises:
        ValueError: if column 7 or 8 of a counted row is not an integer.
    """
    jsessionid = getCookie()
    doLogin(jsessionid)

    # Same host as loginUrl: the session cookie was issued by that host.
    dataUrl = 'https://manage.xxx.com.cn/?pageNo=1&pageSize=100'
    r = requests.get(dataUrl, cookies={'JSESSIONID': jsessionid})
    dom = pq(r.text)

    # Non-greedy with DOTALL so every comment is removed on its own; a greedy
    # match would also swallow real cells sitting between two comments.
    commentPattern = re.compile(r'<!--.*?-->', re.S)
    cellPattern = re.compile(r'<td>(.*?)</td>')

    for row in dom('table').eq(1).find('tr').items():
        cells = cellPattern.findall(commentPattern.sub('', str(row)))
        # Rows without <td> cells (e.g. headers) and rows too short to hold
        # the columns read below are ignored.
        if len(cells) < 9:
            continue
        productCode = productList.get(cells[3])
        if productCode is None:
            continue
        if cells[6] == '买':
            side = 0
        elif cells[6] == '卖':
            side = 1
        else:
            continue
        volumeList[productCode][side] += int(cells[7]) * int(cells[8])

    redisClient = redis.Redis(host=redisServer, port=redisPort, password=redisPass)
    for code, (bought, sold) in volumeList.items():
        redisClient.set('redis_order_count_u_%s' % code, bought)
        redisClient.set('redis_order_count_d_%s' % code, sold)
def getCookie():
    """Request the login page and return the JSESSIONID cookie the server sets.

    The request carries a desktop Chrome User-Agent header.

    Returns:
        The value of the ``JSESSIONID`` cookie from the response.

    Raises:
        KeyError: if the response carries no ``JSESSIONID`` cookie.
    """
    ua = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'}
    r = requests.get(loginUrl, headers = ua)
    return r.cookies['JSESSIONID']
def doLogin(jsessionid):
    """POST the configured credentials to the login URL under the given session id.

    Args:
        jsessionid: session id sent back to the server as the ``JSESSIONID`` cookie.

    The response is discarded; nothing is returned.
    """
    requests.post(
        loginUrl,
        data={'userName': userName, 'password': passWord},
        cookies={'JSESSIONID': jsessionid},
    )
# Script entry point: run the scrape only when executed directly, not on import.
if __name__ == "__main__":
    main()
另一个服务也有同样的需求, 就用最近在看的 Golang 又实现了一次, 瞧, 写了 100 多行.
- package main
- import (
- "fmt"
- "net/http"
- "net/url"
- "os"
- "strings"
- "strconv"
- "gopkg.in/redis.v4"
- "github.com/PuerkitoBio/goquery"
- )
// Endpoints, credentials, and Redis connection settings used by the program.
var (
	loginUrl    = "https://manage.xxx.com.cn/home/login"
	dataUrl     = "https://manage.xxx.com.cn/?pageNo=1&pageSize=100"
	userName    = "xxx"
	passWord    = "xxx"
	redisServer = "192.168.1.2"
	redisPort   = "6379"
	redisPass   = ""
	redisDB     = 0
)
- func main() {
- productList := make(map[string] string)
- productList["椰油"] = "CL_Spot"
- productList["咖啡"] = "COFFEE"
- productList["工业铜"] = "COPPER"
- volumeList := make(map[string] int)
- volumeList["u_CL_Spot"] = 0
- volumeList["d_CL_Spot"] = 0
- volumeList["u_COFFEE"] = 0
- volumeList["d_COFFEE"] = 0
- volumeList["u_COPPER"] = 0
- volumeList["d_COPPER"] = 0
- jsessionid := getCookie()
- doLogin(jsessionid)
- request, err := http.NewRequest("GET", dataUrl, nil)
- request.AddCookie(&http.Cookie{Name: "JSESSIONID", Value: jsessionid})
- client := &http.Client{}
- response, err := client.Do(request)
- if err != nil {
- fmt.Println(err.Error())
- os.Exit(0)
- }
- defer response.Body.Close()
- doc, err := goquery.NewDocumentFromReader(response.Body)
- doc.Find("table").Eq(1).Find("tr").Each(func(i int, tr *goquery.Selection) {
- td := tr.Find("td")
- name := td.Eq(3).Text()
- dir := td.Eq(6).Text()
- if val, ok := productList[name]; ok {
- buyNum, _ := strconv.Atoi(td.Eq(7).Text())
- buyUnit, _ := strconv.Atoi(td.Eq(8).Text())
- num := buyNum * buyUnit
- cacheKey := "" if dir ==" 买 " {
- cacheKey = fmt.Sprintf("u_%s", val)
- } else if dir == "卖" {
- cacheKey = fmt.Sprintf("d_%s", val)
- }
- volumeList[cacheKey] += num
- }
- })
- redisClient := Redis.NewClient(&Redis.Options{
- Addr: fmt.Sprintf("%s:%s", redisServer, redisPort),
- Password: redisPass,
- DB: redisDB,
- })
- for k, v := range volumeList {
- strKey := fmt.Sprintf("redis_order_count_%s", k)
- redisClient.Set(strKey, int(v), 0)
- }
- fmt.Println("puti volume get success")
- }
- func getCookie() string {
- jsessionid := ""
- response, err := http.Get(loginUrl)
- if err != nil {
- fmt.Println(err.Error())
- os.Exit(0)
- }
- defer response.Body.Close()
- for _, val := range response.Cookies() {
- if val.Name == "JSESSIONID" {
- jsessionid = val.Value
- }
- }
- return jsessionid
- }
- func doLogin(jsessionid string) bool {
- data := url.Values{}
- data.Set("userName", userName)
- data.Add("password", passWord)
- request, _ := http.NewRequest("POST", loginUrl, strings.NewReader(data.Encode()))
- request.Header.Add("Content-Type", "application/x-www-form-urlencoded")
- request.Header.Add("Content-Length", strconv.Itoa(len(data.Encode())))
- request.AddCookie(&http.Cookie{Name: "JSESSIONID", Value: jsessionid})
- client := &http.Client{}
- response, err := client.Do(request)
- if err != nil {
- fmt.Println(err.Error())
- os.Exit(0)
- }
- defer response.Body.Close()
- return true
- }
Python 的实现从动手到上线, 半天的功夫就搞定了; Go 足足搞了 1 整天, 蹩脚的写法与不熟悉的语法让我学到了很多知识点, 最后在 Mac 上编译再放到 Linux 上执行也给我上了一课.
觉得入门学习这两门语言挺好, 一个是脚本语言另一个是编译语言, 用处都很广泛. 轩轩你准备好了吗?
来源: http://www.bubuko.com/infodetail-2862772.html