# Word lookup script

 
#!/usr/bin/python
# coding:utf-8 
# usage1: 'python **.py word' to find the word's explanation or
# usage2: 'python **.py word -detail' to also list phrases containing the word
from bs4 import BeautifulSoup
import sys
import urllib2
 
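# Decide whether the user's input is English or Chinese.
# decode() converts text from another encoding (here UTF-8) to unicode.
# The common Chinese unicode range runs from u'\u4e00' to u'\u9fa5'.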
 
def is_Chinese(uchar):
    """Return True if *uchar* starts with a common Chinese character.

    Args:
        uchar: The text to classify, either a UTF-8 encoded byte string
            (what ``sys.argv`` holds on Python 2) or an already-decoded
            unicode string.

    Returns:
        True when the first character lies in U+4E00..U+9FA5, otherwise
        False (including for empty input).

    Raises:
        UnicodeDecodeError: If a byte string is not valid UTF-8.
    """
    # Only byte strings need decoding; calling decode() on text that is
    # already unicode fails (or round-trips through ASCII on Python 2).
    if isinstance(uchar, bytes):
        uchar = uchar.decode('utf8')
    # The bounds must be real code points. Written with a doubled backslash
    # they are six-character strings starting with "\", and the range test
    # can then never match a Chinese character.
    return bool(uchar) and u'\u4e00' <= uchar[0] <= u'\u9fa5'
# Hint shown when the command line is malformed.
 
 
def input_error():
    """Print the two supported command-line forms to standard output.

    This function only prints; whether to exit afterwards is left to the
    caller.
    """
    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("usage1: 'python **.py word' or ")
    # The closing quote delimits the command the same way usage1 does, so
    # "(For English Only)" is not read as part of the command line.
    print("usage2: 'python **.py word -detail' (For English Only)")
 
 
def _to_output(text):
    """Return *text* in a form ``print`` emits without implicit ASCII coercion."""
    # On Python 2, bs4 hands back ``unicode``; encoding it to UTF-8 here keeps
    # output working when stdout is a pipe. Native ``str`` passes through.
    if not isinstance(text, str):
        text = text.encode('utf-8')
    return text


def _first_meaning(group):
    """Return the first word of the text following the group's title span."""
    title = group.find('span')
    if title is None:
        return u''
    pieces = []
    for node in title.next_siblings:
        # Strings are taken as they are; nested tags contribute their text.
        pieces.append(node if isinstance(node, type(u'')) else node.get_text())
    words = u''.join(pieces).split()
    # Only the first meaning is shown; an empty explanation yields u''.
    return words[0] if words else u''


def main():
    """Look up a word on Youdao and print its definitions.

    The word is read from ``sys.argv[1]``. When the second argument is
    ``-detail``, the phrases listed on the result page are printed as well,
    one per line: the phrase left-justified to 25 columns, followed by the
    first word of its explanation.

    Exits with status 1, after printing a hint, when the arguments are
    invalid or when the page contains no definition block.
    """
    # Function-scope import: only needed here to percent-encode the query.
    import urllib

    argc = len(sys.argv)
    if argc < 2:
        input_error()
        sys.exit(1)
    # Reject an unknown flag before doing any network work or printing.
    if argc == 3 and sys.argv[2] != '-detail':
        input_error()
        sys.exit(1)
    word = sys.argv[1]
    show_phrases = argc == 3

    # Percent-encode the word so Chinese text, spaces and '&' cannot corrupt
    # the query string.
    url = ('http://dict.youdao.com/search?le=en&q=%s&keyfrom=dict.index'
           % urllib.quote(word))
    response = urllib2.urlopen(url)
    try:
        data = response.read()
    finally:
        response.close()

    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed.
    soup = BeautifulSoup(data, 'html.parser')
    word_div = soup.find('div', 'trans-container')
    if word_div is None:
        print('word does not exists')
        sys.exit(1)

    # Chinese queries list their translations as links; English queries list
    # each meaning in its own list item.
    if is_Chinese(word):
        for anchor in word_div.find_all('a', 'search-js'):
            print(_to_output(anchor.get_text()))
    else:
        for item in word_div.find_all('li'):
            print(_to_output(item.get_text()))

    if not show_phrases:
        return

    groups = soup.find_all('p', 'wordGroup')
    # The last group on the page is problematic (per the original author),
    # so it is skipped.
    for group in groups[:-1]:
        link = group.find('a')
        if link is None:
            continue
        line = u'%s %s' % (link.get_text().ljust(25), _first_meaning(group))
        print(_to_output(line))
 
if __name__ == '__main__':
    main()
#该片段来自于http://www.codesnippet.cn/detail/151020136415.html
# Source: http://www.codesnippet.cn/detail/151020136415.html
# Related articles
#
# None yet -- be the first to comment!