- #!/usr/bin/python
- # coding:utf-8
- # usage1: 'python **.py word' to look up the word's explanation, or
- # usage2: 'python **.py word -detail' to also list phrases that use the word
- from bs4 import BeautifulSoup
- import sys
- import urllib2
def is_Chinese(uchar):
    """Return True if *uchar* contains at least one Chinese character.

    Used to decide whether the user typed an English or a Chinese word.
    A character counts as Chinese when its code point lies in the range
    U+4E00 .. U+9FA5 (inclusive).

    Args:
        uchar: The text to inspect. A byte string is decoded as UTF-8 first
            (on Python 2 command-line arguments are byte strings; UTF-8 is
            assumed here, as in the original code). A text string is used
            as is.

    Returns:
        bool: True when any character falls in the Chinese range, else False
        (including for the empty string).

    Raises:
        UnicodeDecodeError: If a byte string is not valid UTF-8.
    """
    # Decoding converts the encoded bytes into unicode so the comparison
    # below works on code points instead of raw bytes.
    if isinstance(uchar, bytes):
        uchar = uchar.decode('utf8')
    # Test every character: comparing the whole word at once would only be
    # a lexicographic comparison against the range bounds.
    return any(u'\u4e00' <= ch <= u'\u9fa5' for ch in uchar)
def input_error():
    """Print the command-line usage hint shown when the arguments are wrong.

    Writes two lines to standard output and returns None; it does not exit.
    """
    # Single-argument print(...) produces the same output whether print is a
    # statement (Python 2) or a function (Python 3).
    print("usage1: 'python **.py word' or ")
    # The closing quote after -detail was missing in the original message.
    print("usage2: 'python **.py word -detail' (For English Only)")
def _emit(text):
    """Print one line of text to standard output.

    On Python 2 a ``unicode`` value is encoded explicitly (with the stdout
    encoding, or UTF-8 when stdout has none, e.g. when piped) so that
    non-ASCII text cannot raise UnicodeEncodeError.
    """
    if not isinstance(text, str):  # only true for Python 2 ``unicode`` objects
        encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
        text = text.encode(encoding, 'replace')
    print(text)


def _print_definitions(container, chinese_query):
    """Print every translation found inside the 'trans-container' element."""
    if chinese_query:
        # For a Chinese query the translations are the 'search-js' links.
        entries = container.find_all('a', 'search-js')
    else:
        # For an English query each meaning is stored in its own <li>.
        entries = container.find_all('li')
    for entry in entries:
        # get_text() yields the content without the surrounding markup, and
        # keeps working when the tag carries attributes.
        _emit(entry.get_text())


def _print_phrases(soup):
    """Print one 'phrase  meaning' line per <p class="wordGroup"> element."""
    groups = soup.find_all('p', 'wordGroup')
    # The original author found the last word group problematic and dropped
    # it; that behaviour is kept.
    for group in groups[:-1]:
        link = group.find('a')
        if link is None:
            continue  # no phrase link in this group, nothing to show
        # The explanation is the markup between the first </span> and the
        # closing </p>.
        _, marker, tail = group.decode().partition('</span>')
        if not marker:
            continue  # unexpected layout: no </span> to anchor on
        words = tail.rsplit('</p>', 1)[0].split()
        # Only the first meaning is shown; it may be missing entirely.
        meaning = words[0] if words else ''
        # ljust(25) pads the phrase so the meanings line up in a column.
        _emit(link.get_text().ljust(25) + ' ' + meaning)


def main():
    """Look up ``sys.argv[1]`` on dict.youdao.com and print its translations.

    Command line:
        word            print the translations of ``word``
        word -detail    additionally print phrases that use ``word``

    The arguments are validated before any network request is made. On a
    usage error, or when the result page has no 'trans-container' element,
    a message is printed and the process exits with status 1. Errors raised
    while fetching the page are not caught here.
    """
    # Local imports: the needed functions live in different modules on
    # Python 2 (which this script targets) and Python 3.
    try:
        from urllib import quote
        from urllib2 import urlopen
    except ImportError:
        from urllib.parse import quote
        from urllib.request import urlopen

    length = len(sys.argv)
    want_detail = length == 3
    if length < 2 or (want_detail and sys.argv[2] != '-detail'):
        input_error()
        sys.exit(1)

    word = sys.argv[1]
    # Percent-encode the word so spaces, '&', '#' and non-ASCII (Chinese)
    # input cannot corrupt the query string of the Youdao search URL.
    url = ('http://dict.youdao.com/search?le=en&q=%s&keyfrom=dict.index'
           % quote(word))
    response = urlopen(url)
    try:
        data = response.read()
    finally:
        response.close()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(data, 'html.parser')
    word_div = soup.find('div', 'trans-container')
    if word_div is None:
        print('word does not exists')
        sys.exit(1)

    _print_definitions(word_div, is_Chinese(word))
    if want_detail:
        _print_phrases(soup)
# Script entry point: run the lookup only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
- #该片段来自于http://www.codesnippet.cn/detail/151020136415.html
# Source: http://www.codesnippet.cn/detail/151020136415.html