- #coding:utf-8
- import os
- import win32com
- from win32com.client import Dispatch, constants
- from docx import Document
def parse_doc(f, word=None):
    """Extract four cell values from the first table of a legacy ``.doc`` file.

    The file is opened through a Word COM application object, the cells
    are read, and the document is closed again even when reading fails.
    The four values are printed on one line and also returned.

    Args:
        f: Path of the ``.doc`` file, passed to ``Documents.Open``.
        word: Word application object used to open the file.  When
            omitted, the module-level ``w`` created by the script entry
            point is used.

    Returns:
        A ``(name, situation, people, title)`` tuple holding the
        ``Range.Text`` of cells 1 and 5 of row 0 and cells 1 and 3 of
        row 1.

    Raises:
        NameError: If ``word`` is omitted and no module-level ``w`` exists.
        Any error raised while reading the table propagates after the
        document has been closed.
    """
    # Fall back to the shared application object only when none was passed in.
    app = w if word is None else word
    doc = app.Documents.Open(FileName=f)
    try:
        # The cell positions below are tied to the layout of the table in the file.
        first_row, second_row = doc.Tables[0].Rows[0], doc.Tables[0].Rows[1]
        # NOTE(review): Word cell text normally ends with an end-of-cell
        # marker (carriage return + BEL); it is passed through unstripped
        # here - confirm whether callers want it removed.
        name = first_row.Cells[1].Range.Text
        situation = first_row.Cells[5].Range.Text
        people = second_row.Cells[1].Range.Text
        title = second_row.Cells[3].Range.Text
    finally:
        # Always release the document so a failed read does not leave it open in Word.
        doc.Close()
    # Single formatted argument keeps the output identical under Python 2 and 3.
    print("%s %s %s %s" % (name, situation, people, title))
    return name, situation, people, title
def parse_docx(f):
    """Extract four cell values from the first table of a ``.docx`` file.

    The file is loaded with ``docx.Document``; the four values are
    printed on one line and also returned.

    Args:
        f: Path of the ``.docx`` file, passed straight to ``Document``.

    Returns:
        A ``(name, situation, people, title)`` tuple holding the text of
        table cells (0, 1), (0, 8), (1, 2) and (1, 8), given as
        (row, column).
    """
    table = Document(f).tables[0]
    # NOTE(review): these column indices differ from the ones parse_doc
    # uses, presumably because python-docx addresses merged cells by grid
    # column - confirm against a sample file.
    name = table.cell(0, 1).text
    situation = table.cell(0, 8).text
    people = table.cell(1, 2).text
    title = table.cell(1, 8).text
    # Single formatted argument keeps the output identical under Python 2 and 3.
    print("%s %s %s %s" % (name, situation, people, title))
    return name, situation, people, title
if __name__ == "__main__":
    # Directory scanned for Word documents (Windows path).
    PATH = "H:\\work\\aaa"
    # One Word COM instance for the whole run; parse_doc reaches it through
    # the module-level name ``w``.
    w = win32com.client.Dispatch('Word.Application')
    try:
        # Walk every entry in the directory and dispatch on its extension.
        for doc in os.listdir(PATH):
            # Lower-case the extension so ".DOC" / ".DOCX" are handled too.
            ext = os.path.splitext(doc)[1].lower()
            if ext == '.docx':
                handler = parse_docx
            elif ext == '.doc':
                handler = parse_doc
            else:
                continue
            try:
                handler(os.path.join(PATH, doc))
            except Exception as e:
                # Best effort: report the failure and carry on with the next file.
                print(e)
    finally:
        # Shut Word down so no background WINWORD process is left behind.
        w.Quit()
# This snippet comes from http://www.codesnippet.cn/detail/2407201513224.html
# Source: http://www.codesnippet.cn/detail/2407201513224.html