Java 是一种可以撰写跨平台应用软件的面向对象的程序设计语言,是由 Sun Microsystems 公司于 1995 年 5 月推出的 Java 程序设计语言和 Java 平台(即 Java EE(J2EE)、Java ME(J2ME)、Java SE(J2SE))的总称。
这篇文章主要为大家详细介绍了用 Java 实现 Word 文件转 HTML 文件的方法,具有一定的参考价值,感兴趣的小伙伴们可以参考一下。
最近在项目开发中,用户提出希望在电脑上没有安装 Office 的情况下,也能在浏览器中打开 Word 文件。最后确定的逻辑如下:用户选择想要查看的文件,页面 JS 判断该文件是否为 Word 文件。如果不是,则直接执行下载;如果是,则由后端根据 Word 文件的后缀调用对应的转换方法:若对应的 HTML 文件已存在,则直接返回该 HTML 文件的地址;若不存在,则先生成对应的 HTML 文件,再返回地址。JS 直接通过 open() 打开新的页签,展示 Word 文件内容。新人一枚,如果代码中存在错误或有更好的实现,万望指正!
相关 jar 包
代码
- import java.io.ByteArrayOutputStream;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.FileNotFoundException;
- import java.io.FileOutputStream;
- import java.io.IOException;
- import java.io.InputStream;
- import java.io.OutputStream;
- import javax.xml.parsers.DocumentBuilderFactory;
- import javax.xml.parsers.ParserConfigurationException;
- import javax.xml.transform.OutputKeys;
- import javax.xml.transform.Transformer;
- import javax.xml.transform.TransformerException;
- import javax.xml.transform.TransformerFactory;
- import javax.xml.transform.dom.DOMSource;
- import javax.xml.transform.stream.StreamResult;
- import org.apache.poi.hwpf.HWPFDocument;
- import org.apache.poi.hwpf.converter.PicturesManager;
- import org.apache.poi.hwpf.converter.WordToHtmlConverter;
- import org.apache.poi.hwpf.usermodel.PictureType;
- import org.apache.poi.xwpf.converter.core.BasicURIResolver;
- import org.apache.poi.xwpf.converter.core.FileImageExtractor;
- import org.apache.poi.xwpf.converter.core.FileURIResolver;
- import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
- import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
- import org.apache.poi.xwpf.usermodel.XWPFDocument;
- import org.w3c.dom.Document;
- /**
- * word 转换成html 2017-2-27
- */
- public class WordToHtml {
- /**
- * 将word2003转换为html文件 2017-2-27
- * @param wordPath word文件路径
- * @param wordName word文件名称无后缀
- * @param suffix word文件后缀
- * @throws IOException
- * @throws TransformerException
- * @throws ParserConfigurationException
- */
- public String Word2003ToHtml(String wordPath, String wordName, String suffix) throws IOException,
- TransformerException,
- ParserConfigurationException {
- String htmlPath = wordPath + File.separator + wordName + "_show" + File.separator;
- String htmlName = wordName + ".html";
- final String imagePath = htmlPath + "image" + File.separator;
- //判断html文件是否存在
- File htmlFile = new File(htmlPath + htmlName);
- if (htmlFile.exists()) {
- return htmlFile.getAbsolutePath();
- }
- //原word文档
- final String file = wordPath + File.separator + wordName + suffix;
- InputStream input = new FileInputStream(new File(file));
- HWPFDocument wordDocument = new HWPFDocument(input);
- WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
- //设置图片存放的位置
- wordToHtmlConverter.setPicturesManager(new PicturesManager() {
- public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {
- File imgPath = new File(imagePath);
- if (!imgPath.exists()) { //图片目录不存在则创建
- imgPath.mkdirs();
- }
- File file = new File(imagePath + suggestedName);
- try {
- OutputStream os = new FileOutputStream(file);
- os.write(content);
- os.close();
- } catch(FileNotFoundException e) {
- e.printStackTrace();
- } catch(IOException e) {
- e.printStackTrace();
- }
- //图片在html文件上的路径 相对路径
- return "image/" + suggestedName;
- }
- });
- //解析word文档
- wordToHtmlConverter.processDocument(wordDocument);
- Document htmlDocument = wordToHtmlConverter.getDocument();
- //生成html文件上级文件夹
- File folder = new File(htmlPath);
- if (!folder.exists()) {
- folder.mkdirs();
- }
- //生成html文件地址
- OutputStream outStream = new FileOutputStream(htmlFile);
- DOMSource domSource = new DOMSource(htmlDocument);
- StreamResult streamResult = new StreamResult(outStream);
- TransformerFactory factory = TransformerFactory.newInstance();
- Transformer serializer = factory.newTransformer();
- serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
- serializer.setOutputProperty(OutputKeys.INDENT, "yes");
- serializer.setOutputProperty(OutputKeys.METHOD, "html");
- serializer.transform(domSource, streamResult);
- outStream.close();
- return htmlFile.getAbsolutePath();
- }
- /**
- * 2007版本word转换成html 2017-2-27
- * @param wordPath word文件路径
- * @param wordName word文件名称无后缀
- * @param suffix word文件后缀
- * @return
- * @throws IOException
- */
- public String Word2007ToHtml(String wordPath, String wordName, String suffix) throws IOException {
- String htmlPath = wordPath + File.separator + wordName + "_show" + File.separator;
- String htmlName = wordName + ".html";
- String imagePath = htmlPath + "image" + File.separator;
- //判断html文件是否存在
- File htmlFile = new File(htmlPath + htmlName);
- if (htmlFile.exists()) {
- return htmlFile.getAbsolutePath();
- }
- //word文件
- File wordFile = new File(wordPath + File.separator + wordName + suffix);
- // 1) 加载word文档生成 XWPFDocument对象
- InputStream in =new FileInputStream(wordFile);
- XWPFDocument document = new XWPFDocument( in );
- // 2) 解析 XHTML配置 (这里设置IURIResolver来设置图片存放的目录)
- File imgFolder = new File(imagePath);
- XHTMLOptions options = XHTMLOptions.create();
- options.setExtractor(new FileImageExtractor(imgFolder));
- //html中图片的路径 相对路径
- options.URIResolver(new BasicURIResolver("image"));
- options.setIgnoreStylesIfUnused(false);
- options.setFragment(true);
- // 3) 将 XWPFDocument转换成XHTML
- //生成html文件上级文件夹
- File folder = new File(htmlPath);
- if (!folder.exists()) {
- folder.mkdirs();
- }
- OutputStream out = new FileOutputStream(htmlFile);
- XHTMLConverter.getInstance().convert(document, out, options);
- return htmlFile.getAbsolutePath();
- }
- }
文件目录:
来源: http://www.phperz.com/article/17/1222/357878.html