Java 使用 poi 将 word 转换为 html
这里有新鲜出炉的 Java 并发编程示例, 程序狗速度看过来!
Java 程序设计语言
Java 是一种可以撰写跨平台应用软件的面向对象的程序设计语言, 是由 Sun Microsystems 公司于 1995 年 5 月推出的 Java 程序设计语言和 Java 平台 (即 JavaEE(j2ee), JavaME(j2me), JavaSE(j2se)) 的总称。
这篇文章主要为大家详细介绍了 Java 使用 poi 将 word 转换为 html 的相关资料, 具有一定的参考价值, 感兴趣的小伙伴们可以参考一下
使用 poi 将 word 转换为 html, 支持 doc,docx, 转换后可以保持图片样式
1. 导入 Maven 包
- <dependency>
- <groupId>org.apache.poi</groupId>
- <artifactId>poi</artifactId>
- <version>3.14</version>
- </dependency>
- <dependency>
- <groupId>org.apache.poi</groupId>
- <artifactId>poi-scratchpad</artifactId>
- <version>3.14</version>
- </dependency>
- <dependency>
- <groupId>org.apache.poi</groupId>
- <artifactId>poi-ooxml</artifactId>
- <version>3.14</version>
- </dependency>
- <dependency>
- <groupId>fr.opensagres.xdocreport</groupId>
- <artifactId>xdocreport</artifactId>
- <version>1.0.6</version>
- </dependency>
- <dependency>
- <groupId>org.apache.poi</groupId>
- <artifactId>poi-ooxml-schemas</artifactId>
- <version>3.14</version>
- </dependency>
- <dependency>
- <groupId>org.apache.poi</groupId>
- <artifactId>ooxml-schemas</artifactId>
- <version>1.3</version>
- </dependency>
2. 转换代码
- import org.apache.poi.hwpf.HWPFDocument;
- import org.apache.poi.hwpf.converter.WordToHtmlConverter;
- import org.apache.poi.xwpf.converter.core.BasicURIResolver;
- import org.apache.poi.xwpf.converter.core.FileImageExtractor;
- import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
- import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
- import org.apache.poi.xwpf.usermodel.XWPFDocument;
- import org.w3c.dom.Document;
- import javax.xml.parsers.DocumentBuilderFactory;
- import javax.xml.transform.OutputKeys;
- import javax.xml.transform.Transformer;
- import javax.xml.transform.TransformerFactory;
- import javax.xml.transform.dom.DOMSource;
- import javax.xml.transform.stream.StreamResult;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.FileOutputStream;
- import java.io.OutputStreamWriter;
- public class Test {
- // doc 转换为 html
- void docToHtml() throws Exception {
- String sourceFileName = "C:\\doc\\test.doc";
- String targetFileName = "C:\\html\\test.html";
- String imagePathStr = "C:\\html\\image\\";
- HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(sourceFileName));
- Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
- WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);
- // 保存图片, 并返回图片的相对路径
- wordToHtmlConverter.setPicturesManager((content, pictureType, name, width, height) - >{
- try (FileOutputStream out = new FileOutputStream(imagePathStr + name)) {
- out.write(content);
- } catch(Exception e) {
- e.printStackTrace();
- }
- return "image/" + name;
- });
- wordToHtmlConverter.processDocument(wordDocument);
- Document htmlDocument = wordToHtmlConverter.getDocument();
- DOMSource domSource = new DOMSource(htmlDocument);
- StreamResult streamResult = new StreamResult(new File(targetFileName));
- TransformerFactory tf = TransformerFactory.newInstance();
- Transformer serializer = tf.newTransformer();
- serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
- serializer.setOutputProperty(OutputKeys.INDENT, "yes");
- serializer.setOutputProperty(OutputKeys.METHOD, "html");
- serializer.transform(domSource, streamResult);
- }
- // docx 转换为 html
- public void docxToHtml() throws Exception {
- String sourceFileName = "D:\\ac\\00.docx";
- String targetFileName = "D:\\ac\\test.html";
- String imagePathStr = "D:\\ac\\image\\";
- OutputStreamWriter outputStreamWriter = null;
- try {
- XWPFDocument document = new XWPFDocument(new FileInputStream(sourceFileName));
- XHTMLOptions options = XHTMLOptions.create();
- // 存放图片的文件夹
- options.setExtractor(new FileImageExtractor(new File(imagePathStr)));
- // html 中图片的路径
- options.URIResolver(new BasicURIResolver("image"));
- outputStreamWriter = new OutputStreamWriter(new FileOutputStream(targetFileName), "utf-8");
- XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
- xhtmlConverter.convert(document, outputStreamWriter, options);
- } finally {
- if (outputStreamWriter != null) {
- outputStreamWriter.close();
- }
- }
- }
来源: http://www.phperz.com/article/18/0211/359009.html