前言:
而今姐妹们对“javahtml生成word”都比较关注,也都想了解一些与“javahtml生成word”相关的内容。小编在网上汇集了一些关于“javahtml生成word”的相关内容,希望大家能喜欢,看官们快快来学习一下吧!日常开发中,word转pdf是较为常见的操作,尤其是前端上传word文档后需要在页面上预览文档的场景。前端直接预览word需要做特殊处理;但如果由后端先把word转为pdf再预览,就会简单得多。
效果预览:
接下来就分享实测过的实现方式。
环境:JDK11、Springboot 2.3.7.RELEASE、windows10、Maven
第一步,Maven 依赖配置,主要导入一些工具包
<!-- Maven dependencies for the docx -> XHTML -> PDF conversion shown in this article. -->
<!-- NOTE(review): poi-tl, fr.opensagres.poi.xwpf.converter.pdf and html2pdf are not imported by the
     Java code shown below; confirm whether they are actually required. -->
<dependencies>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <optional>true</optional>
    </dependency>
    <dependency>
        <groupId>org.apache.commons</groupId>
        <artifactId>commons-lang3</artifactId>
        <version>3.4</version>
    </dependency>
    <!-- Word support: poi-tl 1.10.x only supports POI 4.x -->
    <dependency>
        <groupId>com.deepoove</groupId>
        <artifactId>poi-tl</artifactId>
        <version>1.10.2</version>
    </dependency>
    <dependency>
        <groupId>fr.opensagres.xdocreport</groupId>
        <artifactId>fr.opensagres.poi.xwpf.converter.pdf</artifactId>
        <version>2.0.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-scratchpad</artifactId>
        <version>4.1.2</version>
    </dependency>
    <!-- fr.opensagres.xdocreport 2.0.2 does not support POI 5.x, so POI must stay on 4.x -->
    <dependency>
        <groupId>fr.opensagres.xdocreport</groupId>
        <artifactId>fr.opensagres.poi.xwpf.converter.core</artifactId>
        <version>2.0.2</version>
    </dependency>
    <dependency>
        <groupId>fr.opensagres.xdocreport</groupId>
        <artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
        <version>2.0.2</version>
    </dependency>
    <dependency>
        <groupId>com.itextpdf</groupId>
        <artifactId>itextpdf</artifactId>
        <version>5.5.13.2</version>
    </dependency>
    <dependency>
        <groupId>com.itextpdf.tool</groupId>
        <artifactId>xmlworker</artifactId>
        <version>5.5.13.2</version>
    </dependency>
    <dependency>
        <groupId>com.itextpdf</groupId>
        <artifactId>itext-asian</artifactId>
        <version>5.2.0</version>
    </dependency>
    <dependency>
        <groupId>com.itextpdf</groupId>
        <artifactId>html2pdf</artifactId>
        <version>4.0.1</version>
    </dependency>
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.14.3</version>
    </dependency>
</dependencies>
第二步,service 业务层构造
package com.yalin.cn.fileutil.word.service;

import java.io.InputStream;
import java.io.OutputStream;

/**
 * @description: Service contract for turning a Word (docx) document into a PDF.
 * @author: lyl
 * @create: 2021-05-08 16:31:47
 **/
public interface IWordConvertPdfService {

    /**
     * Converts a docx file on disk into a PDF file on disk.
     *
     * @param sourcePath path of the Word document to read
     * @param targetPath path of the PDF to write
     * @param imageDir   temporary directory for images contained in the Word document
     * @return whether the conversion succeeded
     */
    boolean convert(String sourcePath, String targetPath, String imageDir);

    /**
     * Converts a docx stream into a PDF file on disk.
     *
     * @param in         stream of the Word document
     * @param targetPath path of the PDF to write
     * @param imageDir   temporary directory for images contained in the Word document
     * @return whether the conversion succeeded
     */
    boolean convert(InputStream in, String targetPath, String imageDir);

    /**
     * Converts a docx stream into a PDF stream.
     *
     * @param in       stream of the Word document
     * @param out      stream the PDF is written to
     * @param imageDir temporary directory for images contained in the Word document
     * @return whether the conversion succeeded
     * @throws Exception if the conversion cannot be attempted
     */
    boolean convert(InputStream in, OutputStream out, String imageDir) throws Exception;
}
第三步,service impl 业务实现层构造
package com.yalin.cn.fileutil.word.service.impl;

import com.yalin.cn.fileutil.util.OfficeUtil;
import com.yalin.cn.fileutil.word.service.IWordConvertPdfService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;

import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Objects;

/**
 * @description: Word (docx) to PDF conversion service. The path-based overloads open the
 * files and delegate to the stream-based overload, which hands the work to {@link OfficeUtil}.
 * @author: lyl
 * @create: 2021-05-08 16:31:47
 **/
@Service
@Slf4j
public class WordConvertPdfServiceImpl implements IWordConvertPdfService {

    /**
     * Converts a docx file on disk into a PDF file on disk.
     *
     * @param sourcePath path of the Word document to read
     * @param targetPath path of the PDF to write
     * @param imageDir   temporary directory for images contained in the Word document
     * @return {@code true} if the conversion succeeded; {@code false} if any step failed
     *         (the failure is logged, not thrown)
     */
    @Override
    public boolean convert(String sourcePath, String targetPath, String imageDir) {
        try (InputStream inputStream = Files.newInputStream(Paths.get(sourcePath));
             OutputStream outputStream = Files.newOutputStream(Paths.get(targetPath))) {
            return convert(inputStream, outputStream, imageDir);
        } catch (Exception e) {
            // The throwable is passed without a "{}" placeholder so SLF4J logs its stack trace
            // and the message contains no dangling "{}".
            log.error("convert(String, String, String)异常", e);
        }
        return false;
    }

    /**
     * Converts a docx stream into a PDF file on disk.
     *
     * @param in         stream of the Word document
     * @param targetPath path of the PDF to write
     * @param imageDir   temporary directory for images contained in the Word document
     * @return {@code true} if the conversion succeeded; {@code false} if any step failed
     *         (the failure is logged, not thrown)
     */
    @Override
    public boolean convert(InputStream in, String targetPath, String imageDir) {
        try (OutputStream outputStream = Files.newOutputStream(Paths.get(targetPath))) {
            return convert(in, outputStream, imageDir);
        } catch (Exception e) {
            log.error("convert(InputStream, String, String)异常", e);
        }
        return false;
    }

    /**
     * Converts a docx stream into a PDF stream.
     *
     * @param in       stream of the Word document
     * @param out      stream the PDF is written to
     * @param imageDir temporary directory for images contained in the Word document
     * @return the result reported by {@link OfficeUtil#docxConvertPdf}; {@code false} if the
     *         conversion failed
     * @throws Exception if {@code in} or {@code out} is {@code null}
     */
    @Override
    public boolean convert(InputStream in, OutputStream out, String imageDir) throws Exception {
        if (Objects.isNull(in)) {
            throw new Exception("模板文件流为null!");
        }
        if (Objects.isNull(out)) {
            throw new Exception("目标文件流为null!");
        }
        try {
            // OfficeUtil swallows its own exceptions and signals failure through the return
            // value, so that value must be propagated instead of unconditionally returning true.
            return OfficeUtil.docxConvertPdf(in, out, imageDir);
        } catch (Exception e) {
            log.error("convert(InputStream, OutputStream, String)异常", e);
        }
        return false;
    }
}
第四步,真正实现转换的工具类
package com.yalin.cn.fileutil.util;

import com.itextpdf.text.*;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.XMLWorkerHelper;
import com.yalin.cn.fileutil.font.AutoFontFactory;
import fr.opensagres.poi.xwpf.converter.core.BasicURIResolver;
import fr.opensagres.poi.xwpf.converter.core.FileImageExtractor;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Entities;
import org.jsoup.select.Elements;

import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Objects;

/**
 * @description: Word (docx) to PDF conversion helpers. The pipeline is
 * docx -> XHTML (xdocreport converter) -> normalized XHTML (jsoup) -> PDF (iText XMLWorker).
 * @author: lyl
 * @create: 2021-04-23 11:09:51
 **/
public class OfficeUtil {

    /**
     * Converts a docx stream to an HTML string.
     *
     * <p>The input stream is always closed by this method, whether or not conversion succeeds.
     *
     * @param in       docx stream; must not be {@code null}
     * @param imageDir directory in which images of the document are stored; when {@code null}
     *                 no image extractor / URI resolver is configured
     * @return the converted HTML, decoded as UTF-8
     * @throws Exception if the document cannot be read or converted
     */
    public static String docx2Html(InputStream in, String imageDir) throws Exception {
        Objects.requireNonNull(in, "in");
        // Declaring the caller's stream as a resource keeps the original "always close in"
        // behaviour; the XWPFDocument is now closed as well.
        try (InputStream source = in;
             XWPFDocument document = new XWPFDocument(source);
             ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
            XHTMLOptions options = XHTMLOptions.create();
            if (Objects.nonNull(imageDir)) {
                // Images of the document are extracted to imageDir and referenced from there.
                options.setExtractor(new FileImageExtractor(new File(imageDir)));
                options.URIResolver(new BasicURIResolver(imageDir));
                options.setIgnoreStylesIfUnused(false);
                options.setFragment(true);
            }
            XHTMLConverter.getInstance().convert(document, baos, options);
            return new String(baos.toByteArray(), StandardCharsets.UTF_8);
        }
    }

    /**
     * Normalizes HTML with jsoup so that it can be parsed as XHTML.
     *
     * <p>The {@code style} attribute of the document node and of every {@code div} is cleared
     * when it mentions {@code width} (to drop oversized widths), and the output is serialized
     * with XML syntax so that all tags are closed.
     *
     * @param html HTML content
     * @return normalized HTML
     */
    private static String formatHtml(String html) {
        org.jsoup.nodes.Document doc = Jsoup.parse(html);
        clearStyleIfItSetsWidth(doc);
        Elements divs = doc.select("div");
        for (Element div : divs) {
            clearStyleIfItSetsWidth(div);
        }
        // Have jsoup emit closed tags and XHTML-safe entities.
        doc.outputSettings().syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml);
        doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
        return doc.html();
    }

    /** Blanks the whole {@code style} attribute of {@code node} when it mentions "width". */
    private static void clearStyleIfItSetsWidth(Element node) {
        String style = node.attr("style");
        if (StringUtils.isNotEmpty(style) && style.contains("width")) {
            node.attr("style", "");
        }
    }

    /**
     * Renders XHTML into an A4 PDF.
     *
     * @param html XHTML content; must not be {@code null}
     * @param out  stream the PDF is written to; must not be {@code null}
     * @throws Exception if the font cannot be loaded or the PDF cannot be produced
     */
    public static void htmlToPdf(String html, OutputStream out) throws Exception {
        Objects.requireNonNull(html, "html");
        Objects.requireNonNull(out, "out");

        // Load the font once, up front: a missing font now fails here with its real cause
        // instead of being swallowed inside the provider, which then handed out a null font.
        final BaseFont baseFont = AutoFontFactory.getBaseFont();
        FontProvider fontProvider = new FontProvider() {
            @Override
            public boolean isRegistered(String s) {
                return false;
            }

            @Override
            public Font getFont(String s, String s1, boolean embedded, float size, int style,
                                BaseColor baseColor) {
                // Every requested font is mapped onto the single configured base font.
                return new Font(baseFont, size, style, baseColor);
            }
        };

        Document document = new Document(PageSize.A4);
        Exception failure = null;
        try {
            PdfWriter writer = PdfWriter.getInstance(document, out);
            document.open();
            try (InputStream htmlStream =
                         new ByteArrayInputStream(html.getBytes(StandardCharsets.UTF_8))) {
                XMLWorkerHelper.getInstance()
                        .parseXHtml(writer, document, htmlStream, StandardCharsets.UTF_8, fontProvider);
            }
        } catch (Exception e) {
            failure = e;
            throw e;
        } finally {
            try {
                document.close();
            } catch (RuntimeException closeFailure) {
                // Do not let a failure while closing hide the error that caused it.
                if (failure == null) {
                    throw closeFailure;
                }
                failure.addSuppressed(closeFailure);
            }
        }
    }

    /**
     * Converts a docx stream to a PDF stream.
     *
     * @param in       docx stream
     * @param out      stream the PDF is written to
     * @param imageDir directory in which images of the docx are stored
     * @return {@code true} on success; {@code false} if any step threw (the stack trace is
     *         printed and the exception is not propagated)
     */
    public static boolean docxConvertPdf(InputStream in, OutputStream out, String imageDir) {
        try {
            String docxHtml = formatHtml(docx2Html(in, imageDir));
            htmlToPdf(docxHtml, out);
            return true;
        } catch (Exception e) {
            // This is the only place the failure is reported; the helpers above just propagate.
            e.printStackTrace();
        }
        return false;
    }
}
备注:OfficeUtil 中用到的 AutoFontFactory 是自定义的字体工厂类。Linux 环境下往往缺少某些中文字体,会导致生成的 PDF 中文乱码。解决方案之一,就是从 Windows 字体库中复制一个字体文件(本文用的是 simsun.ttc),放到 resources 目录下(本文为 /font/simsun.ttc),在代码中引用即可。
package com.yalin.cn.fileutil.font;

import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.BaseFont;

import java.io.IOException;

/**
 * @description: Font factory supplying the base font used when rendering PDFs.
 * @author: lyl
 * @create: 2022-01-17 15:38:29
 **/
public class AutoFontFactory {

    /**
     * Creates the base font.
     *
     * <p>Three options are described here; only the first is active:
     * <ol>
     *   <li>a font file shipped with the application (the file must be present);</li>
     *   <li>a font installed on the local machine, e.g.
     *       {@code BaseFont.createFont("C:/Windows/Fonts/seguisym.ttf", BaseFont.IDENTITY_H, BaseFont.EMBEDDED)};</li>
     *   <li>the iTextAsian jar, which needs no font file, e.g.
     *       {@code BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.EMBEDDED)}.</li>
     * </ol>
     *
     * @return BaseFont created from {@code /font/simsun.ttc,0} with Identity-H encoding, embedded
     * @throws IOException       if the font cannot be read
     * @throws DocumentException if the font is invalid
     */
    public static BaseFont getBaseFont() throws IOException, DocumentException {
        // Option 1: bundled font. The ",0" suffix selects the first font in the .ttc collection.
        return BaseFont.createFont("/font/simsun.ttc,0", BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
    }
}
测试类
/**
 * Converts a sample docx to PDF through {@code WordConvertPdfServiceImpl} and fails when the
 * service reports that the conversion did not succeed.
 *
 * <p>All paths are hard-coded under {@code C:\Users\lyl\Desktop\}; adjust them before running.
 */
@Test
void wordConvertPdf() {
    String basePath = "C:\\Users\\lyl\\Desktop\\";
    String sourcePath = basePath + "原始word文件.docx";
    String targetPath = basePath + "转换之后的pdf文件.pdf";
    String imagePath = basePath + "img" + File.separator;

    WordConvertPdfServiceImpl tt = new WordConvertPdfServiceImpl();
    boolean flag = tt.convert(sourcePath, targetPath, imagePath);
    System.out.println(flag);

    // Printing alone lets the test pass even when the conversion failed.
    if (!flag) {
        throw new AssertionError("docx -> pdf conversion failed: " + sourcePath);
    }
}
版权声明:
本站文章均来自互联网搜集,如有侵犯您的权益,请联系我们删除,谢谢。
标签: #javahtml生成word