龙空技术网

Java 文件处理系列之:word转pdf

北方有雪吗 319

前言:

如今,大家对“Java HTML 生成 Word”这类文件处理话题都比较关注,也希望了解更多相关内容。为此,小编在网上汇集了一些关于“Java HTML 生成 Word”的相关资料,希望大家能喜欢,一起来学习一下吧!

日常操作中,word转pdf是较为常见的操作。尤其是前端上传word文档,需要在页面预览文档的情况。前端直接预览word需要特殊的处理,但是如果由后端先把word转为pdf,再预览,就会比较简单。

效果预览:

原始word文件.docx

转换之后的pdf文件.pdf

接下来就分享实测过的实现方式。

环境:JDK 11、Spring Boot 2.3.7.RELEASE、Windows 10、Maven

第一步,Maven 依赖配置,主要导入一些工具包

    <dependencies>
        <!-- Spring Boot web starter (version is not pinned in this snippet) -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <!-- Lombok: supplies the @Slf4j annotation used by the service implementation -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <!-- commons-lang3: StringUtils is used by OfficeUtil -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.4</version>
        </dependency>
        <!-- Word support: poi-tl 1.10.x only works with POI 4.x -->
        <dependency>
            <groupId>com.deepoove</groupId>
            <artifactId>poi-tl</artifactId>
            <version>1.10.2</version>
        </dependency>
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>fr.opensagres.poi.xwpf.converter.pdf</artifactId>
            <version>2.0.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>4.1.2</version>
        </dependency>
        <!-- fr.opensagres.xdocreport 2.0.2 does not support POI 5.x, so POI has to stay on 4.x -->
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>fr.opensagres.poi.xwpf.converter.core</artifactId>
            <version>2.0.2</version>
        </dependency>
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
            <version>2.0.2</version>
        </dependency>
        <!-- iText 5 core and XML Worker: OfficeUtil imports com.itextpdf.text.* and com.itextpdf.tool.xml.* -->
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>itextpdf</artifactId>
            <version>5.5.13.2</version>
        </dependency>
        <dependency>
            <groupId>com.itextpdf.tool</groupId>
            <artifactId>xmlworker</artifactId>
            <version>5.5.13.2</version>
        </dependency>
        <!-- Needed only for the "STSong-Light" font alternative mentioned in AutoFontFactory -->
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>itext-asian</artifactId>
            <version>5.2.0</version>
        </dependency>
        <!-- NOTE(review): html2pdf appears to belong to the iText 7 product line and is not referenced
             by any code shown in this article; confirm it is really needed next to iText 5. -->
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>html2pdf</artifactId>
            <version>4.0.1</version>
        </dependency>
        <!-- jsoup: used by OfficeUtil to normalise the intermediate XHTML -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.14.3</version>
        </dependency>
    </dependencies>
第二步,service 业务层构造
package com.yalin.cn.fileutil.word.service;import java.io.InputStream;import java.io.OutputStream;/** * @description: word生成pdf * @author: lyl * @create: 2021-05-08 16:31:47 **/public interface IWordConvertPdfService {    /**     * docx 转pdf     *     * @param sourcePath word路径     * @param targetPath pdf路径     * @param imageDir   word中的图片临时存放路径     * @return boolean     */    boolean convert(String sourcePath, String targetPath, String imageDir);    /**     * docx 转pdf     *     * @param in         word文件流     * @param targetPath pdf路径     * @param imageDir   word中的图片临时存放路径     * @return boolean     */    boolean convert(InputStream in, String targetPath, String imageDir);    /**     * docx 转pdf     *     * @param in       word文件流     * @param out      pdf文件流     * @param imageDir word中的图片临时存放路径     * @return boolean     * @throws Exception 抛出异常     */    boolean convert(InputStream in, OutputStream out, String imageDir) throws Exception;}
第三步,service impl 业务实现层构造
package com.yalin.cn.fileutil.word.service.impl;

import com.yalin.cn.fileutil.util.OfficeUtil;
import com.yalin.cn.fileutil.word.service.IWordConvertPdfService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;

import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Objects;

/**
 * Default {@link IWordConvertPdfService}: delegates the actual docx-to-PDF work to
 * {@link OfficeUtil#docxConvertPdf(InputStream, OutputStream, String)}.
 *
 * <p>The two path-based overloads never throw; every failure is logged and reported as
 * {@code false}. The stream-based overload throws only when one of its streams is {@code null}.
 *
 * @description: generate a PDF from a Word document
 * @author: lyl
 * @create: 2021-05-08 16:31:47
 **/
@Service
@Slf4j
public class WordConvertPdfServiceImpl implements IWordConvertPdfService {

    /**
     * Converts a docx file to a PDF file.
     *
     * @param sourcePath path of the Word document to read
     * @param targetPath path of the PDF file to write (created or overwritten)
     * @param imageDir   directory used as temporary storage for the images contained in the Word document
     * @return {@code true} if the PDF was produced, {@code false} if anything went wrong (the cause is logged)
     */
    @Override
    public boolean convert(String sourcePath, String targetPath, String imageDir) {
        // The source is opened first, so a missing source never creates an empty target file.
        try (InputStream inputStream = Files.newInputStream(Paths.get(sourcePath));
             OutputStream outputStream = Files.newOutputStream(Paths.get(targetPath))) {
            return convert(inputStream, outputStream, imageDir);
        } catch (Exception e) {
            // The throwable is passed as the last argument so SLF4J logs the full stack trace.
            log.error("convert(String, String, String)异常: sourcePath={}, targetPath={}",
                    sourcePath, targetPath, e);
        }
        return false;
    }

    /**
     * Converts a docx stream to a PDF file.
     *
     * @param in         stream of the Word document to read
     * @param targetPath path of the PDF file to write (created or overwritten)
     * @param imageDir   directory used as temporary storage for the images contained in the Word document
     * @return {@code true} if the PDF was produced, {@code false} if anything went wrong (the cause is logged)
     */
    @Override
    public boolean convert(InputStream in, String targetPath, String imageDir) {
        // Reject a missing input before the target is opened, otherwise an empty PDF file is left behind.
        if (Objects.isNull(in)) {
            log.error("convert(InputStream, String, String)异常: 模板文件流为null! targetPath={}", targetPath);
            return false;
        }
        try (OutputStream outputStream = Files.newOutputStream(Paths.get(targetPath))) {
            return convert(in, outputStream, imageDir);
        } catch (Exception e) {
            log.error("convert(InputStream, String, String)异常: targetPath={}", targetPath, e);
        }
        return false;
    }

    /**
     * Converts a docx stream to a PDF stream.
     *
     * @param in       stream of the Word document to read
     * @param out      stream the PDF is written to
     * @param imageDir directory used as temporary storage for the images contained in the Word document
     * @return the result reported by {@link OfficeUtil#docxConvertPdf}, or {@code false} if it threw
     * @throws IllegalArgumentException if {@code in} or {@code out} is {@code null}
     */
    @Override
    public boolean convert(InputStream in, OutputStream out, String imageDir) throws Exception {
        if (Objects.isNull(in)) {
            throw new IllegalArgumentException("模板文件流为null!");
        }
        if (Objects.isNull(out)) {
            throw new IllegalArgumentException("目标文件流为null!");
        }
        try {
            // OfficeUtil reports failure through its return value, so that value must be propagated
            // instead of unconditionally answering true.
            boolean converted = OfficeUtil.docxConvertPdf(in, out, imageDir);
            if (!converted) {
                log.error("convert(InputStream, OutputStream, String)失败: OfficeUtil.docxConvertPdf 返回 false");
            }
            return converted;
        } catch (Exception e) {
            log.error("convert(InputStream, OutputStream, String)异常", e);
        }
        return false;
    }
}
第四步,真正实现转换的工具类
package com.yalin.cn.fileutil.util;

import com.itextpdf.text.*;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.XMLWorkerHelper;
import com.yalin.cn.fileutil.font.AutoFontFactory;
import fr.opensagres.poi.xwpf.converter.core.BasicURIResolver;
import fr.opensagres.poi.xwpf.converter.core.FileImageExtractor;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Entities;
import org.jsoup.select.Elements;

import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Objects;

/**
 * Word-to-PDF conversion helpers.
 *
 * <p>The conversion is a three-step pipeline: docx to XHTML ({@link #docx2Html}), XHTML clean-up
 * with jsoup, then XHTML to PDF with iText XML Worker ({@link #htmlToPdf}).
 * {@link #docxConvertPdf} runs the whole pipeline.
 *
 * @description: convert Word to PDF
 * @author: lyl
 * @create: 2021-04-23 11:09:51
 **/
@Slf4j
public class OfficeUtil {

    /** Charset used for the intermediate XHTML, both when decoding and when re-encoding it. */
    private static final Charset UTF_8 = StandardCharsets.UTF_8;

    /**
     * Converts a docx document to XHTML.
     *
     * <p>The input stream is always closed by this method, whether or not the conversion succeeds.
     *
     * @param in       docx stream; must not be {@code null}
     * @param imageDir directory the images of the document are extracted to; may be {@code null},
     *                 in which case no image extractor is configured
     * @return the XHTML produced by the converter, decoded as UTF-8
     * @throws Exception if the document cannot be read or converted
     */
    public static String docx2Html(InputStream in, String imageDir) throws Exception {
        Objects.requireNonNull(in, "in");
        // try-with-resources closes the XWPFDocument as well, which the manual finally block forgot.
        try (InputStream source = in;
             XWPFDocument document = new XWPFDocument(source);
             ByteArrayOutputStream xhtml = new ByteArrayOutputStream()) {
            XHTMLOptions options = XHTMLOptions.create();
            if (Objects.nonNull(imageDir)) {
                // Images are written below imageDir and referenced from the XHTML through the resolver.
                options.setExtractor(new FileImageExtractor(new File(imageDir)));
                options.URIResolver(new BasicURIResolver(imageDir));
                // NOTE(review): these two settings are applied only when imageDir is given, exactly as
                // in the original code; confirm that is intended.
                options.setIgnoreStylesIfUnused(false);
                options.setFragment(true);
            }
            XHTMLConverter.getInstance().convert(document, xhtml, options);
            return new String(xhtml.toByteArray(), UTF_8);
        }
    }

    /**
     * Normalises HTML with jsoup so that it can be parsed as XHTML.
     *
     * @param html HTML content
     * @return the document serialised with XML syntax and XHTML escaping
     */
    private static String formatHtml(String html) {
        org.jsoup.nodes.Document doc = Jsoup.parse(html);
        // Drop styles that carry a width, because oversized widths break the page layout.
        clearStyleIfWidthSet(doc);
        Elements divs = doc.select("div");
        for (Element div : divs) {
            clearStyleIfWidthSet(div);
        }
        // XML syntax makes jsoup emit closed tags, which the XHTML parser requires.
        doc.outputSettings().syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml);
        doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
        return doc.html();
    }

    /** Empties the {@code style} attribute of {@code element} when that style mentions a width. */
    private static void clearStyleIfWidthSet(Element element) {
        String style = element.attr("style");
        if (StringUtils.isNotEmpty(style) && style.contains("width")) {
            element.attr("style", "");
        }
    }

    /**
     * Renders XHTML to PDF on an A4 page.
     *
     * @param html XHTML content; must not be {@code null}
     * @param out  stream the PDF is written to; must not be {@code null}
     * @throws Exception if the font cannot be loaded or the XHTML cannot be rendered
     */
    public static void htmlToPdf(String html, OutputStream out) throws Exception {
        Objects.requireNonNull(html, "html");
        Objects.requireNonNull(out, "out");

        // Load the font once, up front: a missing font now fails immediately with its real cause
        // instead of handing a null Font to the parser.
        final BaseFont baseFont = AutoFontFactory.getBaseFont();
        FontProvider fontProvider = new FontProvider() {
            @Override
            public boolean isRegistered(String fontName) {
                return false;
            }

            @Override
            public Font getFont(String fontName, String encoding, boolean embedded,
                                float size, int style, BaseColor color) {
                // Every requested font is mapped onto the single configured base font.
                return new Font(baseFont, size, style, color);
            }
        };

        Document document = new Document(PageSize.A4);
        PdfWriter writer = PdfWriter.getInstance(document, out);
        document.open();
        try {
            XMLWorkerHelper.getInstance().parseXHtml(writer, document,
                    new ByteArrayInputStream(html.getBytes(UTF_8)), UTF_8, fontProvider);
        } catch (Exception e) {
            // Closing a document that received no content may itself fail; keep the parse error as
            // the primary exception and attach the close failure to it.
            closeSuppressing(document, e);
            throw e;
        }
        document.close();
    }

    /** Closes {@code document}, recording any close failure as suppressed on {@code primary}. */
    private static void closeSuppressing(Document document, Exception primary) {
        try {
            document.close();
        } catch (RuntimeException closeFailure) {
            primary.addSuppressed(closeFailure);
        }
    }

    /**
     * Converts a docx stream to a PDF stream.
     *
     * <p>This method never throws an {@link Exception}; failures are logged and reported through
     * the return value.
     *
     * @param in       docx stream
     * @param out      stream the PDF is written to
     * @param imageDir directory the images of the document are extracted to
     * @return {@code true} if the PDF was written, {@code false} if any step failed
     */
    public static boolean docxConvertPdf(InputStream in, OutputStream out, String imageDir) {
        try {
            String xhtml = formatHtml(docx2Html(in, imageDir));
            htmlToPdf(xhtml, out);
            return true;
        } catch (Exception e) {
            log.error("Failed to convert docx to pdf", e);
        }
        return false;
    }
}

备注:OfficeUtil 中用到的 AutoFontFactory 是自定义的字体工厂类。因为 Linux 环境下往往缺少某些中文字体,生成的 PDF 会出现中文乱码。解决方案之一,就是从 Windows 字体库中复制一个字体文件(例如 simsun.ttc),放到项目的 resources 目录下(本文放在 resources/font/ 下),然后在代码中引用即可。

package com.yalin.cn.fileutil.font;

import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.BaseFont;

import java.io.IOException;

/**
 * Supplies the iText base font used when rendering PDFs.
 *
 * <p>Alternatives noted by the author, should the bundled font not be available:
 * <ul>
 *   <li>a font installed on the machine, e.g.
 *       {@code BaseFont.createFont("C:/Windows/Fonts/seguisym.ttf", BaseFont.IDENTITY_H, BaseFont.EMBEDDED)};</li>
 *   <li>the iTextAsian jar, which needs no font file:
 *       {@code BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.EMBEDDED)}.</li>
 * </ul>
 *
 * @description: font factory
 * @author: lyl
 * @create: 2022-01-17 15:38:29
 **/
public class AutoFontFactory {

    /** Location of the resource font; the {@code ,0} suffix selects the first face of the collection. */
    private static final String RESOURCE_FONT = "/font/simsun.ttc,0";

    /**
     * Creates the base font from the font file shipped with the application resources.
     *
     * @return the base font, using Identity-H encoding and embedded into the PDF
     * @throws IOException       if the font file cannot be read
     * @throws DocumentException if the font is invalid
     */
    public static BaseFont getBaseFont() throws IOException, DocumentException {
        return BaseFont.createFont(RESOURCE_FONT, BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
    }
}

测试类

 /**
  * Converts the sample Word document on the author's desktop and checks that a PDF was produced.
  *
  * <p>The test is skipped (not failed) on machines where the sample document does not exist.
  */
 @Test
 void wordConvertPdf() {
     String basePath = "C:\\Users\\lyl\\Desktop\\";
     String sourcePath = basePath + "原始word文件.docx";
     String targetPath = basePath + "转换之后的pdf文件.pdf";
     String imagePath = basePath + "img" + File.separator;

     // The input lives on one developer machine; without it there is nothing meaningful to verify.
     org.junit.jupiter.api.Assumptions.assumeTrue(new File(sourcePath).isFile(),
             "sample document not found: " + sourcePath);

     WordConvertPdfServiceImpl service = new WordConvertPdfServiceImpl();
     boolean converted = service.convert(sourcePath, targetPath, imagePath);
     System.out.println(converted);

     // Printing alone can never fail the test; assert the outcome as well.
     org.junit.jupiter.api.Assertions.assertTrue(converted, "conversion reported failure");
     org.junit.jupiter.api.Assertions.assertTrue(new File(targetPath).length() > 0,
             "PDF file is missing or empty: " + targetPath);
 }

标签: #javahtml生成word