龙空技术网

java读取带图片的doc文件

酒加糖 218

前言:

此刻你们对“apachepoi读取docx”可能比较关注,朋友们都需要了解一些“apachepoi读取docx”的相关知识。小编在网络上收集了一些有关“apachepoi读取docx”的相关内容,希望姐妹们能喜欢,小伙伴们快快来了解一下吧!

适用场景:java读取带图片或不带图片的doc文件

1.带图片的doc文件格式

2.运行代码

生成的html格式文件:

生成到目录的图片:

双击打开csDoc.html:

代码:

package com.test;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.InputStream;

import java.io.OutputStream;

import java.io.PrintWriter;

import java.text.SimpleDateFormat;

import java.util.Date;

import java.util.Random;

import org.apache.poi.hwpf.HWPFDocument;

import org.apache.poi.hwpf.model.PicturesTable;

import org.apache.poi.hwpf.usermodel.CharacterRun;

import org.apache.poi.hwpf.usermodel.Picture;

import org.apache.poi.hwpf.usermodel.Range;

import org.junit.Test;

import org.junit.runner.RunWith;

import org.springframework.boot.test.context.SpringBootTest;

import org.springframework.test.context.junit4.SpringRunner;

/**
 * Demo test that reads a legacy Word {@code .doc} file (with or without embedded pictures),
 * turns its characters into a simple HTML fragment, saves every embedded picture to disk and
 * writes the fragment to an HTML file.
 */
@RunWith(SpringRunner.class)
@SpringBootTest
public class TestDoc {

    /** The .doc file to read (on the desktop). */
    private static final String DOC_PATH = "C:\\Users\\Administrator\\Desktop\\cs.doc";

    /** Directory that receives the extracted pictures. */
    private static final String IMAGE_DIR = "D:\\upload\\cs\\";

    /** HTML file that receives the generated fragment (on the desktop). */
    private static final String HTML_PATH = "C:\\Users\\Administrator\\Desktop\\csDoc.html";

    /** Runs the conversion and prints the generated HTML fragment. */
    @Test
    public void testReadImgDoc() {
        System.out.println(analyzeDoc());
    }

    /**
     * Converts the document at {@link #DOC_PATH} into an HTML fragment and writes it to
     * {@link #HTML_PATH}.
     *
     * <p>The document is walked one character at a time: a character that carries a picture
     * becomes an {@code <img>} tag pointing at the extracted file, a carriage return becomes
     * {@code <br/>}, and any other character is appended as HTML-escaped text.
     *
     * <p>I/O failures are reported on standard error; whatever was converted up to that point is
     * still written out and returned.
     *
     * @return the generated HTML fragment (empty if the document could not be read)
     */
    private String analyzeDoc() {
        // A local buffer needs no synchronization, so StringBuilder is sufficient.
        StringBuilder html = new StringBuilder();

        try (InputStream in = new FileInputStream(DOC_PATH)) {
            HWPFDocument doc = new HWPFDocument(in);
            // Total number of characters in the document.
            int length = doc.characterLength();
            // Table used to detect and extract embedded pictures.
            PicturesTable pTable = doc.getPicturesTable();

            // The loop stops one short of characterLength(), so the final character is never
            // emitted. NOTE(review): presumably this drops the trailing paragraph mark - confirm.
            for (int i = 0; i < length - 1; i++) {
                // Inspect the document through a one-character range at a time.
                Range range = new Range(i, i + 1, doc);
                CharacterRun cr = range.getCharacterRun(0);

                if (pTable.hasPicture(cr)) {
                    String pic = readPicture(pTable, cr, i, IMAGE_DIR);
                    // readPicture returns "" on failure; do not emit a broken <img> tag.
                    if (!pic.isEmpty()) {
                        html.append("<img src='" + pic + "' />");
                    }
                } else {
                    String text = cr.text();
                    if (text.charAt(0) == 13) {
                        // Carriage return: rendered as a line break.
                        html.append("<br/>");
                    } else {
                        appendEscaped(html, text);
                    }
                }
            }
        } catch (IOException e) {
            // Covers FileNotFoundException as well; report instead of silently swallowing.
            e.printStackTrace();
        }

        String docxContent = html.toString();

        // NOTE(review): PrintWriter(String) encodes with the platform default charset; pass an
        // explicit charset if the HTML must be portable across machines.
        try (PrintWriter pw = new PrintWriter(HTML_PATH)) {
            // Open csDoc.html on the desktop afterwards to check the result.
            pw.write(docxContent);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }

        return docxContent;
    }

    /**
     * Appends {@code text} to {@code out}, replacing the HTML metacharacters {@code &},
     * {@code <} and {@code >} with their entities so document text cannot break the markup.
     */
    private static void appendEscaped(StringBuilder out, String text) {
        for (int k = 0; k < text.length(); k++) {
            char c = text.charAt(k);
            switch (c) {
                case '&':
                    out.append("&amp;");
                    break;
                case '<':
                    out.append("&lt;");
                    break;
                case '>':
                    out.append("&gt;");
                    break;
                default:
                    out.append(c);
                    break;
            }
        }
    }

    /**
     * Extracts the picture attached to {@code cr} and saves it under {@code imgPath}.
     *
     * <p>The file name is {@link #createNumber()} + {@code i} + the name suggested by POI.
     *
     * @param pTable  picture table of the document
     * @param cr      character run that carries the picture
     * @param i       character index of the run, used as part of the file name
     * @param imgPath directory prefix (including the trailing separator) for the image file
     * @return the full path of the written file, or {@code ""} if writing or closing the file
     *     failed
     */
    private String readPicture(PicturesTable pTable, CharacterRun cr, int i, String imgPath) {
        // Extract the picture.
        Picture pic = pTable.extractPicture(cr, false);
        // Prefix the file name suggested by POI with the generated id and the character index.
        String img = createNumber() + i + pic.suggestFullFileName();
        String fileName = imgPath + img;
        File file = new File(fileName);

        // Create the target directory if it does not exist yet.
        File dir = file.getParentFile();
        if (dir != null && !dir.exists()) {
            dir.mkdirs();
        }

        try (OutputStream out = new FileOutputStream(file)) {
            pic.writeImageContent(out);
        } catch (Exception e) {
            // Report the failure; the caller treats "" as "no picture written".
            e.printStackTrace();
            return "";
        }
        return fileName;
    }

    /**
     * Generates an 18-character id: the current time as {@code yyyyMMddHHmmss} (14 digits)
     * followed by a random number between 1000 and 9999 (4 digits).
     */
    private String createNumber() {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");
        Random r = new Random();
        return sdf.format(new Date()) + (r.nextInt(9000) + 1000);
    }
}

标签: #apachepoi读取docx