前言:
此刻大家可能比较关注“apachepoi读取docx”,也需要了解一些与之相关的知识。小编在网络上收集了一些有关“apachepoi读取docx”的内容,希望大家喜欢,快来了解一下吧!
适用场景:Java 读取带图片或不带图片的 doc 文件(注意:下文代码使用 HWPF 的 HWPFDocument,只适用于 .doc 格式;.docx 文件需要改用 XWPF)。
1.带图片的doc文件格式
2.运行代码
生成的html格式文件:
生成到目录的图片:
双击打开csDoc.html:
代码:
package com.test;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Random;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;
/**
 * Demo test that converts a legacy Word {@code .doc} file (with or without embedded
 * pictures) into a simple HTML file using Apache POI's HWPF API.
 *
 * <p>The document is walked one character at a time: characters that carry a picture are
 * extracted to {@link #IMAGE_DIR} and replaced by an {@code <img>} tag, carriage returns
 * become {@code <br/>}, and all other text is copied with HTML special characters escaped.
 */
@RunWith(SpringRunner.class)
@SpringBootTest
public class TestDoc {

    /** The .doc file to read (located on the desktop). */
    private static final String DOC_PATH = "C:\\Users\\Administrator\\Desktop\\cs.doc";

    /** Directory prefix, including the trailing separator, where extracted pictures are stored. */
    private static final String IMAGE_DIR = "D:\\upload\\cs\\";

    /** The HTML file that is generated (located on the desktop). */
    private static final String HTML_PATH = "C:\\Users\\Administrator\\Desktop\\csDoc.html";

    /** Runs the conversion and prints the generated HTML fragment. */
    @Test
    public void testReadImgDoc() {
        System.out.println(analyzeDoc());
    }

    /**
     * Reads {@link #DOC_PATH}, builds an HTML fragment from its text and pictures, writes the
     * fragment to {@link #HTML_PATH} and returns it.
     *
     * <p>I/O failures are reported on {@code System.err} instead of being thrown, so the method
     * still returns (and writes) whatever content was collected before the failure.
     *
     * @return the generated HTML fragment; empty if the document could not be read
     */
    private String analyzeDoc() {
        StringBuilder html = new StringBuilder();
        try (InputStream in = new FileInputStream(DOC_PATH)) {
            HWPFDocument doc = new HWPFDocument(in);
            // Total number of characters in the document.
            int length = doc.characterLength();
            // Table used to look up and extract embedded pictures.
            PicturesTable pTable = doc.getPicturesTable();
            // The original bound of length - 1 is kept, so the final character is not processed.
            // NOTE(review): presumably this skips the trailing paragraph mark - confirm.
            for (int i = 0; i < length - 1; i++) {
                // Inspect the document one character at a time through a one-character range.
                Range range = new Range(i, i + 1, doc);
                CharacterRun cr = range.getCharacterRun(0);
                if (pTable.hasPicture(cr)) {
                    String pic = readPicture(pTable, cr, i, IMAGE_DIR);
                    // An empty path means the picture could not be saved; skip the tag
                    // rather than emit a broken <img src=''>.
                    if (!pic.isEmpty()) {
                        html.append("<img src='" + pic + "' />");
                    }
                    continue;
                }
                String text = cr.text();
                if (text == null || text.isEmpty()) {
                    continue;
                }
                if (text.charAt(0) == 13) {
                    // Carriage return: emit an HTML line break.
                    html.append("<br/>");
                } else {
                    appendEscaped(html, text);
                }
            }
        } catch (IOException e) {
            System.err.println("Failed to read document " + DOC_PATH);
            e.printStackTrace();
        }

        String htmlContent = html.toString();
        // Write with an explicit charset and declare it in the file, so non-ASCII text is
        // displayed correctly regardless of the platform default encoding.
        try (PrintWriter pw = new PrintWriter(HTML_PATH, "UTF-8")) {
            pw.write("<meta charset=\"UTF-8\">");
            pw.write(htmlContent);
            // PrintWriter never throws on write; checkError() flushes and reports failures.
            if (pw.checkError()) {
                System.err.println("Failed to write HTML file " + HTML_PATH);
            }
        } catch (IOException e) {
            System.err.println("Failed to create HTML file " + HTML_PATH);
            e.printStackTrace();
        }
        return htmlContent;
    }

    /**
     * Appends {@code text} to {@code target}, replacing the HTML special characters
     * {@code <}, {@code >} and {@code &} with their entity references.
     */
    private static void appendEscaped(StringBuilder target, String text) {
        for (int k = 0; k < text.length(); k++) {
            char ch = text.charAt(k);
            switch (ch) {
                case '<':
                    target.append("&lt;");
                    break;
                case '>':
                    target.append("&gt;");
                    break;
                case '&':
                    target.append("&amp;");
                    break;
                default:
                    target.append(ch);
            }
        }
    }

    /**
     * Extracts the picture attached to {@code cr} and stores it under {@code imgPath}.
     *
     * <p>The file name is {@link #createNumber()} + {@code i} + the file name suggested by POI.
     *
     * @param pTable  the document's picture table
     * @param cr      the character run that carries the picture
     * @param i       index of the character in the document, used as part of the file name
     * @param imgPath directory prefix (including the trailing separator) for the picture file
     * @return the full path of the written picture, or an empty string if it could not be saved
     */
    private String readPicture(PicturesTable pTable, CharacterRun cr, int i, String imgPath) {
        // Extract the picture without filling in its content eagerly.
        Picture pic = pTable.extractPicture(cr, false);
        if (pic == null) {
            return "";
        }
        // Prefix the file name suggested by POI with a timestamp-based number and the index.
        String fileName = imgPath + createNumber() + i + pic.suggestFullFileName();
        File file = new File(fileName);
        // Create the target directory if it does not exist yet.
        File parent = file.getParentFile();
        if (parent != null && !parent.exists()) {
            parent.mkdirs();
        }
        try (OutputStream out = new FileOutputStream(file)) {
            pic.writeImageContent(out);
        } catch (IOException | RuntimeException e) {
            System.err.println("Failed to write picture " + fileName);
            e.printStackTrace();
            return "";
        }
        return fileName;
    }

    /**
     * Generates an 18-digit number: {@code yyyyMMddHHmmss} followed by a 4-digit random
     * number in the range 1000-9999.
     */
    private String createNumber() {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");
        Random r = new Random();
        return sdf.format(new Date()) + (r.nextInt(9000) + 1000);
    }
}
标签: #apachepoi读取docx