博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
使用POI把Word Excel转为HTML
阅读量:5965 次
发布时间:2019-06-19

本文共 7170 字,大约阅读时间需要 23 分钟。

此方法是针对Office2003的,但是word中如果有图片,图片能够解析出来但是HTML文件中不显示。也不支持excel中的图片解析。

所需jar包如下:

1.PoiUtil.java

package com.wzh.poi;import java.io.BufferedWriter;import java.io.ByteArrayOutputStream;import java.io.File;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.io.OutputStreamWriter;import java.io.UnsupportedEncodingException;import java.util.List;import java.util.logging.Level;import java.util.logging.Logger;import javax.xml.parsers.DocumentBuilderFactory;import javax.xml.parsers.ParserConfigurationException;import javax.xml.transform.OutputKeys;import javax.xml.transform.Transformer;import javax.xml.transform.TransformerConfigurationException;import javax.xml.transform.TransformerException;import javax.xml.transform.TransformerFactory;import javax.xml.transform.dom.DOMSource;import javax.xml.transform.stream.StreamResult;import org.apache.poi.hssf.converter.ExcelToHtmlConverter;import org.apache.poi.hssf.usermodel.HSSFWorkbook;import org.apache.poi.hwpf.HWPFDocument;import org.apache.poi.hwpf.converter.PicturesManager;import org.apache.poi.hwpf.converter.WordToHtmlConverter;import org.apache.poi.hwpf.usermodel.Picture;import org.apache.poi.hwpf.usermodel.PictureType;import org.w3c.dom.Document;/** * @date 2015-3-16 17:22:05 * @author y * @desc */public class PoiUtil {    /**     * Excel 转为 HTML     * @param fileName     * @param outputFile     * @throws FileNotFoundException     * @throws IOException     * @throws ParserConfigurationException     * @throws TransformerConfigurationException     * @throws TransformerException      */    public static void excelToHtml(String fileName, String outputFile)            throws FileNotFoundException, IOException, ParserConfigurationException,                 TransformerConfigurationException, TransformerException {        InputStream is = new FileInputStream(fileName);        HSSFWorkbook excelBook = new HSSFWorkbook(is);        ExcelToHtmlConverter ethc = new ExcelToHtmlConverter(                
DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());        ethc.setOutputColumnHeaders(false);        ethc.setOutputRowNumbers(false);        ethc.processWorkbook(excelBook);        Document htmlDocument = ethc.getDocument();        ByteArrayOutputStream out = new ByteArrayOutputStream();        DOMSource domSource = new DOMSource(htmlDocument);        StreamResult streamResult = new StreamResult(out);                TransformerFactory tf = TransformerFactory.newInstance();        Transformer serializer = tf.newTransformer();        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");        serializer.setOutputProperty(OutputKeys.INDENT, "yes");        serializer.setOutputProperty(OutputKeys.METHOD, "html");        serializer.transform(domSource, streamResult);        out.close();                String htmlStr = new String(out.toByteArray());                htmlStr = htmlStr.replace("

Sheet1

", "") .replace("

Sheet2

", "") .replace("

Sheet3

", "") .replace("

Sheet4

", "") .replace("

Sheet5

", ""); writeFile(htmlStr, outputFile); } /** * Word 转为 HTML * * @param fileName * @param outputFile * @throws IOException * @throws ParserConfigurationException * @throws TransformerException */ public static void wordToHtml(String fileName, String outputFile) throws IOException, ParserConfigurationException, TransformerException { HWPFDocument wordDoc = new HWPFDocument(new FileInputStream(fileName)); WordToHtmlConverter wthc = new WordToHtmlConverter( DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()); wthc.setPicturesManager(new PicturesManager() { @Override public String savePicture(byte[] bytes, PictureType pt, String string, float f, float f1) { return string; } }); wthc.processDocument(wordDoc); List
pics = wordDoc.getPicturesTable().getAllPictures(); if (null != pics && pics.size() > 0) { for (Picture pic : pics) { pic.writeImageContent(new FileOutputStream(pic.suggestFullFileName())); } } Document htmlDocument = wthc.getDocument(); ByteArrayOutputStream out = new ByteArrayOutputStream(); DOMSource domSource = new DOMSource(htmlDocument); StreamResult streamResult = new StreamResult(out); TransformerFactory tf = TransformerFactory.newInstance(); Transformer serializer = tf.newTransformer(); serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); serializer.setOutputProperty(OutputKeys.INDENT, "yes"); serializer.setOutputProperty(OutputKeys.METHOD, "html"); serializer.transform(domSource, streamResult); out.close(); String htmlStr = new String(out.toByteArray()); writeFile(htmlStr, outputFile); } public static void writeFile(String content, String path) { FileOutputStream fos = null; BufferedWriter bw = null; File file = new File(path); try { fos = new FileOutputStream(file); bw = new BufferedWriter(new OutputStreamWriter(fos, "UTF-8")); bw.write(content); } catch (FileNotFoundException ex) { Logger.getLogger(PoiUtil.class.getName()).log(Level.SEVERE, null, ex); } catch (UnsupportedEncodingException ex) { Logger.getLogger(PoiUtil.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(PoiUtil.class.getName()).log(Level.SEVERE, null, ex); } finally { try { if (null != bw) { bw.close(); } if (null != fos) { fos.close(); } } catch (IOException ex) { Logger.getLogger(PoiUtil.class.getName()).log(Level.SEVERE, null, ex); } } }}

2.Test.java

import com.wzh.poi.PoiUtil;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;

/**
 * Small driver that converts a sample workbook to HTML with {@link PoiUtil}.
 *
 * @author y
 */
public class Test {

    /** Workbook read by the demo. */
    private static final String INPUT_WORKBOOK = "t2.xls";

    /** HTML file produced by the demo. */
    private static final String OUTPUT_HTML = "test.html";

    /**
     * Runs the conversion; any failure is logged at SEVERE level.
     *
     * @param args the command line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            PoiUtil.excelToHtml(INPUT_WORKBOOK, OUTPUT_HTML);
        } catch (IOException | ParserConfigurationException | TransformerException failure) {
            Logger.getLogger(Test.class.getName()).log(Level.SEVERE, null, failure);
        }
    }
}

 

转载地址:http://lavax.baihongyu.com/

你可能感兴趣的文章
python总结
查看>>
hdu 5215 Cycle
查看>>
GCD学习(五) dispatch_barrier_async
查看>>
file_get_contents("php://input")的使用方法
查看>>
MeasureSpec学习
查看>>
Android View体系(五)从源码解析View的事件分发机制
查看>>
数据结构 之 并查集(Disjoint Set)
查看>>
枚举类的创建和使用
查看>>
如何改变Myeclipse编辑区背景色(转)
查看>>
深入浅出LVM on linux
查看>>
转载 C++实现的委托机制
查看>>
编辑框CEdit自动换行简单设置
查看>>
很实用的小功能,通过配置Web.xml让点击文件路径的超链接,直接下载而不会在浏览器上尝试打开...
查看>>
【转】HTML5杂谈 概念与现行游戏 割绳子 宝石迷阵
查看>>
Java解析xml的主要解析器: SAX和DOM的选择(附上新方法--Pull解析)
查看>>
再谈 document.documentElement 与 document.body 的 scrollWidth、offsetWidth、clientWidth
查看>>
项目管理: Maven 让事情变得简单
查看>>
传智播客--itcastbbs(四)
查看>>
【转】笔记本电脑关机时蓝屏是怎么回事
查看>>
UVA 10118 Free Candies
查看>>