此方法仅适用于 Office 2003 格式(.doc / .xls)。Word 文档中如果有图片,图片能够被解析出来,但不会显示在生成的 HTML 文件中;Excel 中的图片则不支持解析。
所需jar包如下(原文的 jar 包列表未能保留;从代码的 import 来看,至少需要 Apache POI 的 poi 与 poi-scratchpad,请自行核对版本):
1.PoiUtil.java
package com.wzh.poi;import java.io.BufferedWriter;import java.io.ByteArrayOutputStream;import java.io.File;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.io.OutputStreamWriter;import java.io.UnsupportedEncodingException;import java.util.List;import java.util.logging.Level;import java.util.logging.Logger;import javax.xml.parsers.DocumentBuilderFactory;import javax.xml.parsers.ParserConfigurationException;import javax.xml.transform.OutputKeys;import javax.xml.transform.Transformer;import javax.xml.transform.TransformerConfigurationException;import javax.xml.transform.TransformerException;import javax.xml.transform.TransformerFactory;import javax.xml.transform.dom.DOMSource;import javax.xml.transform.stream.StreamResult;import org.apache.poi.hssf.converter.ExcelToHtmlConverter;import org.apache.poi.hssf.usermodel.HSSFWorkbook;import org.apache.poi.hwpf.HWPFDocument;import org.apache.poi.hwpf.converter.PicturesManager;import org.apache.poi.hwpf.converter.WordToHtmlConverter;import org.apache.poi.hwpf.usermodel.Picture;import org.apache.poi.hwpf.usermodel.PictureType;import org.w3c.dom.Document;/** * @date 2015-3-16 17:22:05 * @author y * @desc */public class PoiUtil { /** * Excel 转为 HTML * @param fileName * @param outputFile * @throws FileNotFoundException * @throws IOException * @throws ParserConfigurationException * @throws TransformerConfigurationException * @throws TransformerException */ public static void excelToHtml(String fileName, String outputFile) throws FileNotFoundException, IOException, ParserConfigurationException, TransformerConfigurationException, TransformerException { InputStream is = new FileInputStream(fileName); HSSFWorkbook excelBook = new HSSFWorkbook(is); ExcelToHtmlConverter ethc = new ExcelToHtmlConverter( DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()); ethc.setOutputColumnHeaders(false); 
ethc.setOutputRowNumbers(false); ethc.processWorkbook(excelBook); Document htmlDocument = ethc.getDocument(); ByteArrayOutputStream out = new ByteArrayOutputStream(); DOMSource domSource = new DOMSource(htmlDocument); StreamResult streamResult = new StreamResult(out); TransformerFactory tf = TransformerFactory.newInstance(); Transformer serializer = tf.newTransformer(); serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); serializer.setOutputProperty(OutputKeys.INDENT, "yes"); serializer.setOutputProperty(OutputKeys.METHOD, "html"); serializer.transform(domSource, streamResult); out.close(); String htmlStr = new String(out.toByteArray()); htmlStr = htmlStr.replace("Sheet1
", "") .replace("Sheet2
", "") .replace("Sheet3
", "") .replace("Sheet4
", "") .replace("Sheet5
", ""); writeFile(htmlStr, outputFile); } /** * Word 转为 HTML * * @param fileName * @param outputFile * @throws IOException * @throws ParserConfigurationException * @throws TransformerException */ public static void wordToHtml(String fileName, String outputFile) throws IOException, ParserConfigurationException, TransformerException { HWPFDocument wordDoc = new HWPFDocument(new FileInputStream(fileName)); WordToHtmlConverter wthc = new WordToHtmlConverter( DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()); wthc.setPicturesManager(new PicturesManager() { @Override public String savePicture(byte[] bytes, PictureType pt, String string, float f, float f1) { return string; } }); wthc.processDocument(wordDoc); List
2.Test.java
import com.wzh.poi.PoiUtil;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

/**
 * Command-line demo that converts an Excel workbook to HTML by calling
 * {@code PoiUtil.excelToHtml}.
 *
 * @author y
 */
public class Test {

    /** Workbook read when no input path is given on the command line. */
    private static final String DEFAULT_INPUT = "t2.xls";

    /** HTML file written when no output path is given on the command line. */
    private static final String DEFAULT_OUTPUT = "test.html";

    private static final Logger LOGGER = Logger.getLogger(Test.class.getName());

    /**
     * Runs the conversion and logs any failure at {@code SEVERE} level.
     *
     * @param args the command line arguments: optional input workbook path, then
     *             optional output HTML path; missing values fall back to
     *             {@code t2.xls} and {@code test.html}
     */
    public static void main(String[] args) {
        // Tolerate a null array so programmatic callers can pass nothing at all.
        int argCount = (args == null) ? 0 : args.length;
        String input = (argCount > 0) ? args[0] : DEFAULT_INPUT;
        String output = (argCount > 1) ? args[1] : DEFAULT_OUTPUT;

        try {
            PoiUtil.excelToHtml(input, output);
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        } catch (ParserConfigurationException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        } catch (TransformerException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
    }
}