jacob读取word_java解析word文档有哪些方法

㈠ java读取带格式word内容

用复jacob.jar吧，读取word还是挺方便的，也制可以把word直接转换成HTML或者jsp。
而HTML也可以直接用BufferedReader()的方法来读取里面的数据再添加删除你需要的数据再转换成jsp。
你留个邮箱或者QQ的话我可以给你发一些java用jacob类库操作word的方法。
POI和jxtl也可以操作

㈡ java读取带格式word内容

用jacob吧。。

/**
*@author eyuan
*/
package per.eyuan.word2txt.core;

import com.jacob.*;
import com.jacob.com.*;
import com.jacob.activeX.*;
import java.io.*;
import java.util.Scanner;

public class Core {
/**
* 实现转换的函数
* @param sourceFilesPath
* @param destinationFilesPath
* @param destinationFilesType
* @return void
* @see import com.jacob.activeX.*;
*/
public static void change(String sourceFilesPath,String destinationFilesPath,int destinationFilesType){
//使用word文件所在的目录（源路径）建立目录文件
File sourcePathFile=new File(sourceFilesPath);
//取得word文件（源文件列表）
File sourceFilesList[]=sourcePathFile.listFiles();
System.out.println("共有"+sourceFilesList.length+"个文件（文件夹）");
//指定要转换的文件所在的目录下，如果有子目录，
//则进入子目录，继续查找word文档并将其转换，
//直到将指定目录下的所有word文档转换完。
//子目录名
String sourceChildPath=new String("");
//保持原来的层次关系，将子目录下的文件存放在新建的子目录中
String destiNationChildPath=new String("");
//检索文件，过滤掉非word文件，通过扩展名过滤
for(int i=0;i<sourceFilesList.length;i++){
//排除掉子文件夹
if(sourceFilesList[i].isFile()){
System.out.println("第"+(i+1)+"个文件：");
//取得文件全名（包含扩展名）
String fileName=sourceFilesList[i].getName();
String fileType=new String("");
//取得文件扩展名
fileType=fileName.substring((fileName.length()-4), fileName.length());
//word2007-2010扩展名为docx
//判断是否为word2007-2010文档，及是否以docx为后缀名
if(fileType.equals("docx")){
System.out.println("正在转换。。。");
//输出word文档所在路劲
System.out.println("目录："+sourceFilesPath);
//输出word文档名
System.out.println("文件名："+fileName);
//System.out.println(fileName.substring(0, (fileName.length()-5)));
//核心函数
//启动word
ActiveXComponent app=new ActiveXComponent("Word.Application");
//要转换的文档的全路径（所在文件夹+文件全名）
String docPath=sourceFilesPath+"\\"+fileName;
//转换后的文档的全路径（所在文件夹+文件名）
String othersPath=destinationFilesPath+"\\"+fileName.substring(0,(fileName.length()-5));
//
String inFile=docPath;
String outFile=othersPath;
//
boolean flag=false;
//核心代码
try{
//设置word可见性
app.setProperty("Visible", new Variant(false));
//
Dispatch docs=app.getProperty("Documents").toDispatch();
//打开word文档
Dispatch doc=Dispatch.invoke(docs, "Open", Dispatch.Method, new Object[]{inFile,new Variant(false),new Variant(true)}, new int[1]).toDispatch();
//0:Microsoft Word 97 - 2003 文档 (.doc)
//1:Microsoft Word 97 - 2003 模板 (.dot)
//2:文本文档 (.txt)
//3:文本文档 (.txt)
//4:文本文档 (.txt)
//5:文本文档 (.txt)
//6:RTF 格式 (.rtf)
//7:文本文档 (.txt)
//8:HTML 文档 (.htm)(带文件夹)
//9:MHTML 文档 (.mht)(单文件)
//10:MHTML 文档 (.mht)(单文件)
//11:XML 文档 (.xml)
//12:Microsoft Word 文档 (.docx)
//13:Microsoft Word 启用宏的文档 (.docm)
//14:Microsoft Word 模板 (.dotx)
//15:Microsoft Word 启用宏的模板 (.dotm)
//16:Microsoft Word 文档 (.docx)
//17:PDF 文件 (.pdf)
//18:XPS 文档 (.xps)
//19:XML 文档 (.xml)
//20:XML 文档 (.xml)
//21:XML 文档 (.xml)
//22:XML 文档 (.xml)
//23:OpenDocument 文本 (.odt)
//24:WTF 文件 (.wtf)
//另存为指定格式的文档
Dispatch.invoke(doc, "SaveAs", Dispatch.Method, new Object[]{outFile,new Variant(destinationFilesType)}, new int[1]);
//
Variant file=new Variant(false);
//关闭文档
Dispatch.call(doc, "Close",file);
//
flag=true;
}catch(Exception e){
e.printStackTrace();
System.out.println("文档转换失败");
}finally{
app.invoke("Quit",new Variant[]{});
}
System.out.println("转换完毕");
}
//word97-2003扩展名为doc
//判断是否为word2003-2007文档，及是否以doc为后缀名
else if(fileType.equals(".doc")){
System.out.println("正在转换。。。");
//输出word文档所在路劲
System.out.println("目录："+sourceFilesPath);
//输出word文档名
System.out.println("文件名："+fileName);
//System.out.println(fileName.substring(0, (fileName.length()-4)));
//核心函数
//启动word
ActiveXComponent app=new ActiveXComponent("Word.Application");
//要转换的文档的全路径（所在文件夹+文件全名）
String docPath=sourceFilesPath+"\\"+fileName;
//转换后的文档的全路径（所在文件夹+文件名）
String othersPath=destinationFilesPath+"\\"+fileName.substring(0,(fileName.length()-4));
//
String inFile=docPath;
String outFile=othersPath;
//
boolean flag=false;
//核心代码
try{
//设置word可见性
app.setProperty("Visible", new Variant(false));
//
Dispatch docs=app.getProperty("Documents").toDispatch();
//打开word文档
Dispatch doc=Dispatch.invoke(docs, "Open", Dispatch.Method, new Object[]{inFile,new Variant(false),new Variant(true)}, new int[1]).toDispatch();
//另存为指定格式的文档
Dispatch.invoke(doc, "SaveAs", Dispatch.Method, new Object[]{outFile,new Variant(destinationFilesType)}, new int[1]);
//
Variant file=new Variant(false);
//关闭文档
Dispatch.call(doc, "Close",file);
//
flag=true;
}catch(Exception e){
e.printStackTrace();
System.out.println("文档转换失败");
}finally{
app.invoke("Quit",new Variant[]{});
}
System.out.println("转换完毕");
}
//文档的扩展名不是doc或docx
else{
System.out.println("非word文档");
}
}
//如果是子文件夹，则递归遍历，将所有的word文档转换
else{
//
sourceChildPath=sourceFilesPath;
//该文件是目录
sourceChildPath=sourceChildPath+"\\"+sourceFilesList[i].getName()+"\\";
System.out.println("源文件所在路径："+sourceChildPath);
//修改目标文件夹，保持原来的层级关系
destiNationChildPath=destinationFilesPath;
destiNationChildPath=destinationFilesPath+"\\"+sourceFilesList[i].getName()+"\\";
System.out.println("转换后文件所在路径"+destiNationChildPath);
//
mkdir(destiNationChildPath);
//递归遍历所有目录，查找word文档，并将其转换
change(sourceChildPath, destiNationChildPath,destinationFilesType);
}
}
System.out.println("所有文档转换完毕");
}
/**
* 用于创建文件夹的方法
* @param mkdirName
*/
public static void mkdir(String mkdirName){
try{
//使用指定的路径创建文件对象
File dirFile = new File(mkdirName);
//
boolean bFile = dirFile.exists();
//已经存在文件夹，操作？？？提醒是否要替换
if( bFile == true ) {
System.out.println("已经存在文件夹"+mkdirName);
}
//不存在该文件夹，则新建该目录
else{
System.out.println("新建文件夹"+mkdirName);
bFile = dirFile.mkdir();
if( bFile == true ){
System.out.println("文件夹创建成功");
}else{
System.out.println(" 文件夹创建失败，清确认磁盘没有写保护并且空件足够");
System.exit(1);
}
}
}catch(Exception err){
System.err.println("ELS - Chart : 文件夹创建发生异常");
err.printStackTrace();
}finally{

}
}
/**
* 判断某个文件夹是否存在
* @param path
*/
public static boolean isPathExist(String path){
boolean isPathExist=false;
try{
File pathFile = new File(path);
if(pathFile.exists())
isPathExist= true;
else
isPathExist= false;
}catch(Exception err){
err.printStackTrace();
}
return isPathExist;
}
/**
* 主函数
*/
public static void main(String[] args){
Scanner sc=new Scanner(System.in);
//源文档所在路径
String sourceFilesPath="";
// String inputSourcePath="";
// boolean sourcePathFlag=true;
// System.out.println("请输入要转换文档所在的文件夹");
// while(sourcePathFlag){
// inputSourcePath=sc.next();
// if(!isPathExist(inputSourcePath))
// System.out.println("源路径不存在，请输入正确的路径");
// else
// sourcePathFlag=false;
// }
// sourceFilesPath=inputSourcePath;
sourceFilesPath="D:\\word";
//目标文档要存放的目录
String destinationFilesPath="";
// String inputdestinationPath="";
// boolean destinationPathFlag=true;
// System.out.println("请输入转换后文档要存放的文件夹");
// while(destinationPathFlag){
// inputdestinationPath=sc.next();
// //目标文件不存在时，是否要提示用户创建文件
// if(!isPathExist(inputdestinationPath))
// System.out.println("目标路径不存在，请输入正确的路径");
// else
// destinationPathFlag=false;
// }
// destinationFilesPath=inputdestinationPath;
destinationFilesPath="D:\\txt";
//选择要转换的类型
int destinationFilesType=0;
int inputNumber=0;
boolean numFlag=true;
System.out.println("您要将word文档转换为哪种文档格式？");
System.out.println("0:doc \t 2:txt \t 8:html \t 9:htm \t 11:xml \t 12:docx \t 17:pdf \t 18:xps");
while(numFlag){
inputNumber=sc.nextInt();
if(inputNumber!=2&&inputNumber!=8&&inputNumber!=9&&inputNumber!=11&&inputNumber!=12&&inputNumber!=17){
System.out.println("您的输入有误，请输入要转换的文档类型前的数字");
}else
numFlag=false;
}
destinationFilesType=inputNumber;
//实行转换
change(sourceFilesPath, destinationFilesPath,destinationFilesType);
//测试各种类型转换
// for(int i=0;i<25;i++){
// destinationFilesType=i;
// System.out.println("文件类型"+destinationFilesType);
// System.out.println("存放目录："+destinationFilesPath+"\\"+i);
// mkdir(destinationFilesPath+"\\"+i);
// change(sourceFilesPath, destinationFilesPath+"\\"+i,destinationFilesType);
// }
}
}

这个我刚用的。。格式都能带过来的。你自己再下载个 jacob的包和dll文件

㈢ java：解析word文档（前程无忧简历），最好有代码案例poi或者jacob解析都可以，有jar资源，求急。感谢

poi读取前程无忧的简历会打不开的，至少我以前读是这样的，因为他有时候是mht文件直接另存为word文档的，所以保险起见建议用jacob来读，如果他是doc或者是docx文档可以转化为html然后用jsoup来读取，效果挺好的

下面是转化的代码：

packagecom.java.doc;
importcom.jacob.activeX.ActiveXComponent;
importcom.jacob.com.Dispatch;
importcom.jacob.com.Variant;
publicclassJacobRead{
publicstaticvoidextractDoc(StringinputFIle,StringoutputFile){
booleanflag=false;

//打开Word应用程序
ActiveXComponentapp=newActiveXComponent("Word.Application");
try{
//设置word不可见
app.setProperty("Visible",newVariant(false));
//打开word文件
Dispatchdoc1=app.getProperty("Documents").toDispatch();
Dispatchdoc2=Dispatch.invoke(
doc1,
"Open",
Dispatch.Method,
newObject[]{inputFIle,newVariant(false),
newVariant(true)},newint[1]).toDispatch();
//作为txt格式保存到临时文件
Dispatch.invoke(doc2,"SaveAs",Dispatch.Method,newObject[]{
outputFile,newVariant(7)},newint[1]);
//关闭word
Variantf=newVariant(false);
Dispatch.call(doc2,"Close",f);
flag=true;
}catch(Exceptione){
e.printStackTrace();
}finally{
app.invoke("Quit",newVariant[]{});
}
if(flag==true){
System.out.println("TransformedSuccessfully");
}else{
System.out.println("TransformFailed");
}
}

publicstaticvoidmain(String[]args){
	
	
JacobRead.extractDoc("D:/xxxx简历.doc","D:/e.txt");
}
}

当然，也可以转化为txt读取，这部分代码没保存，你可以到网上找找，和转化成html的方法大差不差。

然后下面是我以前写的poi读取的方式：

packageTestHanLp;

importjava.io.FileInputStream;
importjava.io.FileNotFoundException;
importjava.io.IOException;

importorg.apache.poi.POIXMLDocument;
importorg.apache.poi.POIXMLTextExtractor;
importorg.apache.poi.hwpf.extractor.WordExtractor;
importorg.apache.poi.openxml4j.opc.OPCPackage;
importorg.apache.poi.xwpf.extractor.XWPFWordExtractor;
importorg.apache.poi.xwpf.usermodel.XWPFDocument;


publicclassTest{
	
	privatestaticStringtext="";
	publicstaticStringRead(Stringpath)throwsException{
		//解析docx格式的简历
		if(path.toLowerCase().endsWith("docx")){
			
			try{
				OPCPackageoPCPackage=POIXMLDocument.openPackage(path);
		XWPFDocumentxwpf=newXWPFDocument(oPCPackage);
		POIXMLTextExtractorex=newXWPFWordExtractor(xwpf);
	text=ex.getText();
		oPCPackage.close();
		}
		catch(FileNotFoundExceptione)
		{
		e.printStackTrace();
		}
		catch(IOExceptione)
		{
		e.printStackTrace();
		}
			
		}else{
			
			//解析doc格式的简历
			if(path.toLowerCase().endsWith("doc")){
				FileInputStreamfis=newFileInputStream(path);//载入文档
				WordExtractorwordExtractor=newWordExtractor(fis);
			String[]paragraph=wordExtractor.getParagraphText();
			StringBufferstringBuffer=newStringBuffer();
			for(inti=0;i<paragraph.length;i++){
					if(null!=paragraph[i]&&!"".equals(paragraph[i])){
						paragraph[i]=paragraph[i].substring(0,paragraph[i].length()-1);//去掉末尾符号
					}
					stringBuffer.append(paragraph[i]).append("
");//将每一小段隔开
				}
				text=stringBuffer.toString();
			}	
		}
		returntext;
	}
}

望题主采纳

对了，jacob读取word文档的效果比poi号，但运行速度不够，用的时候自己考虑考虑吧

㈣ java解析word文档有哪些方法

java读取word文档时，虽然网上介绍了很多插件poi、java2Word、jacob、itext等等，poi无法读取格式(新的估
计行好像还在处于研发阶段，不太稳定，做项目不太敢用)；java2Word、jacob容易报错找不到注册，比较诡异，我曾经在不同的机器上试过，操作
方法完全一致，有的机器不报错，有的报错，去他们论坛找高人解决也说不出原因，项目部署用它有点玄；itxt好像写很方便但是我查了好久资料没有见到过关
于读的好办法。经过一番选择还是折中点采用rtf最好，毕竟rtf是开源格式，不需要借助任何插件，只需基本IO操作外加编码转换即可。rtf格式文件表
面看来和doc没啥区别，都可以用word打开，各种格式都可以设定。

----- 实现的功能：读取rtf模板内容（格式和文本内容），替换变化部分，形成新的rtf文档。

----- 实现思路：模板中固定部分手动输入，变化的部分用$info$表示，只需替换$info$即可。

1、采用字节的形式读取rtf模板内容

2、将可变的内容字符串转为rtf编码

3、替换原文中的可变部分，形成新的rtf文档

主要程序如下：

public String bin2hex(String bin) {

char[] digital = "0123456789ABCDEF".toCharArray();

StringBuffer sb = new StringBuffer("");

byte[] bs = bin.getBytes();

int bit;

for (int i = 0; i < bs.length;i++) {

bit = (bs[i] & 0x0f0)
>> 4;

sb.append("\\'");

sb.append(digital[bit]);

bit = bs[i] & 0x0f;

sb.append(digital[bit]);

}

return sb.toString();

}

public String readByteRtf(InputStream ins, String path){

String sourcecontent =
"";

try{

ins = new
FileInputStream(path);

byte[] b
= new byte[1024];

if (ins == null) {

System.out.println("源模板文件不存在");

}

int bytesRead = 0;

while (true) {

bytesRead = ins.read(b, 0, 1024); // return final read bytes
counts

if(bytesRead == -1) {// end of InputStream

System.out.println("读取模板文件结束");

break;

}

sourcecontent += new String(b, 0, bytesRead); // convert to string
using bytes

}

}catch(Exception e){

e.printStackTrace();

}

return sourcecontent ;

}

以上为核心代码，剩余部分就是替换，从新组装java中的String.replace(oldstr,newstr);方法可以实现，在这就不贴了。源代码部分详见附件。

运行源代码前提：

c盘创建YQ目录，将附件中"模板.rtf"复制到YQ目录之下，运行OpreatorRTF.java文件即可，就会在YQ目录下生成文件名如：21时15分19秒_cheney_记录.rtf
的文件。

package com;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileWriter;

import java.io.IOException;

import java.io.InputStream;

import java.io.PrintWriter;

import java.text.SimpleDateFormat;

import java.util.Date;

public class OperatorRTF {

public String strToRtf(String content){

char[] digital = "0123456789ABCDEF".toCharArray();

StringBuffer sb = new StringBuffer("");

byte[] bs = content.getBytes();

int bit;

for (int i = 0; i < bs.length; i++) {

bit = (bs[i] & 0x0f0)
>> 4;

sb.append("\\'");

sb.append(digital[bit]);

bit = bs[i] & 0x0f;

sb.append(digital[bit]);

}

return sb.toString();

}

public String replaceRTF(String content,String replacecontent,int
flag){

String rc = strToRtf(replacecontent);

String target = "";

if(flag==0){

target = content.replace("$timetop$",rc);

}

if(flag==1){

target = content.replace("$info$",rc);

}

if(flag==2){

target = content.replace("$idea$",rc);

}

if(flag==3){

target = content.replace("$advice$",rc);

}

if(flag==4){

target = content.replace("$infosend$",rc);

}

return target;

}

public String getSavePath() {

String path = "C:\\YQ";

File fDirecotry = new File(path);

if (!fDirecotry.exists()) {

fDirecotry.mkdirs();

}

return path;

}

public String ToSBC(String input){

char[] c =
input.toCharArray();

for (int i =
0; i < c.length; i++){

if (c[i] == 32){

c[i] = (char) 12288;

continue;

}

if (c[i] < 127){

c[i] = (char) (c[i] + 65248);

}

}

return new
String(c);

}

public void rgModel(String username, String content) {

// TODO Auto-generated method stub

Date current=new Date();

SimpleDateFormat sdf=new java.text.SimpleDateFormat("yyyy-MM-dd
HH:mm:ss");

String targetname = sdf.format(current).substring(11,13) + "时";

targetname += sdf.format(current).substring(14,16) + "分";

targetname += sdf.format(current).substring(17,19) + "秒";

targetname += "_" + username +"_记录.rtf";

String strpath = getSavePath();

String sourname = strpath+"\\"+"模板.rtf";

String sourcecontent = "";

InputStream ins = null;

try{

ins = new FileInputStream(sourname);

byte[] b = new byte[1024];

if (ins == null) {

System.out.println("源模板文件不存在");

}

int bytesRead = 0;

while (true) {

bytesRead = ins.read(b, 0, 1024); // return final read bytes
counts

if(bytesRead == -1) {// end of InputStream

System.out.println("读取模板文件结束");

break;

}

sourcecontent += new String(b, 0, bytesRead); // convert to string
using bytes

}

}catch(Exception e){

e.printStackTrace();

}

String targetcontent = "";

String array[] = content.split("~");

for(int i=0;i<array.length;i++){

if(i==0){

targetcontent = replaceRTF(sourcecontent, array[i], i);

}else{

targetcontent = replaceRTF(targetcontent, array[i], i);

}

}

try {

FileWriter fw = new FileWriter(getSavePath()+"\\" +
targetname,true);

PrintWriter out = new PrintWriter(fw);

if(targetcontent.equals("")||targetcontent==""){

out.println(sourcecontent);

}else{

out.println(targetcontent);

}

out.close();

fw.close();

System.out.println(getSavePath()+" 该目录下生成文件" +
targetname + " 成功");

} catch (IOException e) {

// TODO Auto-generated catch block

e.printStackTrace();

}

}

public static void main(String[] args) {

// TODO Auto-generated method stub

OperatorRTF oRTF = new OperatorRTF();

String content =
"2008年10月12日9时-2008年10月12日6时~我们参照检验药品的方法~我们参照检验药品的方法~我们参照检验药品的方法~我们参照检验药品的方法";

oRTF.rgModel("cheney",content);

}

}

导航:首页 > 文件教程 > jacob读取word

jacob读取word

与jacob读取word相关的资料

友情链接