调用示例:
File powerPointFile = new File("D:\\temp.ppt"); //读取PowerPoint文档中所有文本内容,以字符串形式返回 System.out.println(PowerPointFileUtil.extractTextFromPowerPointFile(powerPointFile , "," , ";"));
工具类源码:
/**
 * PowerPointFileUtil.java
 * Copyright ® 2010 窦海宁
 * All right reserved
 */
package org.aiyu.core.common.util.file;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.commons.lang.StringUtils;
import org.apache.poi.hslf.HSLFSlideShow;
import org.apache.poi.hslf.model.AutoShape;
import org.apache.poi.hslf.model.Shape;
import org.apache.poi.hslf.model.Slide;
import org.apache.poi.hslf.usermodel.SlideShow;

/**
 * <p>PowerPoint file utility class.
 *
 * <p>General-purpose helper for extracting text from PowerPoint documents.
 * Only shapes that are {@link AutoShape} instances contribute text; every
 * other shape is treated as having no text.
 *
 * @author 窦海宁, chong0660@sina.com
 * @since AiyuCommonCore-1.0
 * @version AiyuCommonCore-1.0
 */
public abstract class PowerPointFileUtil {

    private static final Logger LOGGER = Logger.getLogger(PowerPointFileUtil.class.getName());

    /**
     * <p>Extracts the text of a PowerPoint document as a single string.
     *
     * <p>Texts of shapes on the same slide are joined with {@code shapeSeparator};
     * slides are joined with {@code slideSeparator}. Shapes without text are
     * skipped, so no dangling shape separator is produced for them. A slide
     * separator is still written between every pair of consecutive slides,
     * including slides that contribute no text.
     *
     * <p>Extraction is best-effort: if reading fails, the failure is logged and
     * whatever text was collected up to that point is returned.
     *
     * @param powerPointFile PowerPoint file to read
     * @param shapeSeparator separator placed between shape texts of one slide
     * @param slideSeparator separator placed between slides
     *
     * @return the extracted text, whitespace-trimmed, or {@code null} when any
     *         argument is {@code null}, the file is not a regular file, or the
     *         trimmed result is empty
     *
     * @modify 窦海宁, 2013-07-03
     */
    public static String extractTextFromPowerPointFile(File powerPointFile , String shapeSeparator , String slideSeparator) {

        if (powerPointFile == null || shapeSeparator == null || slideSeparator == null) {
            return null;
        }
        if (!powerPointFile.isFile()) {
            return null;
        }

        StringBuilder text = new StringBuilder();
        InputStream input = null;
        try {
            // Open the stream here so that its lifetime is controlled by this
            // method and it is closed in the finally block below.
            input = new FileInputStream(powerPointFile);
            SlideShow slideShow = new SlideShow(new HSLFSlideShow(input));
            Slide[] slides = slideShow.getSlides();
            for (int i = 0 ; i < slides.length ; i++) {
                if (i > 0) {
                    text.append(slideSeparator);
                }
                appendSlideText(text , slides[i] , shapeSeparator);
            }
        } catch (Exception ex) {
            // Deliberately broad: this method is best-effort and returns the
            // text gathered so far instead of propagating the failure.
            LOGGER.log(Level.WARNING , "Failed to extract text from PowerPoint file: " + powerPointFile , ex);
        } finally {
            closeQuietly(input);
        }

        return StringUtils.trimToNull(text.toString());
    }

    /**
     * <p>Reads every slide of a slide show.
     *
     * @param slideShow SlideShow object
     *
     * @return a list with one entry per slide, each entry being the list
     *         returned by {@link #readSlide(Slide)}; {@code null} when
     *         {@code slideShow} is {@code null}
     *
     * @modify 窦海宁, 2008-08-07
     */
    public static List readSlideShow(SlideShow slideShow) {

        if (slideShow == null) {
            return null;
        }

        Slide[] slides = slideShow.getSlides();
        List<List> slideList = new ArrayList<List>(slides.length);
        for (Slide slide : slides) {
            slideList.add(readSlide(slide));
        }
        return slideList;
    }

    /**
     * <p>Reads the data of every shape on the given slide.
     *
     * @param slide Slide object
     *
     * @return a list with one entry per shape, each entry being the value
     *         returned by {@link #readShape(Shape)} (possibly {@code null});
     *         {@code null} when {@code slide} is {@code null}
     *
     * @modify 窦海宁, 2008-08-07
     */
    public static List readSlide(Slide slide) {

        if (slide == null) {
            return null;
        }

        Shape[] shapes = slide.getShapes();
        List<Object> shapeList = new ArrayList<Object>(shapes.length);
        for (Shape shape : shapes) {
            shapeList.add(readShape(shape));
        }
        return shapeList;
    }

    /**
     * <p>Reads the data of a single shape.
     *
     * @param shape shape object taken from a slide
     *
     * @return the shape's text when it is an {@link AutoShape}; {@code null}
     *         when the shape is {@code null}, is not an {@code AutoShape}, or
     *         reading its text fails
     *
     * @modify 窦海宁, 2010-01-07
     */
    public static Object readShape(Shape shape) {

        return shapeText(shape);
    }

    /**
     * Appends the non-null shape texts of one slide to {@code target},
     * separated by {@code shapeSeparator}.
     */
    private static void appendSlideText(StringBuilder target , Slide slide , String shapeSeparator) {

        if (slide == null) {
            return;
        }

        boolean firstText = true;
        for (Shape shape : slide.getShapes()) {
            String value = shapeText(shape);
            if (value == null) {
                continue;
            }
            // The separator is written before each text except the first one,
            // so trailing shapes without text cannot leave a dangling separator.
            if (!firstText) {
                target.append(shapeSeparator);
            }
            target.append(value);
            firstText = false;
        }
    }

    /**
     * Returns the text of an {@link AutoShape}, or {@code null} for any other
     * shape or when reading the text fails.
     */
    private static String shapeText(Shape shape) {

        if (!(shape instanceof AutoShape)) {
            return null;
        }
        try {
            return ((AutoShape) shape).getText();
        } catch (Exception ex) {
            // Best-effort: a shape whose text cannot be read is treated as empty.
            LOGGER.log(Level.WARNING , "Failed to read text from shape" , ex);
            return null;
        }
    }

    /**
     * Closes the stream if it was opened; a failure to close is only logged.
     */
    private static void closeQuietly(InputStream input) {

        if (input == null) {
            return;
        }
        try {
            input.close();
        } catch (IOException ex) {
            LOGGER.log(Level.FINE , "Failed to close PowerPoint input stream" , ex);
        }
    }
}
您好,我是窦海宁,现在是一名免费开源工具研发人员,如果您喜欢我的开源代码,如果您希望我更好地发展下去,为您提供更多更好的开源代码……在这里感谢您的捐助。
- 本文附件下载:
- PowerPointFileUtil.rar (1.3 KB)
已有 0人发表留言,猛击->> 这里<<-参与讨论
ITeye推荐