使用java将网页保存为mht格式(2)

//设置网页正文
　　MimeBodyPart bp = new MimeBodyPart();
　　bp.setText(content, strEncoding);
　　bp.addHeader("Content-Type", "text/html;charset=" + strEncoding);
　　bp.addHeader("Content-Location", strWeb.toString());
　　mp.addBodyPart(bp);
　　int urlCount = urlScriptList.size();
　　for (int i = 0; i < urlCount; i++) {
　　bp = new MimeBodyPart();
　　ArrayList urlInfo = (ArrayList) urlScriptList.get(i);
　　// String url = urlInfo.get(0).toString();
　　String absoluteURL = urlInfo.get(1).toString();
　　bp
　　.addHeader("Content-Location",
　　javax.mail.internet.MimeUtility
　　.encodeWord(java.net.URLDecoder
　　.decode(absoluteURL, strEncoding)));
　　DataSource source = new AttachmentDataSource(absoluteURL, "text");
　　bp.setDataHandler(new DataHandler(source));
　　mp.addBodyPart(bp);
　　}
　　urlCount = urlImageList.size();
　　for (int i = 0; i < urlCount; i++) {
　　bp = new MimeBodyPart();
　　ArrayList urlInfo = (ArrayList) urlImageList.get(i);
　　// String url = urlInfo.get(0).toString();
　　String absoluteURL = urlInfo.get(1).toString();
　　bp
　　.addHeader("Content-Location",
　　javax.mail.internet.MimeUtility
　　.encodeWord(java.net.URLDecoder
　　.decode(absoluteURL, strEncoding)));
　　DataSource source = new AttachmentDataSource(absoluteURL, "image");
　　bp.setDataHandler(new DataHandler(source));
　　mp.addBodyPart(bp);
　　}
　　msg.setContent(mp);
　　// write the mime multi part message to a file
　　msg.writeTo(new FileOutputStream(strFileName));
　　}
　　/**
　　*方法说明：mht转html
　　*输入参数：strMht mht文件路径; strHtml html文件路径
　　*返回类型：
　　*/
　　public static void mht2html(String strMht, String strHtml) {
　　try {
　　//TODO readEmlFile
　　InputStream fis = new FileInputStream(strMht);
　　Session mailSession = Session.getDefaultInstance(System.getProperties(), null);
　　MimeMessage msg = new MimeMessage(mailSession, fis);
　　Object content = msg.getContent();
　　if (content instanceof Multipart) {
　　MimeMultipart mp = (MimeMultipart)content;
　　MimeBodyPart bp1 = (MimeBodyPart)mp.getBodyPart(0);
　　String strEncodng = getEncoding(bp1);
　　String strText = getHtmlText(bp1, strEncodng);
　　if (strText == null)
　　return;
　　File parent = null;
　　if (mp.getCount() > 1) {
　　parent = new File(new File(strHtml).getAbsolutePath() + ".files");
　　parent.mkdirs();
　　if (!parent.exists())
　　return;
　　}
　　for (int i = 1; i < mp.getCount(); ++i) {
　　MimeBodyPart bp = (MimeBodyPart)mp.getBodyPart(i);
　　String strUrl = getResourcesUrl(bp);
　　if (strUrl == null)
　　continue;
　　DataHandler dataHandler = bp.getDataHandler();
　　MimePartDataSource source = (MimePartDataSource)dataHandler.getDataSource();
　　File resources = new File(parent.getAbsolutePath() + File.separator + getName(strUrl, i));
　　if (saveResourcesFile(resources, bp.getInputStream()))
　　strText = JHtmlClear.replace(strText, strUrl, resources.getAbsolutePath());
　　}
　　saveHtml(strText, strHtml);
　　}
　　} catch (Exception e) {
　　// TODO Auto-generated catch block
　　e.printStackTrace();
　　}
　　}
　　/**
　　*方法说明：得到资源文件的name
　　*输入参数：strName 资源文件链接, ID 资源文件的序号
　　*返回类型：资源文件的本地临时文件名
　　*/
　　public static String getName(String strName, int ID) {
　　char separator = ’/’;
　　System.out.println(strName);
　　System.out.println(separator);
　　if( strName.lastIndexOf(separator) >= 0)
　　return format(strName.substring(strName.lastIndexOf(separator) + 1));
　　return "temp" + ID;
　　}
　　/**
　　*方法说明：得到网页编码
　　*输入参数：bp MimeBodyPart类型的网页内容
　　*返回类型：MimeBodyPart里的网页内容的编码
　　*/
　　private static String getEncoding(MimeBodyPart bp) {
　　if (bp != null) {
　　try {
　　Enumeration list = bp.getAllHeaders();
　　while (list.hasMoreElements()) {
　　javax.mail.Header head = (javax.mail.Header)list.nextElement();
　　if (head.getName().compareTo("Content-Type") == 0) {
　　String strType = head.getValue();
　　int pos = strType.indexOf("charset=");
　　if (pos != -1) {
　　String strEncoding = strType.substring(pos + 8, strType.length());
　　if (strEncoding.toLowerCase().compareTo("gb2312") == 0) {
　　strEncoding = "gbk";
　　}
　　return strEncoding;
　　}
　　}
　　}
　　} catch (MessagingException e) {
　　// TODO Auto-generated catch block
　　e.printStackTrace();
　　}
　　}
　　return null;
　　}
　　/**
　　*方法说明：得到资源文件url
　　*输入参数：bp MimeBodyPart类型的网页内容
　　*返回类型：资源文件url
　　*/

private static String getResourcesUrl(MimeBodyPart bp) {
　　if (bp != null) {
　　try {
　　Enumeration list = bp.getAllHeaders();
　　while (list.hasMoreElements()) {
　　javax.mail.Header head = (javax.mail.Header)list.nextElement();
　　if (head.getName().compareTo("Content-Location") == 0) {
　　return head.getValue();
　　}
　　}
　　} catch (MessagingException e) {
　　// TODO Auto-generated catch block
　　e.printStackTrace();
　　}
　　}
　　return null;
　　}
　　/**
　　*方法说明：格式化文件名
　　*输入参数：strName 文件名
　　*返回类型：经过处理的符合命名规则的文件名
　　*/
　　private static String format(String strName) {
　　if (strName == null)
　　return null;
　　strName = strName.replaceAll(" ", " ");
　　String strText = "/:*?"<>|^___FCKpd___0quot;;
　　for (int i = 0; i < strName.length(); ++i) {
　　String ch = String.valueOf(strName.charAt(i));
　　if (strText.indexOf(ch) != -1) {
　　strName = strName.replace(strName.charAt(i), ’-’);
　　}
　　}
　　return strName;
　　}
　　/**
　　*方法说明：保存资源文件
　　*输入参数：resources 要创建的资源文件; inputStream 要输入文件中的流
　　*返回类型：boolean
　　*/
　　private static boolean saveResourcesFile(File resources, InputStream inputStream) {
　　if (resources == null || inputStream == null) {
　　return false;
　　}
　　BufferedInputStream in = null;
　　FileOutputStream fio = null;
　　BufferedOutputStream osw = null;
　　try {
　　in = new BufferedInputStream(inputStream);
　　fio = new FileOutputStream(resources);
　　osw = new BufferedOutputStream(new DataOutputStream(fio));
　　int b;
　　byte[] a = new byte[1024];
　　boolean isEmpty = true;
　　while ((b = in.read(a)) != -1) {
　　isEmpty = false;
　　osw.write(a, 0, b);
　　osw.flush();
　　}
　　osw.close();
　　fio.close();
　　in.close();
　　inputStream.close();
　　if (isEmpty)
　　resources.delete();
　　return true;
　　} catch (Exception e) {
　　// TODO Auto-generated catch block
　　e.printStackTrace();
　　System.out.println("解析mht失败");
　　return false;
　　} finally{
　　try {
　　if (osw != null)
　　osw.close();
　　if (fio != null)
　　fio.close();
　　if (in != null)
　　in.close();
　　if (inputStream != null)
　　inputStream.close();
　　} catch (Exception e) {
　　e.printStackTrace();
　　System.out.println("解析mht失败");
　　return false;
　　}
　　}
　　}
　　/**

　　*方法说明：得到mht文件的标题
　　*输入参数：mhtFilename mht文件名
　　*返回类型：mht文件的标题
　　*/
　　public static String getTitle(String mhtFilename) {
　　try {
　　//TODO readEmlFile
　　InputStream fis = new FileInputStream(mhtFilename);
　　Session mailSession = Session.getDefaultInstance(System.getProperties(), null);
　　MimeMessage msg = new MimeMessage(mailSession, fis);
　　Object content = msg.getContent();
　　if (content instanceof Multipart) {
　　MimeMultipart mp = (MimeMultipart)content;
　　MimeBodyPart bp1 = (MimeBodyPart)mp.getBodyPart(0);
　　String strEncodng = getEncoding(bp1);
　　String strText = getHtmlText(bp1, strEncodng);
　　if (strText == null)
　　return null;
　　strText = strText.toLowerCase();
　　int pos1 = strText.indexOf("<title>");
　　int pos2 = strText.indexOf("</title>");
　　if (pos1 != -1 && pos2!= -1 && pos2 > pos1) {
　　return strText.substring(pos1 + 7, pos2).trim();
　　}
　　}
　　return null;
　　} catch (Exception e) {
　　// TODO Auto-generated catch block
　　e.printStackTrace();
　　return null;
　　}
　　}
　　/**
　　*方法说明：得到html文本
　　*输入参数：bp MimeBodyPart类型的网页内容; strEncoding 内容编码
　　*返回类型：html文本
　　*/

private static String getHtmlText(MimeBodyPart bp, String strEncoding) {
　　InputStream textStream = null;
　　BufferedInputStream buff = null;
　　BufferedReader br = null;
　　Reader r = null;
　　try {
　　textStream = bp.getInputStream();
　　buff = new BufferedInputStream(textStream);
　　r = new InputStreamReader(buff, strEncoding);
　　br = new BufferedReader(r);
　　StringBuffer strHtml = new StringBuffer("");
　　String strLine = null;
　　while ((strLine = br.readLine()) != null) {
　　strHtml.append(strLine + "rn");
　　}
　　br.close();
　　r.close();
　　textStream.close();
　　return strHtml.toString();
　　} catch (Exception e) {
　　// TODO Auto-generated catch block
　　e.printStackTrace();
　　} finally{
　　try{
　　if (br != null)
　　br.close();
　　if (buff != null)
　　buff.close();
　　if (textStream != null)
　　textStream.close();
　　}catch(Exception e){
　　System.out.println("解析mht失败");
　　}
　　}
　　return null;
　　}
　　/**
　　*方法说明：保存html文件
　　*输入参数：strText html内容; strHtml html文件名
　　*返回类型：
　　*/
　　private static void saveHtml(String strText, String strHtml) {
　　try {
　　FileWriter fw = new FileWriter(strHtml);
　　fw.write(strText);
　　fw.close();
　　} catch (IOException e) {
　　// TODO Auto-generated catch block
　　e.printStackTrace();
　　System.out.println("解析mht失败");
　　}
　　}
　　private InternetAddress[] getInetAddresses(String emails) throws Exception {
　　ArrayList list = new ArrayList();
　　StringTokenizer tok = new StringTokenizer(emails, ",");
　　while (tok.hasMoreTokens()) {
　　list.add(tok.nextToken());
　　}
　　int count = list.size();
　　InternetAddress[] addresses = new InternetAddress[count];
　　for (int i = 0; i < count; i++) {
　　addresses[i] = new InternetAddress(list.get(i).toString());
　　}
　　return addresses;
　　}
　　class AttachmentDataSource implements DataSource {
　　private MimetypesFileTypeMap map = new MimetypesFileTypeMap();
　　private String strUrl;
　　private String strType;
　　private byte[] dataSize = null;
　　/**
　　* This is some content type maps.
　　*/
　　private Map normalMap = new HashMap();
　　{
　　// Initiate normal mime type map
　　// Images
　　normalMap.put("image", "image/jpeg");
　　normalMap.put("text", "text/plain");
　　}
　　public AttachmentDataSource(String strUrl, String strType) {
　　this.strType = strType;
　　this.strUrl = strUrl;
　　strUrl = strUrl.trim();
　　strUrl = strUrl.replaceAll(" ", "%20");
　　dataSize = JQuery.downBinaryFile(strUrl, null);
　　}
　　/**
　　* Returns the content type.
　　*/
　　public String getContentType() {
　　return getMimeType(getName());
　　}
　　public String getName() {
　　char separator = File.separatorChar;
　　if( strUrl.lastIndexOf(separator) >= 0 )
　　return strUrl.substring(strUrl.lastIndexOf(separator) + 1);
　　return strUrl;
　　}
　　private String getMimeType(String fileName) {
　　String type = (String)normalMap.get(strType);
　　if (type == null) {
　　try {
　　type = map.getContentType(fileName);
　　} catch (Exception e) {
　　// TODO: handle exception
　　}
　　System.out.println(type);
　　// Fix the null exception
　　if (type == null) {
　　type = "application/octet-stream";
　　}
　　}
　　return type;
　　}
　　public InputStream getInputStream() throws IOException {
　　// TODO Auto-generated method stub
　　if (dataSize == null)
　　dataSize = new byte[0];
　　return new ByteArrayInputStream(dataSize);
　　}
　　public OutputStream getOutputStream() throws IOException {
　　// TODO Auto-generated method stub
　　return new java.io.ByteArrayOutputStream();
　　}
　　}
　　}

3COME考试频道为您精心整理，希望对您有所帮助，更多信息在http://www.reader8.net/exam/

使用java将网页保存为mht格式(2)

热点推荐