小程序:Java下载单页HTML(可下载引用资源)

2014-11-24 00:57:59 · 作者: · 浏览: 0
本程序可下载页面所依赖的 CSS/JS、图片等引用资源,目前不支持下载关联的 HTML 页面。代码如下:
/*
 *****************************************************************************
 * This software is under the Apache License Version 2.0
 * Author: Tao -  mail:cn.java.river@gmail.com
 * Spreading Your Heart
 ****************************************************************************
 */

package atao.util.html;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;

import org.apache.commons.lang.StringUtils;

/**
 * 
 * A Simple HTML downloader which can also download Page resources.
 * 
* Note: This Tool won't download related or sub HTML * * @author Tao * @since 1.0 */ public class HtmlDownloader { //URL will be downloaded. private static String url = "http://pervasive2.morselli.unimo.it/~nicola/courses/IngegneriaDelSoftware/java/J6d_xml.html"; //workspace folder. private static String workspace = "download"; //sub css and js resources sign private static String urlSign = "
Note: don't use Java Character Writers, * otherwise you can't get pictures correctly. * * @param subUrl */ private static void downloadChild (String subUrl) { if (StringUtils.isNotEmpty (subUrl)) { if (subUrl.startsWith ("http:")) { System.out.println ("subUrl not support yet."); } else { long start = System.nanoTime (); try { String forUrl = subUrl.replace (" ", "%20"); if (!forUrl.startsWith ("/")) { forUrl = "/" + forUrl; } URL u = new URL (rootUrl + forUrl); InputStream reader = u.openStream (); File f = createDownloadFile (subUrl); FileOutputStream writer = new FileOutputStream (f); byte[] buff = new byte[1024]; int size = -1; while ((size = reader.read (buff)) != -1) { writer.write (buff, 0, size); } reader.close (); writer.close (); } catch (Exception e) { e.printStackTrace (); } System.out.println ("Source:" + subUrl +"download time(s):" + String.format ("%.3f", (double)(System.nanoTime () - start)/ 1000000000.00)); } } else { System.out.println ("subUrl is Empty."); } } /** * create sub file,create parent folders if necessary. * * @param url related path of a url source. * @return created file. */ private static File createDownloadFile (String url) { File f = new File (workspace, url); f.getParentFile ().mkdirs (); return f; } }