while (m2.find()) {
if (!"".equals(m2.group())) {
String date = m2.group();
date = date.replaceAll("年", "-");
date = date.replaceAll("月", "-");
date = date.replaceAll("日", " ");
date = date.replaceAll("/", "-");
strDate = date;
}
}
}
return strDate;
}
8.网址提取
/**
 * Downloads the NetEase news index script and prints every article URL in it
 * that matches today's date-based URL rule.
 *
 * <p>The address fetched is hard-coded; the {@code url2} argument is currently
 * not used by this method and is kept only so existing callers keep compiling.
 * The response is read line by line and the lines are joined without
 * separators before the URL pattern is applied.
 *
 * <p>NOTE(review): the response is decoded with the platform default charset,
 * exactly as before; confirm whether the remote resource needs an explicit
 * charset.
 *
 * @param url2 unused
 * @throws IOException if opening or reading the response stream fails
 */
public static void extractsURL2(String url2) throws IOException {
    String htmladdr = "http://news.163.com/special/0001220O/news_json.js";

    HttpURLConnection connection;
    try {
        connection = (HttpURLConnection) new URL(htmladdr).openConnection();
        connection.connect();
    } catch (Exception e) {
        // Connection problems are reported and treated as "nothing to extract".
        e.printStackTrace();
        return;
    }

    StringBuilder content = new StringBuilder();
    // try-with-resources closes the reader even when readLine() throws;
    // the finally block releases the connection in every case.
    try (BufferedReader reader =
            new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
        String line;
        while ((line = reader.readLine()) != null) {
            content.append(line);
        }
    } catch (RuntimeException e) {
        // Best effort: keep whatever was read so far, but do not hide the failure.
        e.printStackTrace();
    } finally {
        connection.disconnect();
    }

    // Date placeholders (yy, mm, dd) are filled in with today's date.
    String ruleUrl = "http://[a-z]+.163.com/yy/mmdd/[\\d]+/[\\w]+.html";
    String urlFilter = urlFilterStrategy(ruleUrl);
    Matcher m = Pattern.compile(urlFilter).matcher(content);
    while (m.find()) {
        String match = m.group();
        if (!match.isEmpty()) {
            System.out.println(match);
        }
    }
}
publicstatic String urlFilterStrategy(String urlFilter) {
Calendar calendar = Calendar.getInstance();
String year = Integer.toString(calendar.get(calendar.YEAR))
.substring(2);
String year2 = Integer.toString(calendar.get(calendar.YEAR));
String month = "0" + Integer.toString(calendar.get(calendar.MONTH) + 1);
month = month.substring(month.length() - 2);
String day = "0"
+ Integer.toString(calendar.get(calendar.DAY_OF_MONTH));
day = day.substring(day.length() - 2);
urlFilter = urlFilter.replaceAll("yyyy-mm-dd", year2+"-"+month+"-"+day);
urlFilter = urlFilter.replaceAll("yyyy", year2);
urlFilter = urlFilter.replaceAll("yy", year);
urlFilter = urlFilter.replaceAll("mm", month);
urlFilter = urlFilter.replaceAll("dd", day);
return urlFilter;
}