java网页数据抓取(二)

2014-11-24 10:14:25 · 作者: · 浏览: 1
url.openConnection() ;
InputStreamReader inRead = new InputStreamReader(httpUrlCon.getInputStream(),"GBK") ;
BufferedReader bufRead = new BufferedReader(inRead) ;
StringBuffer strBuf = new StringBuffer() ;
String line = "" ;
while ((line = bufRead.readLine()) != null) {
strBuf.append(line);
}
String strStart = "邮 编" ;
String strEnd = "更详细的..";
String strAll = strBuf.toString() ;

int start = strAll.indexOf(strStart) ;

int end = strAll.indexOf(strEnd) ;

String result = strAll.substring(start+40,end-55) ;
return result ;
}
public String drawChMob(String str){
StringBuffer strBuf = new StringBuffer() ;
String regex="([\u4e00-\u9fa5]+)";
Matcher matcher = Pattern.compile(regex).matcher(str);
while(matcher.find()){
strBuf.append(matcher.group(0)).toString() ;
}
return strBuf.toString() ;
}
}
class GrabIdentity{
public String grabIdentitySex(String userid)throws Exception{
String strUrl = "http://qq.ip138.com/idsearch/index.asp action=idcard&userid=" + userid + "&B1=%B2%E9+%D1%AF";
URL url = new URL(strUrl) ;
HttpURLConnection httpUrlCon = (HttpURLConnection)url.openConnection() ;
InputStreamReader inRead = new InputStreamReader(httpUrlCon.getInputStream(),"GBK") ;
BufferedReader bufRead = new BufferedReader(inRead) ;
StringBuffer strBuf = new StringBuffer() ;
String line = "" ;
while ((line = bufRead.readLine()) != null) {
strBuf.append(line);
}
String strStart = " 别" ;
String strEnd = "出生日期";
String strAll = strBuf.toString() ;

int start = strAll.indexOf(strStart) ;

int end = strAll.indexOf(strEnd) ;

String result = strAll.substring(start+7,end) ;
result = drawCh(result) ;
return result ;
}
public String grabIdentityBirth(String userid)throws Exception{
String strUrl = "http://qq.ip138.com/idsearch/index.asp action=idcard&userid=" + userid + "&B1=%B2%E9+%D1%AF";
URL url = new URL(strUrl) ;
HttpURLConnection httpUrlCon = (HttpURLConnection)url.openConnection() ;
InputStreamReader inRead = new InputStreamReader(httpUrlCon.getInputStream(),"GBK") ;
BufferedReader bufRead = new BufferedReader(inRead) ;
StringBuffer strBuf = new StringBuffer() ;
String line = "" ;
while ((line = bufRead.readLine()) != null) {
strBuf.append(line);
}
String strStart = "出生日期:" ;
String strEnd = " URL url = new URL(strUrl) ;
HttpURLConnection httpUrlCon = (HttpURLConnection)url.openConnection() ;
InputStreamReader inRead = new InputStreamReader(httpUrlCon.getInputStream(),"GBK") ;
BufferedReader bufRead = new BufferedReader(inRead) ;
StringBuffer strBuf = new StringBuffer() ;
String line = "" ;
while ((line = bufRead.readLine()) != null) {
strBuf.append(line);
}
String strStart = "证 地:" ;
String strEnd = "
部分或" ;
String strAll = strBuf.toString() ;

int start = strAll.indexOf(strStart) ;
int end = strAll.indexOf(strEnd) ;

String result = strAll.substring(start+31,end) ;
return result ;
}
public String drawCh(String str){
StringBuffer strBuf = new StringBuffer() ;
String regex="([\u4e00-\u9fa5]+)";
Matcher matcher = Pattern.compile(regex).matcher(str);
if(matcher.find()){
str = strBuf.append(matcher.group(0)).toString() ;
}
return str ;
}
}