HTTP代理工作原理(3)(二)
ackTrace();
}
finally
{
if(socketServer != null)
{
try
{
socketServer.close();
}
catch(IOException e)
{
}
}
}
}
private static void service(Socket socket)
{
Socket remote = null;
try
{
socket.setSoTimeout(2000);
socket.setKeepAlive(false);
InputStream inputStream = socket.getInputStream();
OutputStream outputStream = socket.getOutputStream();
/**
* 读取协议头的第一行
* 格式: GET http://www.mytest.com HTTP/1.1
*/
byte[] buffer = readLine(inputStream);
if(buffer.length < 1)
{
return;
}
String header = new String(buffer, "UTF-8");
String[] action = header.split(" ");
if(action.length < 3)
{
return;
}
String address = action[1];
/**
* 目标地址是从http协议的第一行取
* 目标主机应该从协议的Host头里面取,如果Host取不到, 从地址里面取
* 此处为了简化逻辑只从地址里面取host, 因此如果路径不是绝对路径就忽略
*/
if(address.startsWith("http://") == false)
{
return;
}
System.out.print(header);
URL url = new URL(address);
String host = url.getHost();
int port = (url.getPort() > -1 url.getPort() : 80);
remote = new Socket(host, port);
InputStream remoteInputStream = remote.getInputStream();
OutputStream remoteOutputStream = remote.getOutputStream();
/**
* 某些服务器对协议头必须一次性读完, 例如QQ空间
* 因此此处先读出协议头, 并且一次写入, 写入之后必须flush
* 否则就跳转到QQ首页了
*/
long contentLength = -1L;
ByteArrayOutputStream bos = new ByteArrayOutputStream();
bos.write(buffer, 0, buffer.length);
/**
* 读取协议头
* 也可以不读取协议头, 而是直接把inputStream写入到remoteOutputStream
* 为了兼容某些服务器, 此处简单的读取一下协议头
*/
while((buffer = readLine(inputStream)).length > 0)
{
header = new String(buffer, "UTF-8").trim();
if(header.length() < 1)
{
break;
}
if(header.startsWith("Content-Length:"))
{
try
{
contentLength = Long.parseLong(header.substring(15).trim());
}
catch(NumberFormatException e){}
}
bos.write(buffer, 0, buffer.length);
}
/** 协议头和主体之间的空行 */
bos.write(CRLF);
remoteOutputStream.write(bos.toByteArray());
remoteOutputStream.flush();
/** 如果存在contentLength */
if(contentLength > 0)
{
copy(inputStream, remoteOutputStream, 4096, contentLength);
}
try
{
/**
* 将目标主机返回的数据写入到客户端
* 此处应该检查一下Content-Length, 并且根据Content-Length来决定要写入多少数据
* 不过很多服务器经常会不返回Content-Length,
* 没有Content-Length, read函数会一直读取