如何使用Socket获取网页源文件?(50分)

  • 主题发起人 主题发起人 jshyh
  • 开始时间 开始时间
J

jshyh

Unregistered / Unconfirmed
GUEST, unregistred user!
我想使用Socket编一个多线程的获取网页源文件的程序,即给一个URL作为参数,返回网页
的源文件,象IE中查看-〉原文件那样的文本文件。请各位高手指教,最好有示例代码,分数不够可以再加!
 
Memo1.Text := DownloadWithInet('http://www.delphibbs.com');
or
Memo1.Text := DownloadWithSocket('http://www.delphibbs.com');
两者各有利弊,自己选择.

uses WinSock, WinInet;

function DownloadWithInet(const AUrl: string): string;

procedure Add(Buf: PChar; Count: Integer);
var
Len: Integer;
begin
Len := Length(Result);
SetLength(Result, Len + Count);
Move(Buf^, Result[Len + 1], Count);
end;

function PrepareURL: string;
begin
Result := UpperCase(Copy(AUrl, 1, 7));
if Result <> 'HTTP://' then
Result := 'http://' + AUrl
else
Result := AUrl;
end;

var
BytesRead: DWORD;
Session, Connection: HINTERNET;
Buffer: array[1..1024] of Char;
begin
Result := '';
if AUrl = '' then Exit;
Session := InternetOpen(nil, INTERNET_OPEN_TYPE_DIRECT, nil, nil, 0);
if not Assigned(Session) then
raise Exception.Create(SysErrorMessage(GetLastError));
try
Connection := InternetOpenUrl(Session, PChar(PrepareURL), nil, 0,
INTERNET_FLAG_RAW_DATA, {INTERNET_FLAG_RELOAD, }0);
if not Assigned(Connection) then
raise Exception.Create(SysErrorMessage(GetLastError));
try
repeat
FillChar(Buffer, SizeOf(Buffer), 0);
InternetReadFile(Connection, @Buffer, SizeOf(Buffer), BytesRead);
if BytesRead > 0 then
Add(@Buffer, BytesRead);
Application.ProcessMessages;
until BytesRead = 0;
finally
InternetCloseHandle(Connection);
end;
finally
InternetCloseHandle(Session);
end;
end;

function DownloadWithSocket(const AUrl: string): string;
const
CRLF = #13#10;
SFileContentLen = 'content-length: ';
SUserAgent =
'User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.0.3705)';
SRequestFileHead =
'HEAD %s HTTP/1.1' + CRLF +
'Pragma: no-cache' + CRLF +
'Cache-Control: no-cache' + CRLF +
SUserAgent + CRLF +
'Host: %s' + CRLF + CRLF;
SRequestDownFile =
'GET %s HTTP/1.1' + CRLF +
'Accept: */*' + CRLF +
SUserAgent + CRLF +
'RANGE: bytes=0-' + CRLF +
'Host: %s' + CRLF + CRLF;

procedure ExtractHostAndFileName(const AURL: string;
var AHost, AFileName: string; APort: PString = nil);
const
HttpHead = 'http://';
HttpHeadLen = Length(HttpHead);
var
I: Integer;
begin
AHost := AURL;
I := Pos(HttpHead, AURL);
if I <> 0 then
AHost := Copy(AHost, I + HttpHeadLen, MaxInt);
I := AnsiPos('/', AHost);
while I <> 0 do
begin
AHost := Copy(AHost, 1, I - 1);
I := AnsiPos('/', AHost);
end;
I := Pos(AHost, AURL) + Length(AHost);
AFileName := Copy(AURL, i, MaxInt);
I := Pos(':', AHost);
if I <> 0 then
begin
if Assigned(APort) then
APort^ := Copy(AHost, I + 1, MaxInt);
AHost := Copy(AHost, 1, I - 1);
end;
end;

var
Socket: TSocket;

function WaitForSocket(Timeout: Integer): Boolean;
var
FDSet: TFDSet;
TimeVal: TTimeVal;
begin
TimeVal.tv_sec := Timeout;
TimeVal.tv_usec := 0;
FD_ZERO(FDSet);
FD_SET(Socket, FDSet);
Result := WinSock.select(0, @FDSet, nil, nil, @TimeVal) > 0;
end;

procedure Add(var S: string; Buf: PChar; Count: Integer);
var
Len: Integer;
begin
Len := Length(S);
SetLength(S, Len + Count);
Move(Buf^, S[Len + 1], Count);
end;

function ReceiveLine: string;
var
C: Char;
RetLen: Integer;
begin
Result := '';
while Socket <> INVALID_SOCKET do
begin
RetLen := recv(Socket, C, 1, 0);
if (RetLen <= 0) or (RetLen = SOCKET_ERROR) then
break;
Add(Result, @C, 1);
if Pos(CRLF, Result) > 0 then break;
end;
end;

function SendCommand(const Command: string): string;
var
P: PChar;
Data: string;
begin
Result := '';
P := PChar(Command);
send(Socket, P^, Length(Command), 0);
while WaitForSocket(5) do
begin
Data := ReceiveLine;
if (Data = '') or (Data = CRLF) then
break else
Add(Result, PChar(Data), Length(Data));
end;
end;

procedure InitSocket(const AHost: string);
var
Addr: TSockAddrIn;
Data: TWSAData;
HostEnt: PHostEnt;
Timeout: Integer;
begin
Winsock.WSAStartup($0101, Data);
Socket := WinSock.socket(PF_INET, SOCK_STREAM, IPPROTO_IP);
if Socket = INVALID_SOCKET then
raise Exception.Create(SysErrorMessage(GetLastError));
Timeout := 1000;
WinSock.setsockopt(Socket, SOL_SOCKET, SO_RCVTIMEO, @Timeout, SizeOf(TimeOut));
HostEnt := gethostbyname(PChar(AHost));
FillChar(Addr.sin_addr, SizeOf(Addr.sin_addr), 0);
Addr.sin_family := PF_INET;
if HostEnt <> nil then
Move(HostEnt^.h_addr^[0], Addr.sin_addr.S_addr, HostEnt^.h_length)
else
raise Exception.CreateFmt('主机没找到: %s', [AHost]);
Addr.sin_port := htons(80);
if connect(Socket, Addr, SizeOf(Addr)) <> 0 then
raise Exception.Create(SysErrorMessage(GetLastError));
end;

procedure UnInitSocket;
begin
if Socket <> INVALID_SOCKET then
closesocket(Socket);
WSACleanup;
end;

var
Data, FileName, Host: string;
begin
Socket := INVALID_SOCKET;
ExtractHostAndFileName(AUrl, Host, FileName);
try
InitSocket(Host);
if FileName = '' then
FileName := '/';
Data := SendCommand(Format(SRequestFileHead, [FileName, Host]));
Data := SendCommand(Format(SRequestDownFile, [FileName, Host]));
while True do
begin
Data := ReceiveLine;
if Data = '' then break;
Add(Result, PChar(Data), Length(Data));
Application.ProcessMessages;
end;
finally
UnInitSocket;
end;
end;
 
我发现使用Socket很慢,使用WinInet很快,WinInet的速度是Socket的两倍多,那么Socket
的优点又是什么呢?请指教。
 
接受答案了.
 
wininet取第一次速度也不快,第二次很快,原因就是它取的是缓冲数据,如果有最新的数据,如BBS的回复贴子,它是取不会出来的。(可能有些需要设置吧,我不太清楚)
而SOCKET则是需要每次去连接,再取,所以,它可以取出最新的数据,相反WinInet则不行,所以说各有利弊,看你的需要而定。
 
我懂了,谢谢。
 
后退
顶部