网络蜘蛛(网络机器人)(100分)

  • 主题发起人 主题发起人 xgwzw
  • 开始时间 开始时间
X

xgwzw

Unregistered / Unconfirmed
GUEST, unregistred user!
谁有相关代码的 发一分谢谢xgwzw@tom.com
 

我也正想要这方面的东西,哪位大虾有相关的代码,谢谢!
 
nit spider;

interface

uses
Windows, SysUtils, Classes, GhSock, hyperstr, dialogs;

const
Max_LAYERS = 15;

type
TURL = record
Proto: string;
Server: string;
Domain: string;
Path: string;
Filename: string;
end;
TOnBeforeConnect = procedure(ThreadNum, Layer: Integer; URL: TUrl) of object;
TOnFailConnect = procedure(ThreadNum, Layer: Integer; URL: TUrl) of object;
TOnAfterConnect = procedure(ThreadNum, Layer: Integer; URL: TUrl) of object;
TOnLinkFound = procedure(ThreadNum, Layer: Integer; URL: TUrl; var searchThisLink: Boolean) of object;
TOnPageLoaded = procedure(ThreadNum, Layer: Integer; URL: TUrl; Header, HTMLBody, TextBody: string) of object;
TOnfoundEmail = procedure(ThreadNum, Layer: integer; EmailAddress: string) of object;
TOnGetNextUrlSeed = procedure(ThreadNum: Integer; var UrlToSearch: string) of object;
TOnError = procedure(ThreadNum: Integer; ErrorMessage: string) of object;

type
TSpider = class;
TSpiderThread = class(TThread)
private
TempUrlList: tstringlist;
LayersURLSToSearch: array[1..max_layers] of tstringlist;
Fabort: Boolean;
ThreadNumber: Integer;
MainSpiderObj: TSpider;
Http1: TGhClient;
procedure FOnTerminate(Sender: TObject);
protected

public
constructor Create(Sender: Tspider; ThreadNum: integer);
destructor Destroy; override;
procedure Execute; override;
procedure Abort;
published

end;

Tspider = class(Tcomponent)
private
{ Private declarations }
FProxyAddr: string;
FMaxLayers: Integer;
FThreadsToUse: Integer;
ThreadList: array[1..100] of TspiderThread;
FOnBeforeConnect: TOnBeforeConnect;
FOnAfterConnect: TOnAfterConnect;
FOnFailConnect: TOnFailConnect;
FOnLinkFound: TOnLinkfound;
FOnPageLoaded: TOnPageLoaded;
FOnEmailfound: TOnfoundEmail;
FOnGetNextUrlSeed: TOnGetNextUrlSeed;
FOnError: TOnError;
function Running: Boolean;
protected
{ Protected declarations }
public
{ Public declarations }
work_CS: TRTLCriticalSection;
procedure Run;
constructor Create(Aowner: Tcomponent); override;
destructor Destroy; override;
procedure Abort;
published
property ProxyAddr: string read FProxyAddr write FProxyAddr;
property MaxLayers: integer read fmaxlayers write fmaxlayers;
property ThreadsToUse: integer read FThreadsToUse write FThreadsToUse;
property OnAfterConnect: TOnAfterConnect read FOnAfterConnect write FOnAfterConnect;
property OnBeforeConnect: TOnBeforeConnect read FOnBeforeConnect write FOnBeforeConnect;
property OnFailConnect: TOnFailConnect read FOnFailConnect write FOnFailConnect;
property OnLinkFound: TOnLinkfound read FOnLinkFound write FOnLinkFound;
property OnPageLoaded: TOnPageLoaded read FOnPageLoaded write FOnPageLoaded;
property OnEmailFound: TOnfoundEmail read FOnEmailFound write FOnEmailFound;
property OnGetNextUrlSeed: TOnGetNextUrlSeed read FOnGetNextUrlSeed write FOnGetNextUrlSeed;
property OnError: TOnError read FOnError write FOnError;
{ Published declarations }
end;


procedure Register;

implementation

function IsIpAddress(IP: string): Boolean;
begin
Result := False;
if (CountF(IP, '.', 1) = 3) and IsNum(IP[1]) and IsNum(Copy(ip, length(ip), 1)) then
Result := True;
end;

function Parse_Url(InString: string): TURL;
var
Tempstr: string;
TempInt: Integer;
TempUrl: TUrl;
begin

// default ret vals.
TempUrl.proto := '';
TempUrl.server := '';
TempUrl.domain := '';
TempUrl.path := '/';
TempUrl.filename := '';

TempStr := Instring;

{If CountF(Tempstr, '.', 1)=1 Then
Begin
If copy(
TempUrl.Filename:=Tempstr;
tempstr:='';
End;}


if pos('://', tempstr) &gt
 
网络蜘蛛是啥子玩意?
 
回楼上,是搜索引擎的一部分,负责从网页中提取信息
 
to weiliu
没有贴完整啊,xgwzw@tom.com
如果太长:xgwzw@tom.com
 
我也要一份 xxljishiben◎126。com
 
pzdjmusic@126.com
 
9607gfg@sina.com 学习,请发给我一份!谢谢
 
samy_ywj@163.com谢谢
 
我也想要,学习学习。fangzhou6211@sina.com
 
我也要一份
flfq@yahoo.com.cn
 
代码空间上有控件,呵呵去下吧,www.csdn.net
 
后退
顶部