nit spider;
interface
uses
Windows, SysUtils, Classes, GhSock, hyperstr, dialogs;
const
Max_LAYERS = 15;
type
TURL = record
Proto: string;
Server: string;
Domain: string;
Path: string;
Filename: string;
end;
TOnBeforeConnect = procedure(ThreadNum, Layer: Integer; URL: TUrl) of object;
TOnFailConnect = procedure(ThreadNum, Layer: Integer; URL: TUrl) of object;
TOnAfterConnect = procedure(ThreadNum, Layer: Integer; URL: TUrl) of object;
TOnLinkFound = procedure(ThreadNum, Layer: Integer; URL: TUrl; var searchThisLink: Boolean) of object;
TOnPageLoaded = procedure(ThreadNum, Layer: Integer; URL: TUrl; Header, HTMLBody, TextBody: string) of object;
TOnfoundEmail = procedure(ThreadNum, Layer: integer; EmailAddress: string) of object;
TOnGetNextUrlSeed = procedure(ThreadNum: Integer; var UrlToSearch: string) of object;
TOnError = procedure(ThreadNum: Integer; ErrorMessage: string) of object;
type
TSpider = class;
TSpiderThread = class(TThread)
private
TempUrlList: tstringlist;
LayersURLSToSearch: array[1..max_layers] of tstringlist;
Fabort: Boolean;
ThreadNumber: Integer;
MainSpiderObj: TSpider;
Http1: TGhClient;
procedure FOnTerminate(Sender: TObject);
protected
public
constructor Create(Sender: Tspider; ThreadNum: integer);
destructor Destroy; override;
procedure Execute; override;
procedure Abort;
published
end;
Tspider = class(Tcomponent)
private
{ Private declarations }
FProxyAddr: string;
FMaxLayers: Integer;
FThreadsToUse: Integer;
ThreadList: array[1..100] of TspiderThread;
FOnBeforeConnect: TOnBeforeConnect;
FOnAfterConnect: TOnAfterConnect;
FOnFailConnect: TOnFailConnect;
FOnLinkFound: TOnLinkfound;
FOnPageLoaded: TOnPageLoaded;
FOnEmailfound: TOnfoundEmail;
FOnGetNextUrlSeed: TOnGetNextUrlSeed;
FOnError: TOnError;
function Running: Boolean;
protected
{ Protected declarations }
public
{ Public declarations }
work_CS: TRTLCriticalSection;
procedure Run;
constructor Create(Aowner: Tcomponent); override;
destructor Destroy; override;
procedure Abort;
published
property ProxyAddr: string read FProxyAddr write FProxyAddr;
property MaxLayers: integer read fmaxlayers write fmaxlayers;
property ThreadsToUse: integer read FThreadsToUse write FThreadsToUse;
property OnAfterConnect: TOnAfterConnect read FOnAfterConnect write FOnAfterConnect;
property OnBeforeConnect: TOnBeforeConnect read FOnBeforeConnect write FOnBeforeConnect;
property OnFailConnect: TOnFailConnect read FOnFailConnect write FOnFailConnect;
property OnLinkFound: TOnLinkfound read FOnLinkFound write FOnLinkFound;
property OnPageLoaded: TOnPageLoaded read FOnPageLoaded write FOnPageLoaded;
property OnEmailFound: TOnfoundEmail read FOnEmailFound write FOnEmailFound;
property OnGetNextUrlSeed: TOnGetNextUrlSeed read FOnGetNextUrlSeed write FOnGetNextUrlSeed;
property OnError: TOnError read FOnError write FOnError;
{ Published declarations }
end;
procedure Register;
implementation
function IsIpAddress(IP: string): Boolean;
begin
Result := False;
if (CountF(IP, '.', 1) = 3) and IsNum(IP[1]) and IsNum(Copy(ip, length(ip), 1)) then
Result := True;
end;
function Parse_Url(InString: string): TURL;
var
Tempstr: string;
TempInt: Integer;
TempUrl: TUrl;
begin
// default ret vals.
TempUrl.proto := '';
TempUrl.server := '';
TempUrl.domain := '';
TempUrl.path := '/';
TempUrl.filename := '';
TempStr := Instring;
{If CountF(Tempstr, '.', 1)=1 Then
Begin
If copy(
TempUrl.Filename:=Tempstr;
tempstr:='';
End;}
if pos('://', tempstr) >