我用代码写了一个类,来实现以上目的匆忙作的,用来应付老总,
写的很乱,有很多情况未能考虑周全。还请众位大侠们多多指点了。
其中借鉴了天真老兄的相关代码。
在此对众位的帮助表示忠心地感谢。
*******************************************************************************
以下程序都由Delphi7调试通过
(注:其中使用了一个TIdHTTP控件,要Delphi5上使用,请先装入此控件的Delphi5版本)
*******************************************************************************
procedure TForm1.Button1Click(Sender: TObject);
var
aa: THtmlDown;
begin
aa := THtmlDown.Create(self);
aa.URL_1 := 'http://soft.jx163.com/';
aa.SavePath_1 := 'd:/download/';
aa.HtmlFile_1 := 'aaaa.html';
aa.DownData;
Memo1.Text := aa.HtmlDoc_1.Text;
aa.Free;
end;
*******************************************************************************
unit UnHtmlDownLoad;
interface
uses
Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms, Dialogs,
StdCtrls, IdHTTP;
type
THtmlImgList = class(TComponent)
private
FURL: String;
FTagList: TStrings;
FLinkList: TStrings;
FFileList: TStrings;
FUtterlyList: TStrings;
function GainLink(TagStr, SignStr: String): String;
function DealLink(LinkStr: String): String;
public
constructor Create(AOwner: TComponent); override;
destructor Destroy; override;
procedure Add(TagStr, SignStr: String);
procedure Del(ListNo: Integer);
procedure Clear;
published
property URL: String read FURL write FURL;
property TagList: TStrings read FTagList;
property LinkList: TStrings read FLinkList;
property FileList: TStrings read FFileList;
property UtterlyList: TStrings read FUtterlyList;
end;
THtmlDown = class(TComponent)
private
FHttp_1 : TIdHTTP;
FURL_1 : String;
FSavePath_1: String;
FHtmlFile_1: String;
FHtmlDoc_1 : TStrings;
FLinkList_1: THtmlImgList;
file://function DownloadFile(Source, Dest: String): Boolean;
procedure DownFiles(LinkList: THtmlImgList);
procedure GainLinks(HtmlDoc: String; var LinkList: THtmlImgList);
function ReplaceLink(LinkList: THtmlImgList; HtmlDoc: String): String;
protected
{ ******* }
public
constructor Create(AOwner: TComponent); override;
destructor Destroy; override;
procedure DownData;
published
property URL_1: String read FURL_1 write FURL_1;
property SavePath_1: String read FSavePath_1 write FSavePath_1;
property HtmlFile_1: String read FHtmlFile_1 write FHtmlFile_1;
property HtmlDoc_1 : TStrings read FHtmlDoc_1;
property LinkList_1 : THtmlImgList read FLinkList_1;
end;
function GetFilename(S: String): String;
implementation
{ ** 从字符串中取得文件名 ** }
function GetFilename(S: String): String;
var
i: integer;
begin
for i := length(S) downto 1 do
begin
if (s = '/') or (S = '/') then break;
end;
Result := copy(S, i + 1, length(S) - i);
end;
{ ** THtmlImgList ** }
constructor THtmlImgList.Create(AOwner: TComponent);
begin
FTagList := TStringList.Create;
FLinkList := TStringList.Create;
FFileList := TStringList.Create;
FUtterlyList := TStringList.Create;
end;
destructor THtmlImgList.Destroy;
begin
FTagList.Free;
FLinkList.Free;
FFileList.Free;
FUtterlyList.Free;
end;
procedure THtmlImgList.Add(TagStr, SignStr: String);
var
iNo: Integer;
sLink, sUtterly: String;
begin
sLink := GainLink(TagStr, SignStr);
sUtterly := DealLink(sLink);
if (sLink = '') or (sUtterly = '') then Exit;
for iNo := 0 to FLinkList.Count - 1 do
begin
if FLinkList.Strings[iNo] = sLink then Exit;
end;
FTagList.Add(TagStr);
FLinkList.Add(sLink);
FFileList.Add(GetFilename(sLink));
FUtterlyList.Add(sUtterly);
end;
procedure THtmlImgList.Del(ListNo: Integer);
begin
FTagList.Delete(ListNo);
FLinkList.Delete(ListNo);
FFileList.Delete(ListNo);
FUtterlyList.Delete(ListNo);
end;
procedure THtmlImgList.Clear;
begin
FTagList.Clear;
FLinkList.Clear;
FFileList.Clear;
FUtterlyList.Clear;
end;
function THtmlImgList.GainLink(TagStr, SignStr: String): String;
var
n, m,
iBeg : Integer;
sTemp: String;
begin
sTemp := TagStr;
iBeg := pos(SignStr, sTemp);
if iBeg <> 0 then
delete(sTemp, 1, iBeg)
else begin
Result := '';
Exit;
end;
for n := 0 to Length(sTemp) do
begin
if sTemp[n] = '"' then Break;
end;
n := n + 1;
for m := n to Length(sTemp) do
begin
if sTemp[m] = '"' then Break;
end;
Result := copy(sTemp, n, m - n);
end;
function THtmlImgList.DealLink(LinkStr: String): String;
var
i, beginpos: integer;
sTemp, sBaseURL, sLinkStr: String;
begin
if LinkStr = '' then Exit;
sBaseURL := URL;
sLinkStr := LinkStr;
sLinkStr := StringReplace(sLinkStr, '"', '', [rfReplaceAll]);
if copy(sLinkStr, 1, 7) = 'http://' then
begin
Result := sLinkStr;
Exit;
end;
if sLinkStr[1] = '/' then
begin
if copy(sBaseURL, 1, 7) = 'http://' then
sBaseURL := copy(sBaseURL, 8, Length(sBaseURL) - 7);
sBaseURL := copy(sBaseURL, 1, pos('/', sBaseURL ) - 1);
Result := 'http://' + sBaseURL + sLinkStr;
Exit;
end;
sLinkStr := StringReplace(sLinkStr, '/', '/', [rfReplaceAll]);
if pos(' ', sLinkStr) <> 0 then
begin
sLinkStr := copy(sLinkStr, 1, pos(' ', sLinkStr) - 1);
end;
if copy(sBaseURL, 1, 7) = 'http://' then
sBaseURL := copy(sBaseURL, 8, Length(sBaseURL) - 7);
for i:= length(sBaseURL) downto 1 do
begin
if (sBaseURL = '/') or (sBaseURL = '/') then
begin
break;
end;
end;
if i <> 0 then
sBaseURL := copy(sBaseURL, 1, i)
else
sBaseURL := sBaseURL + '/';
if ((pos('.', sLinkStr) <> 0) and (pos('./', sLinkStr) = 0) and (sLinkStr[1] <> '/')) or (pos('www.', sLinkStr) <> 0) then sTemp := sLinkStr;
if (pos('../', sLinkStr) = 0) and (POS('./', sLinkStr) = 0) then file://没有指定目录//??要改改
begin
if sLinkStr[1] = '/' then
sLinkStr := copy(sLinkStr, 2, length(sLinkStr) - 1);
sTemp := sBaseURL + sLinkStr;
end
else begin
i := length(sBaseURL)-1;
while pos('../', sLinkStr) <> 0 do
begin
beginpos := pos('../', sLinkStr);
delete(sLinkStr, 1, beginpos + 2);
while i >= 1 do
begin
if sBaseURL = '/' then
begin
dec(i);
break;
end;
dec(i);
end;
end;
sTemp := copy(sBaseURL, 1, i) + '/' + sLinkStr;
end;
Result := 'http://' + sTemp;
end;
{ ** THtmlDown ******************************* }
constructor THtmlDown.Create(AOwner: TComponent);
begin
FHttp_1 := TIdHTTP.Create(nil);
FHtmlDoc_1 := TStringList.Create;
FLinkList_1:= THtmlImgList.Create(nil);
end;
destructor THtmlDown.Destroy;
begin
FHttp_1.Free;
FHtmlDoc_1.Free;
FLinkList_1.Free;
end;
procedure THtmlDown.DownData;
begin
FHtmlDoc_1.Clear;
FLinkList_1.Clear;
FLinkList_1.URL := URL_1;
FHtmlDoc_1.Text := FHttp_1.Get(FUrl_1);
GainLinks(FHtmlDoc_1.Text, FLinkList_1);
DownFiles(FLinkList_1);
FHtmlDoc_1.Text := ReplaceLink(FLinkList_1, FHtmlDoc_1.Text);
{
FHtmlDoc_1.Add(FLinkList_1.TagList.Text);
FHtmlDoc_1.Add(FLinkList_1.LinkList.Text);
FHtmlDoc_1.Add(FLinkList_1.UtterlyList.Text);
}
FHtmlDoc_1.SaveToFile(FSavePath_1 + FHtmlFile_1);
end;
procedure THtmlDown.GainLinks(HtmlDoc: String; var LinkList: THtmlImgList);
var
sTemp: String;
iStart_1,
iStart_2,
iEndTag: Integer;
begin
sTemp := HtmlDoc;
while True do
begin
iStart_1 := pos('<BODY', sTemp);
iStart_2 := pos('<body', sTemp);
if (iStart_1 + iStart_2) = 0 then Break;
if iStart_2 <> 0 then
if iStart_1 < iStart_2 then iStart_1 := iStart_2;
Delete(sTemp, 1, iStart_1 - 1);
for iEndTag := 0 to Length(sTemp) do
begin
if sTemp[iEndTag] = '>' then Break;
end;
LinkList.Add(Copy(sTemp, 1, iEndTag), 'background');
Delete(sTemp, 1, iEndTag);
end;
sTemp := HtmlDoc;
while True do
begin
iStart_1 := pos('<LINK', sTemp);
iStart_2 := pos('<link', sTemp);
if (iStart_1 + iStart_2) = 0 then Break;
if iStart_2 <> 0 then
if iStart_1 < iStart_2 then iStart_1 := iStart_2;
Delete(sTemp, 1, iStart_1 - 1);
for iEndTag := 0 to Length(sTemp) do
begin
if sTemp[iEndTag] = '>' then Break;
end;
LinkList.Add(Copy(sTemp, 1, iEndTag), 'href');
Delete(sTemp, 1, iEndTag);
end;
sTemp := HtmlDoc;
while True do
begin
iStart_1 := pos('<IMG', sTemp);
iStart_2 := pos('<img', sTemp);
if (iStart_1 + iStart_2) = 0 then Break;
if iStart_2 <> 0 then
if iStart_1 < iStart_2 then iStart_1 := iStart_2;
Delete(sTemp, 1, iStart_1 - 1);
for iEndTag := 0 to Length(sTemp) do
begin
if sTemp[iEndTag] = '>' then Break;
end;
LinkList.Add(Copy(sTemp, 1, iEndTag), 'src');
Delete(sTemp, 1, iEndTag);
end;
end;
procedure THtmlDown.DownFiles(LinkList: THtmlImgList);
var
iNo: Integer;
sTempDir,
sTempLink,
sTempFile: String;
msImg: TMemoryStream;
begin
msImg := TMemoryStream.Create;
sTempDir := Copy(FHtmlFile_1, 1, pos('.', FHtmlFile_1) - 1) + '.Files/';
if not DirectoryExists(FSavePath_1 + sTempDir) then MkDir(FSavePath_1 + sTempDir);
for iNo := 0 to LinkList.TagList.Count - 1 do
begin
msImg.Clear;
sTempLink := LinkList.UtterlyList.Strings[iNo];
FHttp_1.Get(sTempLink, msImg);
sTempFile := FSavePath_1 + sTempDir + LinkList.FileList.Strings[iNo];
if msImg.Size > 0 then msImg.SaveToFile(sTempFile);
end;
end;
function THtmlDown.ReplaceLink(LinkList: THtmlImgList; HtmlDoc: String): String;
var
iNo: Integer;
sOld,
sNew,
sPath,
sTemp: String;
begin
sTemp := HtmlDoc;
sPath := Copy(FHtmlFile_1, 1, pos('.', FHtmlFile_1) - 1) + '.Files/';
for iNo := 0 to LinkList.LinkList.Count - 1 do
begin
sOld := LinkList.LinkList.Strings[iNo];
sNew := sPath + LinkList.FileList.Strings[iNo];
sTemp := StringReplace(sTemp, sOld, sNew, [rfReplaceAll]);
end;
Result := sTemp;
end;
end.