我写了一段代码,用来下载网页,同时分析网页里的图片连接,
再下载到本地目录的image目录下,同时把原网页的地址改为
本机的相对地址image/..。
procedure SaveURLPageToDir;
var
S:string;
strURL:String;
strURLDIR:string;
strGet:String;
strImageFileName:string;
strNewBody:String;
strGetURL:string;
strImgDir,strImgName:string;
strTemp:String;
strt:string;
nTemp,nLenTemp:integer;
bBeInMark,bImgDown,bCgiURL:boolean;
MyHttp2:TNMHttp;
F:textFile;
I,L,j,k:Integer;
imgfile:file of byte;
begin
MyHttp2:=TNMHTTP.Create(nil);
With Sender as TMenuItem do
begin
strURL:=GetURLFromIE(shSave);
if strURL='' then
begin
exit;
end;
showmessage('网页地址:'+strURL);
I:=Pos('//',strURL);
if I>0 then
S:=Copy(StrURL,I+2,length(strURL))+'.html';
S:=StringReplace(S,'/','_',[rfReplaceAll]);
// if S='' then S:='/Index'+dateToStr(Date)+' .html';
showmessage('保存路径:'+SaveGroup.Dirs[GroupIndex].Dir);
setCurrentDir(SaveGroup.Dirs[GroupIndex].Dir);
end; //end of with
if (pos('www',strURL)=0)
then strImgDir:=copy(strURL,8,4)+'images'
else strImgDir:=copy(strURL,12,4)+'images';
// strImgDir:='images';
createDir(strImgDir);
assignFile(F,S);
strGet:=ReadFromURL(strURL); //从URL取到页面,写入strGet以备后用
try
rewrite(F);
except
exit;
end;
///插入下载图片
strURLDir:='';
I:=lastDelimiter('/',strURL); //get last delimiter '/'
if strURL<>'/' then
begin
strURLDir:=strURLDir+'/';
Inc(I);
end;
strURLDir:=Copy(strURL,1,I);
strTemp:=strGet;
strNewBody:='';
i:=1;
nLenTemp:=length(strTemp); //html的总字节数
bBeInMark:=false; //标志是否在mark里
while i<=nLenTemp do
begin
if (strTemp<>'<') then
begin
if strTemp='>' then
bBeInMark:=false;
strNewBody:=strNewBody+strTemp;
i:=i+1;
end //end strTemp <> '<'
else//if strTemp='<'
begin
bBeInMark:=true; //一个mark的开始
S:='';
For K:=1 to 5 do
begin
S:=S+UpCase(strTemp[I+K]);
end;
//judge if the symbol is "<img"
if ((pos('IMG',s)=1)or(pos('img',s)=1))then
begin
//if is simbol "<img"
bImgDown:=true;
//get the picture url from strTemp
while Not((strTemp='=') and (UpCase(strTemp[i-1])='C')) do
begin
strNewBody:=strNewBody+strTemp;
i:=i+1;
end;
strNewBody:=strNewBody+strTemp;
i:=i+1;
//get the image URL String
strGetURL:='';
if (strTemp='"') or (strTemp='''') then
inc(i);
while (strTemp<>' ') and (strTemp<>'>') do
begin
strGetURL:=strGetURL+strTemp;
i:=i+1;
if (strTemp='"')or(strTemp='''')
then break;
end;
//if strGetURL if a full http address
//get image file data
if pos('?',strGetURL)=0
then begin //不是动态生成的图片
nTemp:=LastDelimiter('/',strGetURL);
strImgName:=copy(strGetURL,nTemp+1,length(strGetURL)-nTemp);
//save the image date to file
if not (FileExists(strImgDir+strImgName))
then begin
strt:=strURL;
if (copy(strt,1,7)='http://')
then strt:=copy(strt,8,length(strt)-7);
strGetURL:=GetAbsolute(strt,strGetURL);
MyHttp2.InputFileMode:=true;
MyHttp2.Body:=(strImgDir+'/'+strImgName);
try
MyHttp2.Get(strGetURL);
except
strNewBody:=strNewBody+'"'+strGetURL+'"';
end;
//保存得到的图片
if (pos('image/',MyHttp2.Header)<>0)
then begin //这是图片,将其保存在image目录下
strNewBody:=strNewBody+'"'+strImgDir+'/'+strImgName+'"';
end else
begin //if not succesful ,don't change the URL
strNewBody:=strNewBody+'"'+strGetURL+'"';
end;
end;
end;
end //end "<img"
else //nothing ,skip one character
begin
strNewBody:=strNewBody+strTemp;
inc(i);
end; //end of else
end; //end of in mark
end;//end of while
//插入下载图片
l:= length(strNewBody);
For I:=1 to L do
begin
write(F,StrNewBody);
end;
closeFile(F);
end;