function TSearchThread.dealwith(url,baseurl:string):string; // normalise a page link
{ Turns a raw href value found on the page `baseurl` into a URL to fetch.

  url     - raw attribute text; may still carry double quotes and trailing
            attributes separated by a space.
  baseurl - address of the page the link was found on.

  Returns '' when nothing usable is left after clean-up.  Links that already
  start with http://, https:// or www. are returned unchanged.  A link that
  starts with '/' is resolved against the site root, everything else against
  the directory of baseurl; leading './' and '../' segments are applied
  (a '../' never climbs above the site root). }
var
  i: integer;
  hoststart: integer;  // index of the first character after '://' (1 when there is no scheme)
  rootend: integer;    // index of the first '/' after the host, 0 when baseurl has no path
  lastslash: integer;  // index of the last '/' of the path part of baseurl
  root: string;        // scheme + host, without trailing '/'
  basedir: string;     // directory of baseurl, always ends with '/'
begin
  result := '';

  // --- clean up the raw attribute text
  url := stringreplace(url, '"', '', [rfReplaceAll]);
  // The pasted source replaced '/' with '/', which does nothing; presumably
  // the intent was to normalise backslashes, which browsers treat as '/'.
  url := stringreplace(url, '\', '/', [rfReplaceAll]);
  i := pos(' ', url);
  if i <> 0 then
    url := copy(url, 1, i - 1); // drop attributes that follow the address
  if url = '' then
    exit;

  // --- already absolute: nothing to resolve
  if SameText(copy(url, 1, 7), 'http://') or
     SameText(copy(url, 1, 8), 'https://') or
     (pos('www.', url) = 1) then
  begin
    result := url;
    exit;
  end;

  // --- split baseurl into site root and directory
  i := pos('?', baseurl);
  if i <> 0 then
    baseurl := copy(baseurl, 1, i - 1); // a '/' inside the query is not a path separator
  i := pos('#', baseurl);
  if i <> 0 then
    baseurl := copy(baseurl, 1, i - 1);

  hoststart := pos('://', baseurl);
  if hoststart <> 0 then
    inc(hoststart, 3)
  else
    hoststart := 1;

  rootend := 0;
  lastslash := 0;
  for i := hoststart to length(baseurl) do
    if baseurl[i] = '/' then
    begin
      if rootend = 0 then
        rootend := i;
      lastslash := i;
    end;

  if rootend = 0 then
  begin
    // host only, e.g. 'http://www.example.com'
    root := baseurl;
    basedir := baseurl + '/';
  end
  else
  begin
    root := copy(baseurl, 1, rootend - 1);
    basedir := copy(baseurl, 1, lastslash);
  end;

  // --- link relative to the site root
  if url[1] = '/' then
  begin
    result := root + url;
    exit;
  end;

  // --- link relative to the current directory
  while true do
  begin
    if copy(url, 1, 3) = '../' then
    begin
      delete(url, 1, 3);
      // go up one directory, but never above the site root
      if length(basedir) > length(root) + 1 then
      begin
        i := length(basedir) - 1; // skip the trailing '/'
        while (i > length(root) + 1) and (basedir[i] <> '/') do
          dec(i);
        basedir := copy(basedir, 1, i);
      end;
    end
    else if copy(url, 1, 2) = './' then
      delete(url, 1, 2)
    else
      break;
  end;

  result := basedir + url;
end;
function TSearchThread.getsrc(url:string):tstringlist; // ordinary web page
{ Downloads the page at `url`, stores it through savedata when the URL
  contains `cankao`, and collects the links found on the page.

  Returns a newly created string list holding every resolved link that is
  not yet in form1.searchedlist.  The list is empty (never nil, never
  undefined) when the page was already visited or turned out to be an error
  page.  The list is returned to the caller, which should free it.

  NOTE(review): this method touches form1 controls directly; if it runs on a
  worker thread that should go through Synchronize - confirm against the
  caller. }
var
  strlist: tstringlist;       // raw href values as they appear in the HTML
  temp: string;               // page content still to be scanned
  title: string;
  link: string;
  beginpos, endpos: integer;  // start and end of the piece being extracted
  i: integer;
begin
  result := tstringlist.Create;
  try
    if form1.searchedlist.IndexOf(url) <> -1 then
      exit; // already visited
    form1.searchedlist.Add(url);

    temp := getcontent(url); // fetch the content of the page
    form1.Memo1.Lines.add(url);

    //-------- filter out browser/server error pages by their title
    beginpos := pos('<title>', temp);
    endpos := pos('</title>', temp);
    if (beginpos > 0) and (endpos > beginpos) then
    begin
      // '<title>' is 7 characters long, so the text spans beginpos+7 .. endpos-1
      title := copy(temp, beginpos + 7, endpos - beginpos - 7);
      if (title = '没有可以显示的页') or (title = 'cannot find server page') then
        exit;
    end;

    if pos(cankao, url) <> 0 then // save the content to the database
    begin
      // StrToIntDef: an empty or non-numeric panel text counts as 0
      form1.StatusBar1.Panels[1].text :=
        inttostr(StrToIntDef(form1.StatusBar1.Panels[1].text, 0) + 1);
      form1.memo2.Lines.add('insert');
      savedata(url, temp);
    end;

    strlist := tstringlist.Create;
    try
      //-------- collect the text between every 'href=' and the next '>'
      beginpos := pos('href=', temp);
      while beginpos <> 0 do
      begin
        delete(temp, 1, beginpos + 4); // drop everything up to and including 'href='
        endpos := pos('>', temp);
        if endpos <= 0 then
          break; // unterminated tag: malformed content, stop scanning
        strlist.Add(copy(temp, 1, endpos - 1));
        delete(temp, 1, endpos);
        beginpos := pos('href=', temp);
      end;

      //-------- resolve the links and keep the ones not visited yet
      for i := 0 to strlist.Count - 1 do
        if strlist.strings[i] <> '' then
        begin
          link := dealwith(strlist.strings[i], url);
          // An extra filter on pos(cankao, link) was considered here, but it
          // sometimes hides useful pages, so it is deliberately left out.
          if (link <> '') and (form1.searchedlist.IndexOf(link) = -1) then
            result.add(link);
        end;
    finally
      strlist.free;
    end;
  except
    // do not leak the result list when anything above raises
    result.Free;
    result := nil;
    raise;
  end;
end;
贴出来了你自己看看!
不是太好!
呵给个参考吧