如何用delphi从intenet中的htm文件中只下载文本内容?(100分)

  • 主题发起人 主题发起人 xjlqs
  • 开始时间 开始时间
X

xjlqs

Unregistered / Unconfirmed
GUEST, unregistred user!
如何用delphi从intenet中的htm文件中只下载文本内容?
 
这是不可能的
你唯一能做的就是先把htm下载下来,再把里面的文本提取出来。
 
你可以参考Delphi的例子,HTTPClient。下载后然后自己分析Html文件,滤掉图片脚本
 
//用了idHTTP控件。
unit Unit1;

interface

uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, NMURL, StdCtrls, ExtCtrls, IdBaseComponent, IdComponent,
IdTCPConnection, IdTCPClient, IdHTTP, ComCtrls;

type
TForm1 = class(TForm)
Memo1: TMemo;
Panel1: TPanel;
Button1: TButton;
Edit1: TEdit;
IdHTTP1: TIdHTTP;
ProgressBar1: TProgressBar;
procedure Button1Click(Sender: TObject);
procedure IdHTTP1WorkBegin(Sender: TObject; AWorkMode: TWorkMode;
const AWorkCountMax: Integer);
procedure IdHTTP1WorkEnd(Sender: TObject; AWorkMode: TWorkMode);
procedure IdHTTP1Work(Sender: TObject; AWorkMode: TWorkMode;
const AWorkCount: Integer);
private
{ Private declarations }
public
{ Public declarations }
end;

var
Form1: TForm1;

implementation

{$R *.dfm}

function FilterBody(str:String):Integer;
var
filter:Boolean;
i,len:Integer;
ResultStr:String;
begin
len:=Length(str);
filter:=False;
ResultStr:=' ';
Result:=1;
for i := 1 to len do
begin
if str='<' then
begin
ResultStr:=upperCase(str[i+1]+str[i+2]+str[i+3]+str[i+4]);
if ResultStr='BODY' then filter:=True;
end;
if (filter=true) and (str='>') then
begin
Result:=i+1;
filter:=False;
break;
end;
end;

end;

function FilterStr(str:String):String;
var
Filter:Boolean;
i,len:Integer;
ResultStr,FilterStr,isbody:String;
begin
Filter:=False;
ResultStr:=' ';
len:=Length(str);
for i := FilterBody(str) to len do
begin
if str='<' then Filter:=True;
if str='>' then begin Filter:=False;str:=' '; end;
if Filter then
begin
FilterStr:=' ';
FilterStr:=upperCase(str+str[i+1]+str[i+2]);
ResultStr:=ResultStr;
if pos('<BR',FilterStr)>0 then
ResultStr:=ResultStr+#13#10;
if pos('<P',FilterStr)>0 then
ResultStr:=ResultStr+#13#10;
if pos('<H',FilterStr)>0 then
ResultStr:=ResultStr+#13#10;
if pos('</H',FilTerStr)>0 then
ResultStr:=ResultStr+#13#10;
if pos('</P',FilTerStr)>0 then
ResultStr:=ResultStr+#13#10;
if pos('<HR',FilTerStr)>0 then
ResultStr:=ResultStr+#13#10;
end
else
ResultStr:=ResultStr+str;
end;
Result:=Trim(ResultStr);
end;

procedure TForm1.Button1Click(Sender: TObject);
var
url,body:String;
begin
url:=Edit1.Text;
body:=idHTTP1.Get(url);
memo1.Clear;
memo1.Text:=FilterStr(body);
end;

procedure TForm1.IdHTTP1WorkBegin(Sender: TObject; AWorkMode: TWorkMode;
const AWorkCountMax: Integer);
begin
ProgressBar1.Max:=AWorkCountMax;
end;

procedure TForm1.IdHTTP1WorkEnd(Sender: TObject; AWorkMode: TWorkMode);
begin
progressBar1.Position:=0;
end;

procedure TForm1.IdHTTP1Work(Sender: TObject; AWorkMode: TWorkMode;
const AWorkCount: Integer);
begin
progressBar1.Position:=AWorkCount;
end;

end.
 
Indy Clients页中的
IdHtt控件
HTTP.Request下面的属性
 
下栽后在提取,我自己也会
 
用webbrows打开页面,复制到memo,再保存到txt文件即可
 
请各位告诉我如何用程序复制webbrows的内容
 
if WebBrowser1.Document <> nil then
begin
WebBrowser1.ExecWB(OLECMDID_selectall,OLECMDEXECOPT_DODEFAULT);
WebBrowser1.ExecWB(OLECMDID_COPY,OLECMDEXECOPT_DODEFAULT);
end;
 
接受答案了.
 
后退
顶部