如何直接提取页面文本?(50分)

  • 主题发起人 主题发起人 nau
  • 开始时间 开始时间
N

nau

Unregistered / Unconfirmed
GUEST, unregistered user!
给定一个url,如何直接提取该页的页面文本?
这是copy_paste给我的完整的程序代码,我编译通过了,但还是取不到页面文本,
怎么办啊?请帮帮忙!
unit Unit1;

interface

uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls;

type
{ Form of the WinInet demo unit. Button1Click fills Memo1 with the text
  returned by DownloadFile. }
TForm1 = class(TForm)
{ Button that triggers the download (presumably wired to Button1Click in the .dfm - confirm). }
Button1: TButton;
{ Memo that receives the downloaded page source. }
Memo1: TMemo;
{ Click handler: downloads a fixed URL and shows the result in Memo1. }
procedure Button1Click(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
end;

var
Form1: TForm1;
implementation
uses WinInet;
{$R *.dfm}

{ Downloads the resource at AURL with WinInet and returns the raw response
  body (for a web page: its HTML source, not the rendered text).

  AURL    - absolute URL to fetch.
  Returns - the bytes received, as a string; '' if the session or the URL
            could not be opened. If a read fails part-way, whatever was
            received up to that point is returned.

  The message queue is pumped between reads so the UI stays responsive;
  callers must therefore tolerate re-entrancy (e.g. a second button click). }
function DownloadFile(const AURL: string): string;
var
  Raw: AnsiString;

  { Appends Count bytes starting at Buf to Raw. }
  procedure Add(Buf: PAnsiChar; Count: Integer);
  var
    Len: Integer;
  begin
    Len := Length(Raw);
    SetLength(Raw, Len + Count);
    Move(Buf^, Raw[Len + 1], Count);
  end;

var
  BytesRead: DWORD;
  Session, Connection: HINTERNET;
  { Byte-sized buffer: InternetReadFile counts bytes, not characters. }
  Buffer: array[0..1023] of AnsiChar;
begin
  Result := '';
  Raw := '';
  Session := InternetOpen(nil, INTERNET_OPEN_TYPE_DIRECT, nil, nil, 0);
  if not Assigned(Session) then
    Exit;
  try
    { Parameter order is (session, url, headers, headersLength, flags, context).
      No extra headers are sent, so headersLength is 0; the flag belongs in
      the fifth argument. }
    Connection := InternetOpenUrl(Session, PChar(AURL), nil, 0,
      INTERNET_FLAG_RELOAD, 0);
    if not Assigned(Connection) then
      Exit;
    try
      repeat
        { Reset so a failed call can never leave a stale count behind. }
        BytesRead := 0;
        if not InternetReadFile(Connection, @Buffer, SizeOf(Buffer), BytesRead) then
          Break;
        if BytesRead > 0 then
          Add(@Buffer, BytesRead);
        Application.ProcessMessages;
      until BytesRead = 0; { a successful read of 0 bytes marks end of data }
    finally
      InternetCloseHandle(Connection);
    end;
  finally
    InternetCloseHandle(Session);
  end;
  { Bytes are accumulated in an AnsiString so the byte-wise Move stays correct
    even where string is a two-byte Unicode type. }
  Result := string(Raw);
end;
{ Fetches the demo page with DownloadFile and shows its source in Memo1. }
procedure TForm1.Button1Click(Sender: TObject);
const
  PageUrl = 'http://www.sina.com.cn';
var
  PageSource: string;
begin
  PageSource := DownloadFile(PageUrl);
  Memo1.Text := PageSource;
end;

end.
 
unit Main;

interface

uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls, OleCtrls, SHDocVw;

type
{ Form of the TWebBrowser demo unit. ButtonClick navigates the embedded
  browser to the URL typed into Edit; WebBrowserDocumentComplete copies the
  loaded document's body HTML into Memo. }
TMainForm = class(TForm)
{ Embedded browser control used to load the page. }
WebBrowser: TWebBrowser;
{ Input field holding the URL to navigate to. }
Edit: TEdit;
{ Button that starts navigation (presumably wired to ButtonClick in the .dfm - confirm). }
Button: TButton;
{ Memo that receives the page's body HTML. }
Memo: TMemo;
{ Click handler: clears Memo and navigates WebBrowser to Edit.Text. }
procedure ButtonClick(Sender: TObject);
{ Handler with the TWebBrowser OnDocumentComplete signature
  (presumably assigned in the .dfm - confirm). }
procedure WebBrowserDocumentComplete(Sender: TObject;
const pDisp: IDispatch; var URL: OleVariant);
private
{ Private declarations }
public
{ Public declarations }
end;

var
MainForm: TMainForm;

implementation

uses
MSHTML_TLB;

{$R *.dfm}

{ Clears the previous result and starts loading the URL typed into Edit.
  The page content is picked up later in WebBrowserDocumentComplete. }
procedure TMainForm.ButtonClick(Sender: TObject);
var
  TargetUrl: WideString;
begin
  TargetUrl := WideString(Edit.Text);
  Memo.Clear;
  WebBrowser.Navigate(TargetUrl);
end;

{ Copies the HTML of the loaded document's body into Memo.

  Does nothing when the browser holds no HTML document (e.g. a non-HTML
  resource) or the document has no body yet, instead of raising an access
  violation.

  Note: innerHTML yields markup; use Doc.body.innerText if only the visible
  text is wanted. }
procedure TMainForm.WebBrowserDocumentComplete(Sender: TObject;
  const pDisp: IDispatch; var URL: OleVariant);
var
  Doc: IHTMLDocument2;
begin
  { Ask for the interface that matches Doc's declared type; Supports also
    copes with a nil Document. }
  if not Supports(WebBrowser.Document, IHTMLDocument2, Doc) then
    Exit;
  if Doc.body = nil then
    Exit;
  Memo.Text := Doc.body.innerHTML;
end;

end.
 
一定要用WebBrowser吗?
我试试你的方法,谢谢
 
我把MSHTML_TLB通过import导进去后,还是出错,怎么回事?
我是个新手!
 


http://www.delphibbs.com/delphibbs/dispq.asp?lid=1832357

 
请影子和LeeChange接分!
非常感谢!!!
 
影子,对不起,我搞错了,我是新手。
请在下面的帖接分!
http://www.delphibbs.com/delphibbs/dispq.asp?lid=1860410
如果可能,请留下email给我,以后方便请教。
 

不敢说指教,那些代码忘记是什么时候抄下来的,呵呵

至于email,我的资料上有。
shadow@acec.com.cn
 
后退
顶部