怎么获得网页上<table>表格里的文字数据(100)

  • 主题发起人 主题发起人 wlyft
  • 开始时间 开始时间
我用以下的方法有兴趣的可试试,取出来就是一个表格里所有的文字,但这不便于取出我想要的数据

{ Returns the cell at (aRow, aCol) of aTable, or nil when the table is nil or
  either index is out of range. Indices are zero-based. }
function GetHtmlTableCell(aTable: IHTMLTable; aRow, aCol: Integer): IHTMLElement;
var
  Row: IHTMLTableRow;
begin
  Result := nil;
  if aTable = nil then
    Exit;
  if (aRow < 0) or (aRow >= aTable.rows.length) then
    Exit;

  // The original post passes the index for both arguments of item() and notes
  // that VarEmpty could be used for the second one; kept as posted.
  Row := aTable.rows.item(aRow, aRow) as IHTMLTableRow;
  if Row = nil then
    Exit;
  if (aCol < 0) or (aCol >= Row.cells.length) then
    Exit;

  Result := Row.cells.item(aCol, aCol) as IHTMLElement;
end;

{ Returns the aIndex-th (zero-based) <table> element of aDoc, or nil when the
  document is nil or the index is out of range. }
function GetHtmlTable(aDoc: IHTMLDocument2; aIndex: Integer): IHTMLTable;
var
  Tables: IHTMLElementCollection;
begin
  Result := nil;
  if aDoc = nil then
    Exit;

  Tables := aDoc.all.tags('table') as IHTMLElementCollection;
  if Tables = nil then
    Exit;
  if (aIndex < 0) or (aIndex >= Tables.length) then
    Exit;

  Result := Tables.item(aIndex, aIndex) as IHTMLTable;
end;

{ Walks the frames of Doc recursively.

  For every frame of Doc this logs Doc's body HTML to memo1 and, when that HTML
  contains a table, appends the text of one cell to memo2. The table index is
  read from edit4, the row from edit2 and the column from edit3 (StrToInt, so a
  non-numeric entry raises EConvertError as before). A document without frames
  produces no output.

  NOTE(review): the table lookup is done on Doc itself, not on the frame's own
  document, exactly as in the original post - confirm this is intended. }
procedure Tform1.getlastframe(Doc: IHTMLDocument2);
const
  Separator = '-------------------------------------------------------';
var
  FrameWindow: IHTMLWindow2;
  FrameIndex: Integer;
  IndexArg: OleVariant;
  FrameDisp: IDispatch;
  Table: IHTMLTable;
  Cell: IHTMLElement;
  BodyHtml: string;
begin
  if Doc = nil then
    Exit;
  // A document without frames has nothing to iterate over.
  if Doc.frames.length = 0 then
    Exit;

  // Read the body HTML once; it is the same for every frame iteration.
  if Doc.body <> nil then
    BodyHtml := Doc.body.outerHTML
  else
    BodyHtml := '';

  for FrameIndex := 0 to Doc.frames.length - 1 do
  begin
    IndexArg := FrameIndex;
    memo1.Lines.Add(Separator);
    memo1.Lines.Add(BodyHtml);
    memo1.Lines.Add(Separator);

    FrameDisp := Doc.frames.item(IndexArg);
    if (FrameDisp <> nil) and
       Succeeded(FrameDisp.QueryInterface(IHTMLWindow2, FrameWindow)) then
    begin
      // Compare case-insensitively: the browser may serialise tag names in
      // upper case (<TABLE>), which a plain Pos('table', ...) would miss.
      if Pos('table', LowerCase(BodyHtml)) > 0 then
      begin
        Table := GetHtmlTable(Doc, StrToInt(edit4.Text));
        Cell := GetHtmlTableCell(Table, StrToInt(edit2.Text), StrToInt(edit3.Text));
        // Skip silently when the requested table/row/column does not exist.
        if Cell <> nil then
          memo2.Lines.Add(Cell.innerText);
      end;
      getlastframe(FrameWindow.document);
    end;
  end;
end;

{ Processes the document currently loaded in webbrowser1.

  Without frames the whole element collection is handed to DoWithHtmlElement;
  with frames every frame window is passed to getlastframe.
  aURL is not used by this routine; it is kept so existing callers still
  compile. }
procedure Tform1.FillIEForm(aURL: string);
var
  HTMLDocument: IHTMLDocument2;
  FrameWindow: IHTMLWindow2;
  FrameDisp: IDispatch;
  FrameIndex: Integer;
  IndexArg: OleVariant;
begin
  HTMLDocument := webbrowser1.document as IHTMLDocument2;
  if HTMLDocument = nil then
    Exit;

  if HTMLDocument.frames.length = 0 then
  begin
    // No frames: process the document's own elements.
    DoWithHtmlElement(HTMLDocument, HTMLDocument.Get_All);
  end
  else
  begin
    // Has frames: descend into each frame window.
    for FrameIndex := 0 to HTMLDocument.frames.length - 1 do
    begin
      IndexArg := FrameIndex;
      FrameDisp := HTMLDocument.frames.item(IndexArg);
      if (FrameDisp <> nil) and
         Succeeded(FrameDisp.QueryInterface(IHTMLWindow2, FrameWindow)) then
        getlastframe(FrameWindow.document);
    end;
  end;
end;
 
将HTML里的标签全部清除,剩下的就是文字了。
 
后退
顶部