我有vc的代码,谁帮我翻译过来!具体见http://www.codeproject.com/internet/parse_html.asp
特别是前段不用webbrow类如何取得IHTMLDOCUment2的.因为我要在dll里用此功能。
CWaitCursor wait;
if(m_csFilename.IsEmpty()){
AfxMessageBox(_T("Please specify the file to parse"));
return;
}
CFile f;
//let's open file and read it into CString (u can use any buffer to read though
if (f.Open(m_csFilename, CFile::modeRead|CFile::shareDenyNone)) {
m_wndLinksList.ResetContent();
CString csWholeFile;
f.Read(csWholeFile.GetBuffer(f.GetLength()), f.GetLength());
csWholeFile.ReleaseBuffer(f.GetLength());
f.Close();
//declare our MSHTML variables and create a document
MSHTML::IHTMLDocument2Ptr pDoc;
MSHTML::IHTMLDocument3Ptr pDoc3;
MSHTML::IHTMLElementCollectionPtr pCollection;
MSHTML::IHTMLElementPtr pElement;
HRESULT hr = CoCreateInstance(CLSID_HTMLDocument, NULL, CLSCTX_INPROC_SERVER,
IID_IHTMLDocument2, (void**)&pDoc);
//put the code into SAFEARRAY and write it into document
SAFEARRAY* psa = SafeArrayCreateVector(VT_VARIANT, 0, 1);
VARIANT *param;
bstr_t bsData = (LPCTSTR)csWholeFile;
hr = SafeArrayAccessData(psa, (LPVOID*)&param);
param->vt = VT_BSTR;
param->bstrVal = (BSTR)bsData;
hr = pDoc->write(psa);
hr = pDoc->close();
SafeArrayDestroy(psa);
//I'll use IHTMLDocument3 to retrieve tags. Note it is available only in IE5+
//If you don't want to use it, u can just run through all tags in HTML
//(IHTMLDocument2->all property)
pDoc3 = pDoc;
//display HREF parameter of every link (A tag) in ListBox
pCollection = pDoc3->getElementsByTagName(L"A");
for(long i=0; i<pCollection->length; i++){
pElement = pCollection->item(i, (long)0);
if(pElement != NULL){
//second parameter says that you want to get text inside attribute as is