#include "stdafx.h"

#include <afxinet.h>
#include <atlsimpstr.h>

#include <fstream>
#include <iostream>
#include <set>
#include <sstream>
#include <string>
#include <vector>

using namespace std;
- #ifdef _DEBUG
- #define new DEBUG_NEW
- #endif
- int GetHttpFileData(CString strUrl,char* DownloadHtmFileName);
- int ParseHomePageDownloadFile(char* szfileName);
- int UTF8Str2GBK(const string& strUTF8,string& strGBK);
- void GetHomePageRecommend(char* szName,const string& strGbk);
// The one and only application object
- CWinApp theApp;
- using namespace std;
- int ParseUpdateFile(char* szfileName)
- {
- int iRet = -1;
- if(NULL == szfileName)
- return iRet;
- fstream fs(szfileName);
- stringstream ss ; // 创建字符串流对象
- ss << fs.rdbuf(); // 把文件流中的字符输入到字符串流中
- fs.close();
- string str = ss.str(); // 获取流中的字符串
- string strGbk;
- int i = UTF8Str2GBK(str,strGbk);
- if(strGbk.size() == 0 || i != 0)
- {
- cerr << "transfer utf8 to gbk error" << endl;
- return iRet;
- }
- basic_string <char>::size_type keyWordStart = strGbk.find("<title>");
- basic_string <char>::size_type keyWordEnd = strGbk.find("</title>",keyWordStart+1);
- if( (keyWordStart != string::npos) && (keyWordEnd != string::npos) && (keyWordEnd > keyWordStart) )
- {
- string strKeyWord = strGbk.substr(keyWordStart+7,keyWordEnd - keyWordStart -7);
- cout << strKeyWord << endl;
- }
- keyWordStart = strGbk.find("<div class=\\"cv-title\\">");
- keyWordEnd = strGbk.find("</div>",keyWordStart+1);
- if( (keyWordStart != string::npos) && (keyWordEnd != string::npos) && (keyWordEnd > keyWordStart) )
- {
- string strKeyWord = strGbk.substr(keyWordStart+22,keyWordEnd - keyWordStart -22);
- cout << strKeyWord << endl;
- }
- iRet = 0;
- return iRet;
- }
- void ShowUpdateInfo(char* szHtmAddress)
- {
- if ( 0 != GetHttpFileData(szHtmAddress,"HtmDownloadFile"))
- {
- cerr << "GetHttpFileData error once" << endl;
- }
- if( 0 != ParseUpdateFile("HtmDownloadFile"))
- {
- cerr << "ParseUpdateFile error once" << endl;
- }
- }
- void ShowHomePageElement(char* szHomePageAddress)
- {
- if ( 0 != GetHttpFileData(szHomePageAddress,"HtmDownloadFile"))
- {
- cerr << "GetHttpFileData error once" << endl;
- }
- if( 0 != ParseHomePageDownloadFile("HtmDownloadFile"))
- {
- cerr << "GetHttpFileData error once" << endl;
- }
- }
- int _tmain(int argc, TCHAR* argv[], TCHAR* envp[])
- {
- int nRetCode = 0;
- // 初始化 MFC 并在失败时显示错误
- if (!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0))
- {
- // TODO: 更改错误代码以符合您的需要
- _tprintf(_T("错误: MFC 初始化失败\\n"));
- nRetCode = 1;
- }
- else
- {
- // TODO: 在此处为应用程序的行为编写代码。
- ShowHomePageElement("http://www.verycd.com/");
- cout << "****************************************************" << endl;
- ShowUpdateInfo("http://www.verycd.com/entries/790244/");
- cout << "****************************************************" << endl;
- ShowUpdateInfo("http://www.verycd.com/entries/519062/");
- cout << "****************************************************" << endl;
- ShowUpdateInfo("http://www.verycd.com/entries/780306/");
- cout << "****************************************************" << endl;
- ShowUpdateInfo("http://www.verycd.com/entries/522227/");
- cout << "****************************************************" << endl;
- ShowUpdateInfo("http://www.verycd.com/entries/507338/");
- cout << "****************************************************" << endl;
- ShowUpdateInfo("http://www.verycd.com/entries/515005/");
- cout << "****************************************************" << endl;
- ShowUpdateInfo("http://www.verycd.com/entries/794197/");
- cout << "****************************************************" << endl;
- ShowUpdateInfo("http://www.verycd.com/entries/511135/");
- cout << "****************************************************" << endl;
- }
- system("pause");
- return nRetCode;
- }
- int UTF8Str2GBK(const string& strUTF8,string& strGBK)
- {
- int i = MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, NULL, 0);
- WCHAR *wsz = NULL;
- TCHAR *tsz = NULL;
- int iRet = -1;
- wsz = new WCHAR[i+1];
- if( NULL == wsz)
- {
- goto UTF8Str2GBK_EXIT;
- }
- MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, wsz, i);
- i = WideCharToMultiByte(CP_ACP, 0, wsz, -1, NULL, 0, NULL, NULL);
- tsz = new TCHAR[i+1];
- if( NULL == tsz)
- {
- goto UTF8Str2GBK_EXIT;
- }
- WideCharToMultiByte(CP_ACP, 0, wsz, -1, tsz, i, NULL, NULL);
- strGBK = string(tsz);
- iRet = 0;
- UTF8Str2GBK_EXIT:
- delete []wsz;
- delete []tsz;
- return iRet;
- }
- int ParseHomePageDownloadFile(char* szfileName)
- {
- int iRet = -1;
- if(NULL == szfileName)
- return iRet;
- fstream fs(szfileName);
- stringstream ss ; // 创建字符串流对象
- ss << fs.rdbuf(); // 把文件流中的字符输入到字符串流中
- fs.close();
- string str = ss.str(); // 获取流中的字符串
- string strGbk;
- int i = UTF8Str2GBK(str,strGbk);
- if(strGbk.size() == 0 || i != 0)
- {
- cerr << "transfer utf8 to gbk error" << endl;
- return iRet;
- }
- cout << "首页大推" << endl;
- GetHomePageRecommend("VeryCD.TrackEvent('base','首页大推',",strGbk);
- cout << "首页小推" << endl;
- GetHomePageRecommend("VeryCD.TrackEvent('base','首页小推',",strGbk);
- iRet = 0;
- return iRet;
- }
// Collects every value that follows the marker szName in strGbk and prints the
// distinct values in sorted order, one per line, each prefixed with
// "电驴首页推荐 ".
//
// A value is the text between the character that immediately follows the
// marker (expected to be the opening quote of the next argument) and the next
// "')" sequence. Empty values are skipped.
//
// szName: marker text to search for; a NULL pointer prints nothing.
// strGbk: page text to scan.
void GetHomePageRecommend(char* szName,const std::string& strGbk)
{
    if (NULL == szName)
        return;

    const std::string marker(szName);
    // Skip the marker plus the single opening-quote character after it. The
    // offset is derived from the marker so it stays correct for any marker length.
    const std::string::size_type skip = marker.size() + 1;

    std::set<std::string> setKeyWord;
    std::string::size_type searchFrom = 0;
    for (;;)
    {
        const std::string::size_type keyWordStart = strGbk.find(marker, searchFrom);
        if (keyWordStart == std::string::npos)
            break;

        const std::string::size_type keyWordEnd = strGbk.find("')", keyWordStart + 1);
        if (keyWordEnd == std::string::npos)
            break;

        // Only keep non-empty values.
        if (keyWordEnd > keyWordStart + skip)
            setKeyWord.insert(strGbk.substr(keyWordStart + skip, keyWordEnd - keyWordStart - skip));

        // Continue after the terminator just consumed.
        searchFrom = keyWordEnd + 1;
    }

    for (std::set<std::string>::const_iterator pos = setKeyWord.begin(); pos != setKeyWord.end(); ++pos)
    {
        std::cout << "电驴首页推荐 " << *pos << std::endl;
    }
}
- int GetHttpFileData(CString strUrl,char* szDownloadHtmFileName)
- {
- CInternetSession Session("Internet Explorer", 0);
- CHttpFile *pHttpFile = NULL;
- CString strData;
- CString strClip;
- int iRet = -1;
- if(szDownloadHtmFileName == NULL)
- {
- cerr << "DownloadHtmFileName is NULL" << endl;
- Session.Close();
- return iRet;
- }
- ofstream of(szDownloadHtmFileName);
- if (of.bad())
- {
- cerr << "of create file error" << endl;
- Session.Close();
- return iRet;
- }
- try
- {
- pHttpFile = (CHttpFile*)Session.OpenURL(strUrl);
- while ( pHttpFile->ReadString(strClip) )
- {
- of << strClip;
- }
- }catch(CInternetException* pEx)
- {
- TCHAR pszError[64];
- pEx->GetErrorMessage(pszError, 64);
- cerr << __FUNCTION__ << pszError << endl;
- goto GetHttpFileData_EXIT;
- }
- iRet = 0;
- GetHttpFileData_EXIT:
- Session.Close();
- of.close();
- return iRet;
- }
// This snippet comes from http://www.codesnippet.cn/detail/0903201511849.html
// Source: http://www.codesnippet.cn/detail/0903201511849.html