#include "stdafx.h"

#include <assert.h>
#include <io.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <fstream>
#include <iostream>
#include <list>
#include <map>
#include <string>
// Capacity of the fixed-size filenames table below. A typed constant is used
// instead of a macro so the value follows normal C++ scoping and type rules.
const int MOST_FILES = 100000;

using namespace std;

// Number of occurrences of each word, keyed by the lower-cased word.
map<string, int> word_count;
// Spelling that is printed for each lower-cased key of word_count.
map<string, string> words;
// Paths of the files collected by FindFile; only the first files_number
// entries are in use.
string filenames[MOST_FILES];
// Number of valid entries in filenames.
int files_number = 0;

// Forward declarations of the functions defined later in this file.
void get_files(string);
void number_count();
void externed();
void pause();
int FindFile(string, string);
void save_result();
void add_word(string);
int is_word(char);
int is_letter(char);
string to_lower(string);
int compare_words(string, string);
string cut_numbers(string);
int is_number(char);
- int main(int argc, char* argv[]){
- string filepath;
- if(argc == 3){
- filepath = argv[2];
- }
- else{
- filepath = argv[1];
- }
- get_files(filepath);
- // get_files("e:\\\\test");
- number_count();
- if(argc == 3){
- externed();
- }
- save_result();
- pause();
- return 0;
- }
- void get_files(string filepath){
- FindFile("a.txt", filepath.c_str());
- FindFile("a.cpp", filepath.c_str());
- FindFile("a.h", filepath.c_str());
- FindFile("a.cs", filepath.c_str());
- /*
- for(int i = 0; i < files_number; i++)
- {
- cout << filenames[i] << endl;
- }
- */
- }
- void number_count(){
- for(int i = 0; i < files_number; i++){
- ifstream in_file(filenames[i].c_str());
- if(!in_file){
- cerr << "Open " + filenames[i] + " failed." << endl;
- exit(1);
- }
- string current_word;
- while(in_file >> current_word){
- add_word(current_word);
- // cout << current_word << endl;
- }
- // cout << endl;
- in_file.close();
- }
- // cout << files_number << endl;
- }
- void externed(){
- map<string, string> :: iterator i, j;
- for(i = words.begin(); i != words.end(); i++){
- for(j = words.begin(); (j != words.end()) && (j != i); j++){
- if(cut_numbers(i -> first) == cut_numbers(j -> first)){
- if(compare_words(i -> second, j->second)){
- word_count[i -> first] += word_count[j -> first];
- word_count.erase(j -> first);
- words.erase(j);
- }
- else{
- word_count[j -> first] += word_count[i -> first];
- j = i++;
- word_count.erase(j -> first);
- words.erase(j);
- i--;
- }
- break;
- }
- }
- }
- /* i = words.begin();
- while(i != words.end()){
- i++;
- j = i;
- i--;
- if(j == words.end())
- break;
- if(cut_numbers(i -> first) == cut_numbers(j -> first)){
- word_count[i -> first] += word_count[j -> first];
- word_count.erase(j -> first);
- words.erase(j);
- }
- else
- i++;
- }
- */
- }
// Returns a with all trailing decimal digits ('0'-'9') removed.
//
// An empty string, or a string made up of digits only, yields an empty
// string; the search is bounded by the string itself, so no character before
// the start is ever read.
std::string cut_numbers(std::string a) {
    const std::string::size_type last_kept = a.find_last_not_of("0123456789");
    if (last_kept == std::string::npos) {
        return std::string();
    }
    return a.substr(0, last_kept + 1);
}
- void save_result(){
- ofstream out_file("result.txt");
- list<int> value;
- map<string, int> :: iterator i;
- list<int> :: iterator j;
- if(!out_file){
- cerr << "Open file to write failed" << endl;
- // pause();
- exit(1);
- }
- for(i = word_count.begin(); i != word_count.end(); i++){
- for(j = value.begin(); (j != value.end()) && (i -> second < *j); j++)
- ;
- if((j != value.end()) && (i -> second == *j))
- continue;
- else
- value.insert(j, i -> second);
- }
- for(j = value.begin(); j != value.end(); j++){
- for(i = word_count.begin(); i != word_count.end(); i++){
- if(*j == i -> second){
- out_file << words[i -> first] << ": " << *j << endl;
- }
- }
- }
- out_file.close();
- }
// Prints a prompt and blocks until a character can be read from standard
// input.
void pause() {
    std::cout << "Press enter to continue..." << '\n' << std::flush;
    (void)getchar();
}
- void add_word(string word){
- string tmp;
- for(unsigned i = 0; i < word.size(); i++){
- tmp = "";
- for(int j = i; is_word(word[j]); j++){
- tmp += word[j];
- i = j;
- // cout << tmp << endl;
- }
- int k = 0;
- for(k = 0; k < 4; k++){
- if(!is_letter(tmp[k])){
- break;
- }
- }
- if(k < 4)
- continue;
- word_count[to_lower(tmp)]++;
- if(words.count(to_lower(tmp))){
- if(compare_words(tmp, words[to_lower(tmp)])){
- words[to_lower(tmp)] = tmp;
- }
- }
- else{
- words[to_lower(tmp)] = tmp;
- }
- }
- }
// Returns 1 when ch is an ASCII letter or decimal digit, otherwise 0.
int is_word(char ch) {
    if (ch >= '0' && ch <= '9') {
        return 1;
    }
    if (ch >= 'a' && ch <= 'z') {
        return 1;
    }
    if (ch >= 'A' && ch <= 'Z') {
        return 1;
    }
    return 0;
}
// Returns 1 when ch is an ASCII letter (A-Z or a-z), otherwise 0.
int is_letter(char ch) {
    const bool upper = !(ch < 'A' || ch > 'Z');
    const bool lower = !(ch < 'a' || ch > 'z');
    if (upper || lower) {
        return 1;
    }
    return 0;
}
// Returns 1 when ch is a decimal digit ('0'-'9'), otherwise 0.
int is_number(char ch) {
    if (ch < '0' || ch > '9') {
        return 0;
    }
    return 1;
}
// Returns a copy of st in which every ASCII upper-case letter is replaced by
// its lower-case counterpart; all other characters are left untouched.
std::string to_lower(std::string st) {
    const char case_offset = 'a' - 'A';
    for (std::string::iterator it = st.begin(); it != st.end(); ++it) {
        const bool is_upper = !(*it < 'A' || *it > 'Z');
        if (is_upper) {
            *it += case_offset;
        }
    }
    return st;
}
// Returns 1 when a orders strictly before b, otherwise 0.
//
// The strings are compared character by character (as plain char values);
// the first differing position decides. If one string is a prefix of the
// other, the shorter one orders first. Equal strings yield 0.
int compare_words(std::string a, std::string b) {
    const std::string::size_type shared =
        a.size() < b.size() ? a.size() : b.size();

    for (std::string::size_type pos = 0; pos != shared; ++pos) {
        if (a[pos] != b[pos]) {
            return a[pos] < b[pos] ? 1 : 0;
        }
    }
    // No difference within the common prefix: only a shorter a wins.
    return a.size() < b.size() ? 1 : 0;
}
- int FindFile(string fileName, string filePath)
- {
- assert(fileName != "" && filePath != "");
- string exeName = fileName.substr(fileName.find_last_of('.'));
- string strPath = filePath;
- string filiterName = "*.*";
- if ( strPath[strPath.length() - 1] != '\\\\')
- {
- strPath = strPath + "\\\\";
- }
- _finddata_t fileInfo;
- long handle = _findfirst((strPath + filiterName).c_str(), &fileInfo);
- if (handle == -1L)
- {
- cout<<"Cannot Open The Path!"<<endl;
- // pause();
- exit(1);
- }
- do
- {
- string path = fileInfo.name;
- if (fileInfo.attrib & _A_SUBDIR)
- {
- if (strcmp(fileInfo.name, ".") != 0 && strcmp(fileInfo.name, "..") != 0)
- {
- FindFile(fileName, strPath + path + "\\\\");
- }
- }
- else if (fileInfo.attrib & _A_ARCH && path.substr(path.find_last_of('.')) == exeName)
- {
- // cout<<strPath + fileInfo.name<<endl;
- filenames[files_number] = strPath + fileInfo.name;
- files_number++;
- }
- }while (_findnext(handle, &fileInfo) == 0);
- _findclose(handle);
- return 0;
- }
// This snippet comes from http://www.codesnippet.cn/detail/160520149611.html
// Source: http://www.codesnippet.cn/detail/160520149611.html