Click here to Skip to main content
15,879,095 members
Please Sign up or sign in to vote.
1.00/5 (6 votes)
See more:
file is here:
Computer science or computing
science (abbreviated CS or CompSci) is the scientific
approach to computation and its applications.
A computer scientist specialises in the theory of
computation and the design of computers or computational
systems

#include "stdafx.h"

#include <algorithm>
#include <cctype>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>


using namespace std;

std::string int_to_string(int);

// Converts the integer i to its decimal text form and returns the result
// as a C++ string.
std::string int_to_string(int i) {
	std::ostringstream buffer;
	buffer << i;
	const std::string text = buffer.str();
	return text;
}


// An index to a file.
//
// Each FileIndex stores one integer file index plus a pointer to another
// FileIndex, so instances can be chained into a singly linked list.

class FileIndex {
 public:
  // Creates an entry holding the given file index.
  FileIndex(int);
  // Returns the entry that follows this one in the list.
  FileIndex* getNext();
  // Returns the file index stored in this entry.
  int getIndex();
  // Sets the entry that follows this one in the list.
  void setNext(FileIndex*);

 private:
  int index;       // The index of the file a word was found in.
  FileIndex* next; // The next file index.
};

// The class  for the inverted index generator.
class InvertedIndexGen {
 public:
    InvertedIndexGen();
    ~InvertedIndexGen();
    int build(const std::string&);
    FileIndex* lookup(const std::string&);
	void to_set(std:: set<int>&s, FileIndex* ) ;
    std::string toString();
    int numberOfWords();
 private:
    std::map<std::string,> idx;
    
    // 
    int loadIndexFile(std::vector<std::string xmlns:std="#unknown">&, const std::string&);
    int indexFiles(const std::vector<std::string>&);
    int readWords(const std::string&, std::vector<std::string>&);
    void insert(const std::string&, int);
};


// Default constructor: there is nothing to set up explicitly.
InvertedIndexGen::InvertedIndexGen()
{
}


// Destructor: deletes every FileIndex node reachable from the map and then
// empties the map itself.
InvertedIndexGen::~InvertedIndexGen() {
    typedef std::map<std::string, FileIndex*>::iterator EntryIter;

    for (EntryIter entry = idx.begin(); entry != idx.end(); ++entry) {
        FileIndex* node = entry->second;
        while (node != NULL) {
            // Fetch the successor before the current node is destroyed.
            FileIndex* following = node->getNext();
            delete node;
            node = following;
        }
        entry->second = NULL;
    }

    // Next, delete all map entries.
    idx.clear();
}


// Builds the inverted index from the index file named by `file`.
//
// First loads the list of files via loadIndexFile(), then indexes them via
// indexFiles(). Returns -1 as soon as either step reports -1, 0 otherwise.
int InvertedIndexGen::build(const std::string& file) {
    std::vector<std::string> files;

    const bool loaded = (loadIndexFile(files, file) != -1);
    if (!loaded)
        return -1;

    return (indexFiles(files) == -1) ? -1 : 0;
}



// Looks up a word in the inverted index.
//
// Returns the head of the word's FileIndex list, or NULL when the word is
// not in the index. Uses find() rather than operator[] so that looking up
// an unknown word does not add an empty entry to the map.
FileIndex* InvertedIndexGen::lookup(const std::string& word) {
    std::map<std::string, FileIndex*>::const_iterator found = idx.find(word);
    if (found == idx.end())
        return NULL;
    return found->second;
}











// Returns a non-zero value if c is an alphabetic character, 0 otherwise.
//
// The argument is converted to unsigned char first: calling isalpha() with a
// negative char value (other than EOF) is undefined behaviour.
int alpha(char c) {
	return std::isalpha(static_cast<unsigned char>(c));
}

// Returns true if c is not an alphabetic character.
//
// The argument is converted to unsigned char first: calling isalpha() with a
// negative char value (other than EOF) is undefined behaviour.
bool not_alpha(char c) {
	return std::isalpha(static_cast<unsigned char>(c)) == 0;
}





// This method splits the string str into a vector of strings.  That is, we
// split a sentence into words.
//
vector<string> split(const string& str) {
	vector<string> ret;
	string::const_iterator i = str.begin();
	while (i != str.end()) {
		// Ignore unrecognized characters (non-alpha):
		i = find_if(i, str.end(), alpha);

		// Find the next alpha word:
		string::const_iterator j = find_if(i, str.end(), not_alpha);

		// Copy the characters in [i, j)
		if (i != str.end())
			ret.push_back(string(i, j));

		i = j;
	}
	return ret;
}






// Reads the words of the file named by `file` and appends them to v.
//
// The file is read line by line and each line is broken into words with
// split(). Returns 0 on success; if the file cannot be opened an error is
// written to cerr and -1 is returned.
int InvertedIndexGen::readWords(const std::string& file, std::vector<std::string>& v) {
	// Open the file that was asked for, not a fixed path.
	std::ifstream infile(file.c_str());
	if (!infile) {
		std::cerr << "can't open file " << file << std::endl;
		return -1;
	}

	std::string line;
	while (std::getline(infile, line)) {
		const std::vector<std::string> words = split(line);
		v.insert(v.end(), words.begin(), words.end());
	}
	return 0;
}



//
// This method converts the FileIndex list fi into a set of
// integers representing the file indexes.
//
void InvertedIndexGen :: to_set(set<int>&s, FileIndex* fi) {
    FileIndex* p = fi;
    while (p != NULL) {
        s.insert(p->getIndex());
        p = p->getNext();
    }
}





// Indexes each file in the files vector.
//
// The position of a file name in `files` is used as its file index. A file
// name that occurs more than once is indexed only the first time; later
// occurrences are reported on cout and skipped.
//
// Returns 0 if every entry was indexed, or -1 if at least one entry was a
// duplicate or could not be read (the remaining entries are still indexed).
int InvertedIndexGen::indexFiles(const std::vector<std::string>& files) {
    std::set<std::string>    seen;   // Files we have already indexed.
    std::vector<std::string> words;  // Words of the file being indexed.
    int status = 0;
    int fcnt   = 0;                  // Index of the file being processed.

    for (std::vector<std::string>::const_iterator file_it = files.begin();
         file_it != files.end(); ++file_it, ++fcnt) {
        const std::string& curr_file = *file_it;

        // insert() reports through .second whether the name was new.
        if (!seen.insert(curr_file).second) {
            status = -1;
            std::cout << "duplicate input file: " << curr_file << ". Skipping." << std::endl;
            continue;
        }

        // Start from an empty list so words of earlier files are not
        // attributed to this one.
        words.clear();
        if (readWords(curr_file, words) == -1) {
            status = -1;
            continue;
        }

        for (std::vector<std::string>::const_iterator words_it = words.begin();
             words_it != words.end(); ++words_it) {
            insert(*words_it, fcnt);
        }
    }
    return status;
}



// Inserts a word into the inverted index.
void InvertedIndexGen::insert(const string& word, int fcnt) {
    if(idx.find(word) == idx.end()) {                // word not seen
        FileIndex* newEntry = new FileIndex(fcnt);  // add new pair to idx map
        idx.insert(pair<string,>(word, newEntry) );
        return;
    }
    else {                                           // word has been seen
        FileIndex* curr = lookup(word);
        while(curr->getIndex() != fcnt && curr->getNext() != NULL){  // iterate through word's FileIndex objects
            curr = curr->getNext();
        }
        if((curr->getIndex() == fcnt)) {  // if there's an index match, do nothing
            return;
        }
        else {                        // if there's no match, add new FileIndex pointer to value list
            FileIndex* addIndex = new FileIndex(fcnt);
            curr->setNext(addIndex);
        }
    }
}



// Loads the index file into the vector files.
int InvertedIndexGen::loadIndexFile(vector<string>& files, const string& idxfile) {
	std::ifstream infile("D:/C++ project/ConsoleApplication7/file.txt");
	if (infile) {
		std:: string line;
		int lineno = 1;
		while (getline(infile, line)) {
			if (line == "")
				cerr << "[" << int_to_string(lineno)
				     << "] found blank line in input file. skipping." << endl;
			else
				files.push_back(line);
			lineno++;
		}
		return 0;
	}
	else {
		cerr << "can't open file " << idxfile << endl;
		return -1;
	}
}




// Constructs a file index holding i, with no following entry yet.
//
FileIndex::FileIndex(int i)
    : index(i),
      next(NULL) {
}

//
// Accessor for the entry that follows this one in the list.
//
FileIndex* FileIndex::getNext() {
  return this->next;
}

//
// Makes n the entry that follows this one in the list.
//
void FileIndex::setNext(FileIndex* n) {
  this->next = n;
}

//
// Returns the index.
//
int FileIndex::getIndex() {
  return index;
}


//Returns the string representation of the inverted index.
string InvertedIndexGen::toString() {
    set<int>indexes;
    string   res = "";
    map<string,>::iterator it = idx.begin();
//    int *it2 ;
    while (it != idx.end()) {
        res += it->first + ": ";
        to_set(indexes, it->second);
        for (set<int>::iterator it2 = indexes.begin();
             it2 != indexes.end(); ++it2) {
            res += int_to_string(*it2) + " ";
        }    
        res += "\n";    
        indexes.clear();
        it++;
    }

    return res;
}

// Returns how many entries the index map currently holds.
int InvertedIndexGen::numberOfWords() {
	const int count = static_cast<int>(idx.size());
	return count;
}




// Main program entry point.
//
// Usage: InvIndexer file
//   file - the index file handed to InvertedIndexGen::build().
//
// Prints the index and the number of words to cout. Returns 0 on success
// and 1 on a usage error or when build() reports a failure.
int main(int argc, char* argv[]) {
	// argv[0] is the program itself; the index file must be given as argv[1].
	if (argc != 2) {
		std::cerr << "usage: InvIndexer file" << std::endl;
		return 1;
	}

	InvertedIndexGen ivgen;                    // Create the inverted index generator.
	const int status = ivgen.build(argv[1]);   // Build the index.

	// Whatever could be indexed is printed even if build() reported a problem.
	std::cout << ivgen.toString();             // Print a string representation of the index.
	std::cout << ivgen.numberOfWords() << " words" << std::endl;

	// Wait for input before exiting (presumably to keep the console window open).
	int s;
	std::cin >> s;

	return (status == 0) ? 0 : 1;
}
Posted
Updated 19-Dec-13 3:35am
v6
Comments
Albert Holguin 18-Dec-13 15:20pm    
Where is the error? ...you can't just do a code dump and expect us to go through every one of your lines of code.
Richard MacCutchan 18-Dec-13 15:29pm    
Your loadIndexFile function does nothing useful. Instead of just dumping your code here and expecting other people to sort it out for you, make an effort to use your debugger and trace through what is actually happening.
[no name] 18-Dec-13 16:21pm    
Thanks
CPallini 18-Dec-13 16:22pm    
My (virtual) 5. Learning a bit of C++ would also help, in my opinion.
Richard MacCutchan 18-Dec-13 16:35pm    
See their other questions, all much the same style.

Run the application under the debugger and go to the line indicated by the assertion message. Put a breakpoint on it. Run the application again until execution stops at the breakpoint. Find out which object is supposed to be non-null. Open the "Call Stack" debug window and browse through the stack to see where the wrong data comes from. Fix it.

—SA
 
Share this answer
 
You have called the program without an argument: it expects a filename. If you don't provide one, argv[] will have only one entry (argv[0], the program name), and argv[1] is a null pointer rather than a valid string.
 
Share this answer
 

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900