Script started on Mon Mar 31 19:51:38 2003 [root@Fizzgig /root]# cd C [root@Fizzgig C]# cat makefile wordtree: main.o tree.o node.o g++ -g main.o tree.o node.o -o wordtree main.o: main.cpp g++ -c main.cpp tree.o: tree.cpp tree.hpp g++ -c tree.cpp node.o: node.cpp node.hpp g++ -c node.cpp [root@Fizzgig C]# cat main.cpp /* Builds a tree of word nodes */ /* in order to determine certain details about a text file */ /* Author: Zach Tomaszewski */ /* Date: 29 Mar 2003 */ #include "tree.hpp" //includes node.hpp #include #include #include #define MAXWORDLENGTH 50 #define NONWORDCHARS "0123456789!?.,;:\"()[]&\\" Node* root; int main(int argc, char* argv[]){ char* filename; char* substr = "a"; if (argc == 2) { //one arg = filename filename = argv[1]; }else if (argc == 4 && strcmp(argv[1], "-s") == 0){ substr = argv[2]; filename = argv[3]; }else { cout << "\nUsage:\t wordtree [-s substring] filename" << endl; cout << "\n -s\tGives a count of the words containing "; cout << "\"substring\"."; cout << "\n \tDefault substring is \"a\".\n" << endl; return(1); } FILE* fileIn = fopen(filename, "r"); char nextWord[MAXWORDLENGTH]; while(fscanf(fileIn, "%s", nextWord) != EOF){ //drop any undesirable chars and lowercase //copy to avoid memory leak if (strtok(nextWord, NONWORDCHARS) != NULL){ strcpy(nextWord, strtok(nextWord, NONWORDCHARS)); }else{ strcpy(nextWord, ""); //no real word present } for(int i=0; iinsert(nextWord); } } root->printTree(); cout << "There are " << countNodes(root) << " unique words.\n"; cout << countContainingNodes(root, substr); cout << " unique words contain '" << substr << "'.\n"; cout << "The most frequently-occuring word occurs "; cout << mostFrequentWord(root) << " times.\n"; fclose(fileIn); return(0); } [root@Fizzgig C]# cat node.hpp #include #include #include class Node { public: Node(char* word, Node* left=NULL, Node* right=NULL); //constructor ~Node(); char* getWord() const; //returns a copy of the word int getCount() const {return count;} Node* getLeftNode() const {return left;} Node* getRightNode() const {return right;} Node* insert(char* newWord); //inserts a word into the tree void printTree(); //prints the tree private: char* word; //holds this node's word int count; //a count of this word's frequency Node* left; //the subnodes in the tree Node* right; }; [root@Fizzgig C]# cat node.cpp /* A class of nodes to use in building a tree. */ /* Each node contains a word and its frequency count. */ /* Author: Zach Tomaszewski */ /* Date: 29 Mar 2003 */ #include "node.hpp" //Constructors + Destructors /* Takes a word, and two Node pointers (default = NULL) Copies the word and not just the pointer. */ Node::Node(char* word, Node* left, Node* right){ this->word = strcpy(new char[strlen(word)+1], word); this->count = 1; this->left = left; this->right = right; } /* Destructor. */ Node::~Node(){ delete(word); } //Methods /* Returns a COPY of the word contained in a node. (Does NOT return a pointer to the word itself.) */ char* Node::getWord() const{ char* wordcopy = new char[strlen(word)+1]; strcpy(wordcopy, word); return wordcopy; } /* Inserts the given word in sorted order (by strcmp()) into the tree. If the word is already in the tree, its count is increased instead. The "this" node is treated as the root of the tree. Returns a pointer to the modified tree. */ Node* Node::insert(char* newWord){ if (this == NULL) { //base case: empty tree return new Node(newWord); }else { if (strcmp(this->word, newWord) > 0) { this->left = (this->left)->insert(newWord); }else if (strcmp(this->word, newWord) < 0) { this->right = (this->right)->insert(newWord); }else{ //same word this->count++; } return this; } } /* Prints the tree to the screen as an ordered list. */ void Node::printTree(){ if (this != NULL) { this->left->printTree(); printf("%3d %s\n", this->count, this->word); this->right->printTree(); } } [root@Fizzgig C]# cat tree.hpp #include #include "node.hpp" unsigned int countNodes(Node* tree); //total number of nodes in tree unsigned int countContainingNodes(Node* tree, char* str); unsigned int mostFrequentWord(Node* tree); [root@Fizzgig C]# cat tree.cpp /* Certain tree and node utility functions. */ /* Author: Zach Tomaszewski */ /* Date: 29 Mar 2003 */ #include "tree.hpp" /* Takes a tree node. Returns a count of all nodes (unique words) in the tree. */ unsigned int countNodes(Node* tree){ if (tree == NULL) { return 0; }else { return (countNodes(tree->getLeftNode()) + 1 + countNodes(tree->getRightNode()) ); } } /* Returns a count of all the nodes that contain the given substring. */ unsigned int countContainingNodes(Node* tree, char* substr){ unsigned int nodeCount = 0; char* word; if (tree == NULL) { nodeCount = 0; }else{ word = tree->getWord(); nodeCount = (strstr(word, substr) != NULL) ? 1 : 0; delete word; nodeCount += countContainingNodes(tree->getLeftNode(), substr); nodeCount += countContainingNodes(tree->getRightNode(), substr); } return nodeCount; } /* Takes a tree. Returns the highest node->count, which corresponds to the most frequent word in the input. */ unsigned int mostFrequentWord(Node* tree){ if (tree == NULL) { return 0; }else { unsigned int highCount = tree->getCount(); unsigned int leftCount = mostFrequentWord(tree->getLeftNode()); unsigned int rightCount = mostFrequentWord(tree->getRightNode()); highCount = (leftCount > highCount) ? leftCount : highCount; highCount = (rightCount > highCount) ? rightCount : highCount; return highCount; } } [root@Fizzgig C]# make g++ -c main.cpp g++ -c tree.cpp g++ -c node.cpp g++ -g main.o tree.o node.o -o wordtree [root@Fizzgig C]# wordtree se.txt 4 de 1 deux 1 dieu 1 french 1 proverb 4 secret 1 tous 1 trois There are 8 unique words. 0 unique words contain 'a'. The most frequently-occuring word occurs 4 times. [root@Fizzgig C]# wordtree -s e declares.txt 16 a 1 abdicated 1 abolish 3 abolishing 3 absolute 1 absolved 1 abuses 1 accommodation 1 accordingly 1 accustomed 1 acquiesce 1 act 2 acts 1 administration 1 affected 1 after 2 against 2 ages 10 all 1 allegiance 1 alliances 1 alone 1 already 2 alter 1 altering 2 america 5 among 1 amongst 1 amount 5 an [...] 2 unless 1 unusual 1 unwarrantable 1 unworthy 11 us 3 usurpations 1 utterly 1 valuable 1 voice 1 waging 1 wanting 3 war 1 warfare 1 warned 11 we 1 whatsoever 3 when 1 whenever 1 whereby 10 which 1 while 1 wholesome 2 whose 2 will 9 with 1 within 3 without 1 works 3 world 2 would There are 534 unique words. 346 unique words contain 'e'. The most frequently-occuring word occurs 78 times. [root@Fizzgig C]# exit Script done on Mon Mar 31 19:54:22 2003