-
Notifications
You must be signed in to change notification settings - Fork 3
/
dictionary.c
207 lines (171 loc) · 5.21 KB
/
dictionary.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
/**
* dictionary.c
*
* Computer Science 50
* Problem Set 5
*
* Implements a dictionary's functionality.
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "dictionary.h"
#define ALPHA 27 // alphabet (26 chars) + 1 apostrophe
// node structure
typedef struct node
{
bool is_word;
struct node* children[ALPHA];
}
node;
// declare root node
node* root;
//declare int for size() function
int wordCount;
// global variable to build a nodeBucket
long fileSize;
// declare nodBucket for load() and unload()
node* nodeBucket;
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char* word)
{
// declare and initialize traverse pointer to NULL
node* trav = NULL;
// set traverse pointer to root (points to root with each new word to check)
trav = root;
// variable to iterate char by char in a word
int i = 0;
// check whole word
while(word[i] != '\0')
{
// distinguish between letter (handle case-insensitivity here if so) and apostrophe
char c = (isalpha(word[i]))?tolower(word[i]):word[i];
// case for appostrophe
if(c == '\'')
{
// check if pointer in the index array is NULL, if so - no word found, else go to next node
if(trav->children[ALPHA-1] == NULL)
{
return false;
}
trav = trav->children[ALPHA-1];
}
// case for letters
else if(isalpha(c))
{
// check if pointer in the index array is NULL, if so - no word found, else go to next node
if(trav->children[c - 'a'] == NULL)
{
return false;
}
trav = trav->children[c - 'a'];
}
// go to next char
i++;
}
// return boolean value from node after word is checked
return trav->is_word;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char* dictionary)
{
// open dictionary file
FILE* dict = fopen(dictionary, "rb"); // "b" mode is for optimization
if(dict == false)
{
printf("Could not open this dictionary (dictionary.c file)");
return false;
}
// get size of file
fseek(dict, 0, SEEK_END);
fileSize = ftell(dict);
// set file pointer back to the beginning
fseek(dict, 0, SEEK_SET);
// initialize nodeBucket with enough memory
nodeBucket = calloc((fileSize), sizeof(node)); // (fileSize) allocates enough memory for different dictionaries
// where to start
node* nextFreeNode = nodeBucket;
// read entire file, put data into buffer
char* buffer = malloc(fileSize + 1);
fread(buffer, 1, fileSize, dict);
// mark end of file
buffer[fileSize] = '\0';
// initialize root
root = nextFreeNode + 1;
// declare and initialize traverse pointer to navigate through data
node* trav = NULL;
// make new string to load words into a trie
char* words = buffer;
//initialize int for size() function
wordCount = 0;
// loop until '\0' (or nul) is reached; "*words" needed instead of "words" to represent a char
while(*words)
{
// traverse node points to root with each new word
trav = root;
// put all words into already allocated memory: check if a char represents
// a new line to find end of word, char by char
for(; *words != '\n' && *words; words++)
{
// case for apostrophe
if(*words == '\'')
{
// check the index of children array, if NULL, go to nextFreeNode, then put
// the right value into a trie
if(trav->children[ALPHA-1] == NULL)
{
trav->children[ALPHA-1] = nextFreeNode++;
}
trav = trav->children[ALPHA-1];
}
// case for letters
else
{
// check the index of children array, if NULL, go to nextFreeNode, then put
// the right value into a trie
if(trav->children[*words - 'a'] == NULL)
{
trav->children[*words - 'a'] = nextFreeNode++;
}
trav = trav->children[*words - 'a'];
}
}
//when word is found, set boolean value to true && increment wordCount
trav->is_word = true;
wordCount++;
// if new word is found (end of line), repeat with the rest of a buffer
if(*words == '\n')
{
words++;
}
}
// housekeeping
fclose(dict);
free(buffer);
// loaded
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
// already known by load()
return wordCount;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
// only need to free() one big chunk of memory (save time!)
free(nodeBucket);
// unloaded
return true;
}