forked from vanangamudi/solthiruthi-sothanaikal
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bloom.py
44 lines (34 loc) · 1.2 KB
/
bloom.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# -*- coding: utf-8 -*-
from bloom_filter import BloomFilter
from random import shuffle
from tqdm import tqdm
import tqdm
import csv
import utils
from resources import DEFAULT_DICTIONARY_FILES, XSV_DELIMITER
BLOOMFILTER_SIZE = 200000 # default number of items expected to be added to the filter
BLOOMFILTER_PROB = 0.05 # default false-positive probability for the filter
def build_bloom(filepaths,
                size = BLOOMFILTER_SIZE,
                prob = BLOOMFILTER_PROB,
                pbarp = False):
    """Build a Bloom filter from the first column of delimited dictionary files.

    Each file is opened with ``utils.openfile`` and parsed by ``csv.reader``
    using ``XSV_DELIMITER``. The first field of every row is treated as the
    token; any further fields (e.g. a count) are ignored.

    Args:
        filepaths: iterable of paths accepted by ``utils.openfile``.
        size: first positional argument passed to ``BloomFilter``
            (number of items to add).
        prob: second positional argument passed to ``BloomFilter``
            (false-positive probability).
        pbarp: when true, wrap each file in a ``tqdm`` progress bar whose
            description shows the most recently added token.

    Returns:
        The populated ``BloomFilter`` instance.
    """
    bloom = BloomFilter(size, prob)
    for filepath in filepaths:
        print('loading {}...'.format(filepath))
        # NOTE(review): utils.openfile is defined elsewhere; it is assumed to
        # return an iterable of text lines (typically a file object).
        handle = utils.openfile(filepath)
        lines = tqdm.tqdm(handle, ncols=100) if pbarp else handle
        try:
            for row in csv.reader(lines, delimiter=XSV_DELIMITER):
                # csv.reader yields [] for blank lines; skip them instead of
                # failing on tuple unpacking.
                if not row:
                    continue
                # Only the token is needed, so rows without a count column
                # are accepted as well.
                token = row[0]
                if token:
                    bloom.add(token)
                    if pbarp:
                        lines.set_description(token)
        finally:
            # Release the underlying handle even if parsing fails part-way.
            close = getattr(handle, 'close', None)
            if close is not None:
                close()
    return bloom
if __name__ == '__main__':
    # Interactive check: build the filter from the default dictionaries, then
    # report for each entered word whether the filter (probably) contains it.
    bloom = build_bloom(DEFAULT_DICTIONARY_FILES)
    while True:
        try:
            word = input('> ')
        except EOFError:
            # stdin was closed (Ctrl-D or end of piped input): exit quietly
            # instead of ending the session with a traceback.
            break
        # An empty line ends the session.
        if not word:
            break
        print('இருக்குதா? {}'.format('இருக்கு' if word in bloom else 'இல்லை'))