-
Notifications
You must be signed in to change notification settings - Fork 67
/
Copy pathtest_accuracy.py
executable file
·154 lines (120 loc) · 5.54 KB
/
test_accuracy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#! /usr/bin/env python
'''All the tests'''
import random
import string
import unittest
import pyreBloom
from redis import Redis
from pyreBloom import pyreBloomException
def sample_strings(length, count):
'''Return a set of sample strings'''
return [''.join(
random.sample(string.lowercase, length)) for i in range(count)]
class BaseTest(unittest.TestCase):
CAPACITY = 10000
ERROR_RATE = 0.1
KEY = 'pyreBloomTesting'
def setUp(self):
self.bloom = pyreBloom.pyreBloom(self.KEY, self.CAPACITY, self.ERROR_RATE)
self.redis = Redis()
def tearDown(self):
'''Remove the bloom filter at the provided test key in all databases'''
databases = int(self.redis.config_get('databases').get('databases', 0))
for db in range(databases):
pyreBloom.pyreBloom(self.KEY, 1, 0.1, db=db).delete()
class ErrorsTest(BaseTest):
'''Tests about various error conditions'''
def test_connection_refused(self):
'''In this test I want to make sure that we can catch errors when
connecting to a redis instance'''
self.assertRaises(pyreBloom.pyreBloomException, pyreBloom.pyreBloom,
'pyreBloomTesting', 100, 0.01, port=1234)
def test_error(self):
'''If we encounter a redis error, we should raise exceptions'''
self.bloom.delete()
# If it's the wrong key type, we should see errors. Specifically, this has one
# of the keys used as a hash instead of a string.
self.redis.hmset('pyreBloomTesting.0', {'hello': 5})
self.assertRaises(pyreBloomException, self.bloom.add, 'hello')
self.assertRaises(pyreBloomException, self.bloom.extend, ['a', 'b'])
self.assertRaises(pyreBloomException, self.bloom.contains, 'a')
self.assertRaises(pyreBloomException, self.bloom.contains, ['a', 'b'])
class FunctionalityTest(BaseTest):
def test_delete(self):
'''Make sure that when we delete the bloom filter, we really do'''
samples = sample_strings(20, 5000)
self.bloom.extend(samples)
self.bloom.delete()
self.assertEqual(len(self.bloom.contains(samples)), 0,
'Failed to actually delete filter')
def test_add(self):
'''Make sure we can add, check existing in a basic way'''
tests = ['hello', 'how', 'are', 'you', 'today']
for test in tests:
self.bloom.add(test)
for test in tests:
self.assertTrue(test in self.bloom)
def test_extend(self):
'''Make sure we can use the extend method to the same effect'''
tests = ['hello', 'how', 'are', 'you', 'today']
self.bloom.extend(tests)
for test in tests:
self.assertTrue(test in self.bloom)
def test_contains(self):
'''Make sure contains returns a list when given a list'''
tests = ['hello', 'how', 'are', 'you', 'today']
self.bloom.extend(tests)
self.assertEqual(tests, self.bloom.contains(tests))
def test_two_instances(self):
'''Make sure two bloom filters pointing to the same key work'''
bloom = pyreBloom.pyreBloom('pyreBloomTesting', 10000, 0.1)
tests = ['hello', 'how', 'are', 'you', 'today']
# Add them through the first instance
self.bloom.extend(tests)
self.assertEqual(tests, self.bloom.contains(tests))
# Make sure they're accessible through the second instance
self.assertEqual(tests, bloom.contains(tests))
class DbTest(BaseTest):
'''Make sure we can select a database'''
def test_select_db(self):
'''Can instantiate a bloom filter in a separate db'''
bloom = pyreBloom.pyreBloom(self.KEY, self.CAPACITY, self.ERROR_RATE, db=1)
# After adding key to our db=0 bloom filter, shouldn't see it in our db=0 bloom
samples = sample_strings(20, 100)
self.bloom.extend(samples)
self.assertEqual(len(bloom.contains(samples)), 0)
class AllocationTest(BaseTest):
'''Tests about large allocations'''
CAPACITY = 200000000
ERROR_RATE = 0.00001
def test_size_allocation(self):
'''Make sure we can allocate a bloom filter that would take more than
512MB (the string size limit in Redis)'''
included = sample_strings(20, 5000)
excluded = sample_strings(20, 5000)
# Add only the included strings
self.bloom.extend(included)
self.assertEqual(len(included), len(self.bloom.contains(included)))
false_positives = self.bloom.contains(excluded)
false_rate = float(len(false_positives)) / len(excluded)
self.assertLess(
false_rate, 0.00001, 'False positive error rate exceeded!')
# We also need to know that we can access all the keys we need
self.assertEqual(self.bloom.keys(),
['pyreBloomTesting.0', 'pyreBloomTesting.1'])
class Accuracytest(BaseTest):
'''Make sure we meet our accuracy expectations for the bloom filter'''
def test_random(self):
'''Insert some random strings, make sure we don't see another set of
random strings as in the bloom filter'''
included = sample_strings(20, 5000)
excluded = sample_strings(20, 5000)
# Add only the included strings
self.bloom.extend(included)
self.assertTrue(len(included) == len(self.bloom.contains(included)))
false_positives = self.bloom.contains(excluded)
false_rate = float(len(false_positives)) / len(excluded)
self.assertLess(
false_rate, 0.1, 'False positive error rate exceeded!')
if __name__ == '__main__':
unittest.main()