mirror of
https://github.com/gristlabs/grist-core.git
synced 2024-10-27 20:44:07 +00:00
b82eec714a
Summary: this moves sandbox/grist to core, and adds a requirements.txt file for reconstructing the content of sandbox/thirdparty. Test Plan: existing tests pass. Tested core functionality manually. Tested docker build manually. Reviewers: dsagal Reviewed By: dsagal Differential Revision: https://phab.getgrist.com/D2563
32 lines
1.1 KiB
Python
32 lines
1.1 KiB
Python
"""
|
|
Simple class which, given a sample, can quickly count the size of overlap with an iterable.
|
|
All elements of sample must be hashable.
|
|
|
|
This is mainly in its own file in order to be able to test and time possible alternative
|
|
implementations.
|
|
"""
|
|
class MatchCounter(object):
    """
    Counts how many distinct values of an iterable also occur in a fixed sample.

    The sample is stored as a set when the counter is constructed, so every element of the
    sample must be hashable.
    """

    def __init__(self, sample):
        """
        Remember the distinct elements of `sample` (any iterable of hashable values).
        """
        self.sample = set(sample)

    def count_unique(self, iterable):
        """
        Returns the number of distinct elements of `iterable` that are also in the sample.

        Elements of `iterable` that cannot be hashed are skipped rather than reported as an
        error: the sample holds only hashable values, so such elements are never counted.
        """
        # Implementation note carried over from the original author's timings:
        #     len(self.sample.intersection(iterable))
        # was measured as about 5 times faster, but it raises as soon as iterable contains a
        # non-hashable value (e.g. a list). Filtering with `isinstance(v, Hashable)` was measured
        # as another order of magnitude slower, which leaves this explicit loop as the next best
        # alternative.
        matched = set()
        for candidate in iterable:
            try:
                if candidate not in self.sample:
                    continue
                # Kept inside the try on purpose: adding to the set hashes the value again and
                # may raise TypeError even when the membership test above did not.
                matched.add(candidate)
            except TypeError:
                # A value that can't be hashed can't be counted as a match; move on.
                continue

        return len(matched)
|