[gen] Bugfixes in the search machinery.

This commit is contained in:
Gaetan Delannay 2015-01-02 16:16:48 +01:00
parent cf2cbc52d6
commit 225ea927a4
16 changed files with 81 additions and 43 deletions

View file

@ -91,18 +91,35 @@ class SomeObjects:
else: getMethod = 'getObject'
self.objects = [getattr(b, getMethod)() for b in brains]
# ------------------------------------------------------------------------------
def splitIntoWords(text, ignore=2):
    '''Splits the cleaned index value p_text into words and returns them as a
       set (duplicate words are thus removed). p_text is split on whitespace.
       Words whose length is lower than or equal to p_ignore are dropped,
       except words made only of digits, which are always kept, whatever
       their length.'''
    # Keep a word if it is long enough or if it is a figure. Building the set
    # directly filters and removes duplicates in a single pass.
    return set(word for word in text.split()
               if (len(word) > ignore) or word.isdigit())
# ------------------------------------------------------------------------------
class Keywords:
'''This class allows to handle keywords that a user enters and that will be
used as basis for performing requests in a TextIndex/XhtmlIndex.'''
toRemove = '?-+*()'
def __init__(self, keywords, operator='AND'):
# Clean the p_keywords that the user has entered.
def __init__(self, keywords, operator='AND', ignore=2):
# Clean the p_keywords that the user has entered
words = sutils.normalizeText(keywords)
if words == '*': words = ''
for c in self.toRemove: words = words.replace(c, ' ')
self.keywords = words.split()
self.keywords = splitIntoWords(words, ignore=ignore)
# Store the operator to apply to the keywords (AND or OR)
self.operator = operator