[gen] Bugfixes in the search machinery.
This commit is contained in:
parent
cf2cbc52d6
commit
225ea927a4
16 changed files with 81 additions and 43 deletions
23
gen/utils.py
23
gen/utils.py
|
@ -91,18 +91,35 @@ class SomeObjects:
|
|||
else: getMethod = 'getObject'
|
||||
self.objects = [getattr(b, getMethod)() for b in brains]
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
def splitIntoWords(text, ignore=2):
    '''Splits the cleaned index value p_text into words and returns them as a
       set, so duplicate words are removed. p_text is split on whitespace.
       Words whose length is lower than or equal to p_ignore are dropped,
       except words made only of digits, which are always kept.'''
    # A word is kept if it is long enough or if it is a figure. Filtering
    # while building the set avoids repeated in-place deletions in a list.
    return set(word for word in text.split()
               if (len(word) > ignore) or word.isdigit())
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
class Keywords:
    '''This class allows to handle keywords that a user enters and that will be
       used as basis for performing requests in a TextIndex/XhtmlIndex.'''

    # Every character listed here is replaced by a space in the user input
    # before the input is split into words.
    toRemove = '?-+*()'

    def __init__(self, keywords, operator='AND', ignore=2):
        '''Builds the set of words to search from the raw p_keywords typed by
           the user. p_operator is stored as is; it is the operator to apply
           to the keywords (AND or OR). p_ignore is passed to
           splitIntoWords.'''
        # Clean the p_keywords that the user has entered
        # NOTE(review): normalizeText is defined elsewhere; it presumably
        # lowercases and strips accents -- confirm against sutils.
        words = sutils.normalizeText(keywords)
        # A lone star is considered as an empty search
        if words == '*': words = ''
        for c in self.toRemove: words = words.replace(c, ' ')
        # Keep a single assignment: the result is a set of words (and not a
        # list anymore), filtered by splitIntoWords according to p_ignore.
        self.keywords = splitIntoWords(words, ignore=ignore)
        # Store the operator to apply to the keywords (AND or OR)
        self.operator = operator
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue