[gen] Indexer: textual content of indexes now keeps only words of more than 2 chars, plus other improvements.

This commit is contained in:
Gaetan Delannay 2014-12-26 12:24:38 +01:00
parent 06c656d278
commit fd5e88928d
3 changed files with 27 additions and 18 deletions

View file

@ -69,16 +69,17 @@ def updateIndexes(installer, indexInfo):
logger.info('Done.')
# ------------------------------------------------------------------------------
def splitIntoWords(text):
def splitIntoWords(text, ignore=2):
'''Split the cleaned index value p_text into words (returns a list of
words). Words of a single char are ignored, excepted digits which are
always kept. Duplicate words are removed (result is a set and not a
list).'''
words). Words whose length does not exceed p_ignore are ignored, except
all-digit tokens, which are always kept. Duplicate words are removed
(result is a set and not a list).'''
# Split p_text into words
res = text.split()
# Remove tokens of a single char (excepted if this char is a digit).
i = len(res)-1
while i > -1 :
if (len(res[i]) < 2) and not res[i].isdigit():
# Remove shorter words not being figures
i = len(res) - 1
while i > -1:
if (len(res[i]) <= ignore) and not res[i].isdigit():
del res[i]
i -= 1
# Remove duplicates

View file

@ -244,6 +244,7 @@ class ToolMixin(BaseMixin):
# The search is triggered from an app-wide search
klass = self.getAppyClass(className)
fieldNames = getattr(klass, 'searchFields', None)
if callable(fieldNames): fieldNames = fieldNames(self.appy())
if not fieldNames:
# Gather all the indexed fields on this class
fieldNames = [f.name for f in self.getAllAppyTypes(className) \