[gen] Indexer: the textual content of indexes now keeps only words of more than 2 chars; other improvements.
This commit is contained in:
parent
06c656d278
commit
fd5e88928d
3 changed files with 27 additions and 18 deletions
|
@ -69,16 +69,17 @@ def updateIndexes(installer, indexInfo):
|
|||
logger.info('Done.')
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
def splitIntoWords(text):
|
||||
def splitIntoWords(text, ignore=2):
|
||||
'''Split the cleaned index value p_text into words (returns a list of
|
||||
words). Words of a single char are ignored, excepted digits which are
|
||||
always kept. Duplicate words are removed (result is a set and not a
|
||||
list).'''
|
||||
words). Words of at most p_ignore chars are ignored, except digits
|
||||
which are always kept. Duplicate words are removed (result is a set and
|
||||
not a list).'''
|
||||
# Split p_text into words
|
||||
res = text.split()
|
||||
# Remove tokens of a single char (excepted if this char is a digit).
|
||||
i = len(res)-1
|
||||
while i > -1 :
|
||||
if (len(res[i]) < 2) and not res[i].isdigit():
|
||||
# Remove short words that are not numbers
|
||||
i = len(res) - 1
|
||||
while i > -1:
|
||||
if (len(res[i]) <= ignore) and not res[i].isdigit():
|
||||
del res[i]
|
||||
i -= 1
|
||||
# Remove duplicates
|
||||
|
|
|
@ -244,6 +244,7 @@ class ToolMixin(BaseMixin):
|
|||
# The search is triggered from an app-wide search
|
||||
klass = self.getAppyClass(className)
|
||||
fieldNames = getattr(klass, 'searchFields', None)
|
||||
if callable(fieldNames): fieldNames = fieldNames(self.appy())
|
||||
if not fieldNames:
|
||||
# Gather all the indexed fields on this class
|
||||
fieldNames = [f.name for f in self.getAllAppyTypes(className) \
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue