Fix RTF parser (content-buffer reset and hex-escaped special characters)
This commit is contained in:
parent
07adc8ce4c
commit
9143d0a517
|
@ -318,24 +318,24 @@ class TableParser:
|
||||||
self.state = TableParser.READING_CONTENT
|
self.state = TableParser.READING_CONTENT
|
||||||
cWord = self.controlWordBuffer
|
cWord = self.controlWordBuffer
|
||||||
if cWord == 'trowd':
|
if cWord == 'trowd':
|
||||||
self.contentBuffer.truncate(0)
|
self.contentBuffer = StringIO()
|
||||||
elif cWord == 'row':
|
elif cWord == 'row':
|
||||||
self.onRow()
|
self.onRow()
|
||||||
self.contentBuffer.truncate(0)
|
self.contentBuffer = StringIO()
|
||||||
elif cWord == 'cell':
|
elif cWord == 'cell':
|
||||||
self.onColumn(self.contentBuffer.getvalue().strip())
|
self.onColumn(self.contentBuffer.getvalue().strip())
|
||||||
self.contentBuffer.truncate(0)
|
self.contentBuffer = StringIO()
|
||||||
elif cWord in ('bkmkstart', 'bkmkend'):
|
elif cWord in ('bkmkstart', 'bkmkend'):
|
||||||
self.state = TableParser.IGNORE
|
self.state = TableParser.IGNORE
|
||||||
self.controlWordBuffer = ''
|
self.controlWordBuffer = ''
|
||||||
|
|
||||||
def manageSpecialChar(self):
|
def manageSpecialChar(self):
|
||||||
specialChar = int(self.specialCharBuffer)
|
if len(self.specialCharBuffer) == 2:
|
||||||
self.specialCharBuffer = ''
|
specialChar = bytes.fromhex(self.specialCharBuffer).decode('utf-8')
|
||||||
if specialChar in self.specialChars:
|
self.specialCharBuffer = ''
|
||||||
self.contentBuffer.write(self.specialChars[specialChar])
|
self.state = TableParser.READING_CONTENT
|
||||||
else:
|
if specialChar is not '':
|
||||||
print(('Warning: char %d not known.' % specialChar))
|
self.contentBuffer.write(specialChar)
|
||||||
self.state = TableParser.READING_CONTENT
|
|
||||||
def bufferize(self, char):
|
def bufferize(self, char):
|
||||||
if self.state == TableParser.READING_CONTROL_WORD:
|
if self.state == TableParser.READING_CONTROL_WORD:
|
||||||
self.controlWordBuffer += char
|
self.controlWordBuffer += char
|
||||||
|
@ -346,20 +346,16 @@ class TableParser:
|
||||||
def parse(self):
|
def parse(self):
|
||||||
for line in self.input:
|
for line in self.input:
|
||||||
for char in line:
|
for char in line:
|
||||||
|
if self.state == TableParser.READING_SPECIAL_CHAR:
|
||||||
|
self.bufferize(char)
|
||||||
|
self.manageSpecialChar()
|
||||||
|
continue
|
||||||
if self.isGroupDelimiter(char):
|
if self.isGroupDelimiter(char):
|
||||||
if self.state == TableParser.READING_SPECIAL_CHAR:
|
|
||||||
self.manageSpecialChar()
|
|
||||||
self.state = TableParser.READING_CONTENT
|
self.state = TableParser.READING_CONTENT
|
||||||
elif self.isControlWordStart(char):
|
elif self.isControlWordStart(char):
|
||||||
if self.state == TableParser.READING_CONTROL_WORD:
|
self.manageControlWord()
|
||||||
self.manageControlWord()
|
|
||||||
elif self.state == TableParser.READING_SPECIAL_CHAR:
|
|
||||||
self.manageSpecialChar()
|
|
||||||
self.controlWordBuffer = ''
|
|
||||||
self.state = TableParser.READING_CONTROL_WORD
|
self.state = TableParser.READING_CONTROL_WORD
|
||||||
elif self.isAlpha(char):
|
elif self.isAlpha(char):
|
||||||
if self.state == TableParser.READING_SPECIAL_CHAR:
|
|
||||||
self.manageSpecialChar()
|
|
||||||
self.bufferize(char)
|
self.bufferize(char)
|
||||||
elif self.isNumeric(char):
|
elif self.isNumeric(char):
|
||||||
self.bufferize(char)
|
self.bufferize(char)
|
||||||
|
@ -369,25 +365,15 @@ class TableParser:
|
||||||
elif self.state == TableParser.READING_CONTENT:
|
elif self.state == TableParser.READING_CONTENT:
|
||||||
if char not in ['\n', '\r']:
|
if char not in ['\n', '\r']:
|
||||||
self.contentBuffer.write(char)
|
self.contentBuffer.write(char)
|
||||||
elif self.state == TableParser.READING_SPECIAL_CHAR:
|
|
||||||
self.manageSpecialChar()
|
|
||||||
if char not in ['\n', '\r']:
|
|
||||||
self.contentBuffer.write(char)
|
|
||||||
elif self.isQuote(char):
|
elif self.isQuote(char):
|
||||||
if (self.state == TableParser.READING_CONTROL_WORD) and \
|
if (self.state == TableParser.READING_CONTROL_WORD) and \
|
||||||
not self.controlWordBuffer:
|
not self.controlWordBuffer:
|
||||||
self.state = TableParser.READING_SPECIAL_CHAR
|
self.state = TableParser.READING_SPECIAL_CHAR
|
||||||
elif self.state == TableParser.READING_SPECIAL_CHAR:
|
|
||||||
self.manageSpecialChar()
|
|
||||||
self.bufferize(char)
|
|
||||||
else:
|
else:
|
||||||
self.bufferize(char)
|
self.bufferize(char)
|
||||||
else:
|
else:
|
||||||
if self.state == TableParser.READING_CONTENT:
|
self.contentBuffer.write(char)
|
||||||
self.contentBuffer.write(char)
|
|
||||||
elif self.state == TableParser.READING_SPECIAL_CHAR:
|
|
||||||
self.manageSpecialChar()
|
|
||||||
self.contentBuffer.write(char)
|
|
||||||
if self.controlWordBuffer:
|
if self.controlWordBuffer:
|
||||||
self.manageControlWord()
|
self.manageControlWord()
|
||||||
if self.currentTableName:
|
if self.currentTableName:
|
||||||
|
|
Loading…
Reference in a new issue