Fix RTF parser

This commit is contained in:
Stefan Klug 2015-11-03 20:55:38 +01:00
parent 07adc8ce4c
commit 9143d0a517

View file

@ -318,24 +318,24 @@ class TableParser:
self.state = TableParser.READING_CONTENT self.state = TableParser.READING_CONTENT
cWord = self.controlWordBuffer cWord = self.controlWordBuffer
if cWord == 'trowd': if cWord == 'trowd':
self.contentBuffer.truncate(0) self.contentBuffer = StringIO()
elif cWord == 'row': elif cWord == 'row':
self.onRow() self.onRow()
self.contentBuffer.truncate(0) self.contentBuffer = StringIO()
elif cWord == 'cell': elif cWord == 'cell':
self.onColumn(self.contentBuffer.getvalue().strip()) self.onColumn(self.contentBuffer.getvalue().strip())
self.contentBuffer.truncate(0) self.contentBuffer = StringIO()
elif cWord in ('bkmkstart', 'bkmkend'): elif cWord in ('bkmkstart', 'bkmkend'):
self.state = TableParser.IGNORE self.state = TableParser.IGNORE
self.controlWordBuffer = '' self.controlWordBuffer = ''
def manageSpecialChar(self): def manageSpecialChar(self):
specialChar = int(self.specialCharBuffer) if len(self.specialCharBuffer) == 2:
specialChar = bytes.fromhex(self.specialCharBuffer).decode('utf-8')
self.specialCharBuffer = '' self.specialCharBuffer = ''
if specialChar in self.specialChars:
self.contentBuffer.write(self.specialChars[specialChar])
else:
print(('Warning: char %d not known.' % specialChar))
self.state = TableParser.READING_CONTENT self.state = TableParser.READING_CONTENT
if specialChar is not '':
self.contentBuffer.write(specialChar)
def bufferize(self, char): def bufferize(self, char):
if self.state == TableParser.READING_CONTROL_WORD: if self.state == TableParser.READING_CONTROL_WORD:
self.controlWordBuffer += char self.controlWordBuffer += char
@ -346,20 +346,16 @@ class TableParser:
def parse(self): def parse(self):
for line in self.input: for line in self.input:
for char in line: for char in line:
if self.isGroupDelimiter(char):
if self.state == TableParser.READING_SPECIAL_CHAR: if self.state == TableParser.READING_SPECIAL_CHAR:
self.bufferize(char)
self.manageSpecialChar() self.manageSpecialChar()
continue
if self.isGroupDelimiter(char):
self.state = TableParser.READING_CONTENT self.state = TableParser.READING_CONTENT
elif self.isControlWordStart(char): elif self.isControlWordStart(char):
if self.state == TableParser.READING_CONTROL_WORD:
self.manageControlWord() self.manageControlWord()
elif self.state == TableParser.READING_SPECIAL_CHAR:
self.manageSpecialChar()
self.controlWordBuffer = ''
self.state = TableParser.READING_CONTROL_WORD self.state = TableParser.READING_CONTROL_WORD
elif self.isAlpha(char): elif self.isAlpha(char):
if self.state == TableParser.READING_SPECIAL_CHAR:
self.manageSpecialChar()
self.bufferize(char) self.bufferize(char)
elif self.isNumeric(char): elif self.isNumeric(char):
self.bufferize(char) self.bufferize(char)
@ -369,25 +365,15 @@ class TableParser:
elif self.state == TableParser.READING_CONTENT: elif self.state == TableParser.READING_CONTENT:
if char not in ['\n', '\r']: if char not in ['\n', '\r']:
self.contentBuffer.write(char) self.contentBuffer.write(char)
elif self.state == TableParser.READING_SPECIAL_CHAR:
self.manageSpecialChar()
if char not in ['\n', '\r']:
self.contentBuffer.write(char)
elif self.isQuote(char): elif self.isQuote(char):
if (self.state == TableParser.READING_CONTROL_WORD) and \ if (self.state == TableParser.READING_CONTROL_WORD) and \
not self.controlWordBuffer: not self.controlWordBuffer:
self.state = TableParser.READING_SPECIAL_CHAR self.state = TableParser.READING_SPECIAL_CHAR
elif self.state == TableParser.READING_SPECIAL_CHAR:
self.manageSpecialChar()
self.bufferize(char)
else: else:
self.bufferize(char) self.bufferize(char)
else: else:
if self.state == TableParser.READING_CONTENT:
self.contentBuffer.write(char)
elif self.state == TableParser.READING_SPECIAL_CHAR:
self.manageSpecialChar()
self.contentBuffer.write(char) self.contentBuffer.write(char)
if self.controlWordBuffer: if self.controlWordBuffer:
self.manageControlWord() self.manageControlWord()
if self.currentTableName: if self.currentTableName: