188 lines
6 KiB
Python
188 lines
6 KiB
Python
# -*- coding: utf-8 -*-
|
|
################################################################################
|
|
#
|
|
# Rattail -- Retail Software Framework
|
|
# Copyright © 2010-2017 Lance Edgar
|
|
#
|
|
# This file is part of Rattail.
|
|
#
|
|
# Rattail is free software: you can redistribute it and/or modify it under the
|
|
# terms of the GNU General Public License as published by the Free Software
|
|
# Foundation, either version 3 of the License, or (at your option) any later
|
|
# version.
|
|
#
|
|
# Rattail is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
|
# details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License along with
|
|
# Rattail. If not, see <http://www.gnu.org/licenses/>.
|
|
#
|
|
################################################################################
|
|
"""
|
|
CSV File Utilities
|
|
|
|
Contains various utilities relating to CSV file processing.
|
|
|
|
.. note::
|
|
This module is named ``csvutil`` instead of ``csv`` primarily as a
|
|
workaround to the problem of ``PythonService.exe`` insisting on doing
|
|
relative imports.
|
|
"""
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
import csv
|
|
import codecs
|
|
from cStringIO import StringIO
|
|
|
|
|
|
class DictWriter(csv.DictWriter):
    """
    Thin wrapper around ``csv.DictWriter``.

    Its only purpose is to guarantee that :meth:`writeheader()` is available,
    including on Python 2.6 where the standard class does not provide it.
    """

    def writeheader(self):
        """
        Write a row containing the field names.

        Delegates to the standard library implementation when there is one;
        otherwise the field names are written directly through the underlying
        ``csv`` writer.
        """
        stdlib_impl = getattr(csv.DictWriter, 'writeheader', None)
        if stdlib_impl is None:
            # Python 2.6: no native implementation to delegate to.
            self.writer.writerow(self.fieldnames)
        else:
            return stdlib_impl(self)
|
|
|
|
|
|
class UTF8Recoder(object):
    """
    Iterator that reads an encoded stream and reencodes the input to UTF-8.

    Each iteration step yields the next line of the wrapped stream as a
    UTF-8 encoded byte string.

    :param fileobj: Binary file-like object to read from.

    :param encoding: Name of the codec used to decode ``fileobj``.

    :param errors: Error handling scheme given to the codec's stream reader;
       defaults to ``'strict'``.

    .. note::
       This class was stolen from the Python 2.7 documentation.
    """

    def __init__(self, fileobj, encoding, errors='strict'):
        self.errors = errors
        self.reader = codecs.getreader(encoding)(fileobj, errors=self.errors)

    def __iter__(self):
        return self

    def next(self):
        """
        Return the next line of the stream, encoded as UTF-8.
        """
        # The builtin next() drives the stream reader regardless of whether
        # it exposes the Python 2 (``next``) or Python 3 (``__next__``)
        # iterator method; calling ``.next()`` directly only works on
        # Python 2.
        return next(self.reader).encode('utf_8')

    # Python 3 spelling of the iterator protocol.
    __next__ = next
|
|
|
|
|
|
class UnicodeReader(object):
    """
    CSV reader for files stored in an arbitrary encoding.

    Iterating over an instance yields one list per CSV row, with every cell
    converted to a unicode string.

    :param fileobj: File-like object containing the encoded CSV data.

    :param dialect: CSV dialect handed to ``csv.reader``.

    :param encoding: Encoding of the data in ``fileobj``.

    :param errors: Decoding error scheme; defaults to ``'strict'``.

    Any additional keyword arguments are passed on to ``csv.reader``.

    .. note::
       This class was stolen from the Python 2.7 documentation.
    """

    def __init__(self, fileobj, dialect=csv.excel, encoding='utf_8', errors='strict', **kwargs):
        # The csv module wants byte strings, so feed it a UTF-8 view of the
        # input and convert the parsed cells back to unicode afterwards.
        utf8_lines = UTF8Recoder(fileobj, encoding, errors=errors)
        self.reader = csv.reader(utf8_lines, dialect=dialect, **kwargs)

    def __iter__(self):
        return self

    def next(self):
        """
        Return the next row as a list of unicode strings.
        """
        cells = next(self.reader)
        return [unicode(cell, 'utf_8') for cell in cells]
|
|
|
|
|
|
class UnicodeDictReader(object):
    """
    A CSV Dict reader which will iterate over lines in a CSV file, which is
    encoded in the given encoding.

    The first row of the file is consumed on construction and used as the
    list of column names (available as ``header``).  Every subsequent row is
    returned as a dict mapping column name to cell value; both the names and
    the values are unicode strings.

    :param fileobj: File-like object containing the encoded CSV data.

    :param dialect: CSV dialect handed to ``csv.reader``.

    :param encoding: Encoding of the data in ``fileobj``.

    :param errors: Decoding error scheme; defaults to ``'strict'``.

    Any additional keyword arguments are passed on to ``csv.reader``.

    :raises StopIteration: From the constructor, if the input has no header
       row at all.
    """

    def __init__(self, fileobj, dialect=csv.excel, encoding='utf_8', errors='strict', **kwargs):
        fileobj = UTF8Recoder(fileobj, encoding, errors=errors)
        self.reader = csv.reader(fileobj, dialect=dialect, **kwargs)
        # Decode the column names exactly like the data cells; otherwise a
        # non-ASCII column name would end up as a UTF-8 byte string key that
        # cannot be looked up with its unicode spelling.
        self.header = [unicode(s, 'utf_8') for s in next(self.reader)]

    def next(self):
        """
        Return the next data row as a dict keyed by column name.

        Cells beyond the last header column are ignored.

        :raises IndexError: If the row has fewer cells than the header.
        """
        row = next(self.reader)
        vals = [unicode(s, 'utf_8') for s in row]
        # Index by header position (rather than zip) so that a short row is
        # reported instead of silently producing a partial dict.
        return dict((name, vals[i]) for i, name in enumerate(self.header))

    def __iter__(self):
        return self
|
|
|
|
|
|
class UnicodeWriter(object):
|
|
"""
|
|
A CSV writer which will write rows to CSV file "f", which is encoded in the
|
|
given encoding.
|
|
|
|
.. note::
|
|
This class was stolen from the Python 2.7 documentation.
|
|
"""
|
|
|
|
def __init__(self, f, dialect='excel', encoding='utf_8', encoding_errors='strict', **kwargs):
|
|
# Redirect output to a queue
|
|
self.queue = StringIO()
|
|
self.writer = csv.writer(self.queue, dialect=dialect, **kwargs)
|
|
self.stream = f
|
|
self.encoder = codecs.getincrementalencoder(encoding)(encoding_errors)
|
|
|
|
def writerow(self, row):
|
|
self.writer.writerow([s.encode('utf_8') for s in row])
|
|
# Fetch UTF-8 output from the queue ...
|
|
data = self.queue.getvalue()
|
|
data = data.decode('utf_8')
|
|
# ... and reencode it into the target encoding
|
|
data = self.encoder.encode(data)
|
|
# write to the target stream
|
|
self.stream.write(data)
|
|
# empty queue
|
|
self.queue.truncate(0)
|
|
|
|
def writerows(self, rows):
|
|
for row in rows:
|
|
self.writerow(row)
|
|
|
|
|
|
class UnicodeDictWriter(UnicodeWriter):
    """
    ``DictWriter``-style companion to :class:`UnicodeWriter`: rows are given
    as dicts, and the output file may use any encoding.

    :param f: File-like object which receives the encoded output.

    :param fields: Sequence of keys; it determines which dict entries are
       written and in which column order.

    :param dialect: CSV dialect; defaults to ``'excel'``.

    :param encoding: Target encoding; defaults to ``'utf_8'``.

    Any additional keyword arguments are passed on to the
    :class:`UnicodeWriter` constructor.

    .. note::
       This logic was stolen from a `Django snippet`_, which in turn was
       derived from http://docs.python.org/lib/csv-examples.html .  The
       snippet itself defaults to UTF-16 with tab delimiters (described
       there as the only way to get unicode data into Excel via CSV); this
       class uses 'utf_8' and the regular 'excel' dialect by default
       instead.

    Usage example::

       fp = open('my-file.csv', 'wb')
       writer = UnicodeDictWriter(fp, ['name', 'age', 'shoesize'])
       writer.writeheader()
       writer.writerows([
           {'name': u'Bob', 'age': u'22', 'shoesize': u'7'},
           {'name': u'Sue', 'age': u'28', 'shoesize': u'6'},
           {'name': u'Ben', 'age': u'31'},
       ])
       fp.close()

    .. _`Django snippet`: https://djangosnippets.org/snippets/993/
    """

    def __init__(self, f, fields, dialect='excel', encoding='utf_8', **kwds):
        parent = super(UnicodeDictWriter, self)
        parent.__init__(f, dialect, encoding, **kwds)
        self.fields = fields

    def writerow(self, drow):
        """
        Write one row taken from the dict ``drow``.

        Cells are emitted in the order of ``fields``; a key missing from
        ``drow`` produces an empty cell.
        """
        cells = []
        for name in self.fields:
            cells.append(drow.get(name, ''))
        super(UnicodeDictWriter, self).writerow(cells)

    def writeheader(self):
        """
        Write the field names as a row.
        """
        # Go straight to the list-based writer of the parent class; the
        # field names are already a plain sequence, not a dict.
        parent = super(UnicodeDictWriter, self)
        parent.writerow(self.fields)
|