Using
xlrd, and based on
CSV source section, I've written a pipeline section to read data stored inside
Excel spreadsheets.
The code looks like this:
import xlrd
from zope.interface import classProvides
from zope.interface import implements
from collective.transmogrifier.interfaces import ISection
from collective.transmogrifier.interfaces import ISectionBlueprint
from collective.transmogrifier.utils import resolvePackageReferenceOrFile
class ExcelReader:
def __init__(self, f, s, *args, **kwds):
self.fieldnames = None
book = xlrd.open_workbook(f)
sheet = book.sheet_by_name(s)
self.reader = list()
for row in range(0, sheet.nrows):
self.reader.append([sheet.cell_value(row, col)
for col in range(0, sheet.ncols)])
self.reader = iter(self.reader)
def __iter__(self):
return self
def next(self):
row = self.reader.next()
if self.fieldnames is None:
self.fieldnames = row
row = self.reader.next()
return dict(zip(self.fieldnames, row))
class ExcelSource(object):
classProvides(ISectionBlueprint)
implements(ISection)
def __init__(self, transmogrifier, name, options, previous):
self.previous = previous
filename = resolvePackageReferenceOrFile(options['filename'])
sheet = options['sheet']
self.reader = ExcelReader(filename, sheet)
def __iter__(self):
for item in self.previous:
yield item
for item in self.reader:
yield item
The pipeline section assumes that the field names are in the first row of the spreadsheet. Note also the use of
iter() to convert the list of rows into an
iterator.
To use the pipeline section you only need to add something like this in your transmogrifier's configuration:
[excelsource]
blueprint = your.package.excelsource
filename = excel_book
sheet = sheet_inside_the_book
Remember to register the utility in your configure.zcml file:
<utility
component="your.package.ExcelSource"
name="your.package.excelsource"
/>
xlrd supports Excel spreadsheets
up to version 2007.