The code looks like this:
import xlrd from zope.interface import classProvides from zope.interface import implements from collective.transmogrifier.interfaces import ISection from collective.transmogrifier.interfaces import ISectionBlueprint from collective.transmogrifier.utils import resolvePackageReferenceOrFile class ExcelReader: def __init__(self, f, s, *args, **kwds): self.fieldnames = None book = xlrd.open_workbook(f) sheet = book.sheet_by_name(s) self.reader = list() for row in range(0, sheet.nrows): self.reader.append([sheet.cell_value(row, col) for col in range(0, sheet.ncols)]) self.reader = iter(self.reader) def __iter__(self): return self def next(self): row = self.reader.next() if self.fieldnames is None: self.fieldnames = row row = self.reader.next() return dict(zip(self.fieldnames, row)) class ExcelSource(object): classProvides(ISectionBlueprint) implements(ISection) def __init__(self, transmogrifier, name, options, previous): self.previous = previous filename = resolvePackageReferenceOrFile(options['filename']) sheet = options['sheet'] self.reader = ExcelReader(filename, sheet) def __iter__(self): for item in self.previous: yield item for item in self.reader: yield item
The pipeline section assumes that the field names are in the first row of the spreadsheet. Note also the use of iter() to convert the list of rows into an iterator.
To use the pipeline section you only need to add something like this in your transmogrifier's configuration:
[excelsource] blueprint = your.package.excelsource filename = excel_book sheet = sheet_inside_the_book
Remember to register the utility in your configure.zcml file:
<utility component="your.package.ExcelSource" name="your.package.excelsource" />
xlrd supports Excel spreadsheets up to version 2007.