Ok, so I wrote my row-by-row parser. I'll be dealing with the order of 1,000's of rows so it's speedy enough. Some suggestions on StackOverflow (
datatable - reading Excel Open XML is ignoring blank cells - Stack Overflow[
^]) may be more efficient:
foreach (OpenXmlWorksheetSelector selector in sheetIndexes)
{
WorksheetPart wsPart = (WorksheetPart)(wbPart.GetPartById(selector.Id));
if (wsPart != null)
{
Row[] rows = wsPart.Worksheet.Descendants<Row>().ToArray();
var headerCells = rows.SelectMany(r => r.Descendants<Cell>()).ToArray();
var maxColumnIndex = headerCells.Max(c =>
GetColumnIndexByLetters(c.GetAttribute("r", "").Value));
object[,] cells = new object[rows.Count(), maxColumnIndex + 4];
foreach (Row row in rows)
{
foreach (Cell cell in row.Descendants<Cell>())
{
cells[
int.Parse(row.GetAttribute("r", "").Value) - 1,
GetColumnIndexByLetters(cell.GetAttribute("r", "").Value)] = GetCellValue(cell,
wbPart);
}
}
object[][] allObjects = cells.ToEnumerable()
.Where(r => r.Any(c => !(c ?? "").ToString().IsEmpty()))
.Select(r => r.Concat(new object[]
{
selector.WorkBookName,
selector.Name,
selector.Id.ToString()
}).ToArray()).ToArray();
var p = Products(columnDetailSelectors, allObjects);
if (p != null)
products.AddRange(p);
}