Data Browser - Viewing Site  Sector 23 Code Bank Logged in as:  Guest  




           


Extract Images from XLSX File using ASP.NET
I had a need to get the images pasted into various sheets from XLSX files and move them to a .NET program. I wanted to do this without Excel Installed, without any Excel Interop Dlls, and without any third party Excel manipulation tools.

I was using ADO to read data from Excel, but ADO does not include the image data when reading the file.

The solution is a very neat trick I have learned: An XLSX file is simply a .ZIP file renamed ".XLSX". So, the solution is to rename the XLSX file to .ZIP, and then open it using your favorite ZIP reader. You will find all the images in a 'media' subfolder.

Taking it one step further, what if you want to know on what sheet, row, and column the original image was pasted at? You will find various XML files within the zip folder with this information.
It takes a few of them linked together to recreate the image metadata, so I have made a neat tool to do this. You can open the XML files yourself and add even more functionality as needed; this literally just extracts the row, column, image name, and sheet name for each image in the XLSX file.

Note that I here reference Ionic.Zip, which is free. You can replace it with your favorite Zip Reader.

Usage:
Instantiate the class.
Iterate over ImagesLoaded items and call GetImageContent as needed.

>>>>>>>>>>>>>>>>>>>>>>


using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using Ionic.Zip;
using System.Xml.Linq;

namespace Imports
{
/// <summary>
/// Opens an XLSX file and reads the images within
/// </summary>
public class ExcelImageReader : IDisposable
{
IEnumerable<XElement> xSheets; // data about the sheets in the file
IEnumerable<XElement> xRelationships; // data about the relationships in the file
public List<ExcelImage> ImagesLoaded;
string mediaFolder;
string tempDir;

/// <summary>
/// Load images found in file into ImagesLoaded object
/// </summary>
/// <param name="xlsxPath"></param>
public ExcelImageReader(string xlsxPath)
{
ImagesLoaded = new List<ExcelImage>();
if (!File.Exists(xlsxPath))
throw new InvalidBusinessOperationException("XLSX is required to use this uploader.");
if (Path.GetExtension(xlsxPath).ToLower() != ".xlsx")
throw new InvalidBusinessOperationException("XLSX is required to use this uploader.");
string zipPath = Path.Combine(Path.GetDirectoryName(xlsxPath), Path.GetFileNameWithoutExtension(xlsxPath) + ".zip");
tempDir = Path.Combine(Path.GetDirectoryName(zipPath), "ZipImages", Guid.NewGuid().ToString());

if (!Directory.Exists(tempDir))
{ // make sure temp directory exists
if (!File.Exists(zipPath))
{
File.Copy(xlsxPath, zipPath);
}

Directory.CreateDirectory(tempDir);
using (ZipFile zip = new ZipFile(zipPath))
{
zip.ParallelDeflateThreshold = -1; // was causing hangs, had to turn off.
zip.ExtractAll(tempDir, ExtractExistingFileAction.OverwriteSilently);
}
}
if (File.Exists(zipPath))
File.Delete(zipPath); // clean up zip

// data we need to extract images...
mediaFolder = Path.Combine(tempDir, "xl", "media");
string relsFolder = Path.Combine(tempDir, "xl", "drawings", "_rels");
string drawingsFolder = Path.Combine(tempDir, "xl", "drawings");
string workbookPath = Path.Combine(tempDir, "xl", "workbook.xml");
var xDoc = XElement.Load(workbookPath);
var ns = xDoc.GetDefaultNamespace();
xSheets = xDoc.Element(ns + "sheets").Elements(ns + "sheet");
foreach (var xSheet in xSheets)
{
int sheetId = int.Parse(xSheet.Attribute("sheetId").Value);
string sheetName = xSheet.Attribute("name").Value;
FetchImages(sheetId, drawingsFolder, relsFolder, sheetName);
}
}

void FetchImages(int sheetId, string drawingsFolder, string relsFolder, string sheetName)
{
string drawingPath = Path.Combine(drawingsFolder, "drawing" + sheetId + ".xml");
string relsPath = Path.Combine(relsFolder, "drawing" + sheetId + ".xml.rels");
var xDoc = XElement.Load(relsPath);
var ns = xDoc.GetDefaultNamespace();
xRelationships = xDoc.Elements(ns + "Relationship");
xDoc = XElement.Load(drawingPath);
ns = xDoc.GetNamespaceOfPrefix("xdr");
var nsa = xDoc.GetNamespaceOfPrefix("a");
var xAnchors = xDoc.Elements(ns + "twoCellAnchor");
foreach (var xAnchor in xAnchors)
{
FetchAnchor(xAnchor, sheetName, ns, nsa);
}
}

void FetchAnchor(XElement xAnchor, string sheetName, XNamespace xdrNamespace, XNamespace anchorNamespace)
{
var xdrFrom = xAnchor.Element(xdrNamespace + "from");
var xdrTo = xAnchor.Element(xdrNamespace + "to");
var xdrBlip = xAnchor.Element(xdrNamespace + "pic").Element(xdrNamespace + "blipFill").Element(anchorNamespace + "blip");
var nsR = xdrBlip.GetNamespaceOfPrefix("r");
var relId = xdrBlip.Attribute(nsR + "embed").Value;
// load the relationship
var xRelationship = xRelationships.First(r => r.Attribute("Id").Value == relId);
var target = Path.GetFileName(xRelationship.Attribute("Target").Value);
ExcelImage ei = new ExcelImage();
ei.ImageFileName = target;
ei.RelationshipId = relId;
ei.ColumnFrom = int.Parse(xdrFrom.Element(xdrNamespace + "col").Value);
ei.RowFrom = int.Parse(xdrFrom.Element(xdrNamespace + "row").Value);
ei.ColumnTo = int.Parse(xdrTo.Element(xdrNamespace + "col").Value);
ei.RowTo = int.Parse(xdrTo.Element(xdrNamespace + "row").Value);
ei.SheetName = sheetName;
ImagesLoaded.Add(ei);
}

/// <summary>
/// Get the binary content of this file
/// </summary>
/// <param name="imageFileName"></param>
/// <returns></returns>
public byte[] GetImageContent(string imageFileName)
{
string path = Path.GetFileName(imageFileName);
path = Path.Combine(mediaFolder, path);
if (File.Exists(path))
return File.ReadAllBytes(path);
else
return null;
}

/// <summary>
/// clean up temp zip archive when done processing
/// </summary>
public void Dispose()
{
if (Directory.Exists(tempDir))
Directory.Delete(tempDir, true);
}
} // class
public class ExcelImage
{
public ExcelImage() { }

public string SheetName { get; set; }
public string ImageFileName { get; set; }
public string RelationshipId { get; set; }
public int ColumnFrom { get; set; }
public int ColumnTo { get; set; }
public int RowFrom { get; set; }
public int RowTo { get; set; }
}
}

Created By: amos 10/4/2013 5:50:56 PM
Updated: 10/4/2013 5:53:50 PM