RSS

Category Archives: XLS

Convert ExcelML (Excel 2007/2010 xml file) to DataSet

Alright so I love Telerik and you guys who have worked with me know it but there is a issue with Telerik that is a deep dark secret I hide… lol its not that serious. So as we all know you can import regular xls files using the ace and jet OleDB libraries. Problem is that even with the new BIFF(Excel Binary) RadGrid export format Telerik put out this recent release it is still somewhat limited in its function. So the easiest way to keep formatting and make exports look pretty is to use the export type ExcelML which is a standardized xml format from excel. But there a catch. Guess what? There is no standardized way from Microsoft or anyone for that matter to process this file type back into your system!!!!!! Now you could always call the inter-op and COM libraries open the file and save it out as a binary xls but it requires you to have Excel installed on the computer that the program runs on. In my case that is going to be a web server and that is a BIG NO NO! So I wrote a way to custom parse the info back into the system as a DataSet dynamically (WITH NO REFLECTION!!!!!) and here follows the code in vb.net and c#. Make sure to include System.Data and System.XML.

    Public Shared Function ImportExcelXML(inputFileStream As Stream, hasHeaders As Boolean, autoDetectColumnType As Boolean) As DataSet
        Dim doc As New XmlDocument()
        doc.Load(New XmlTextReader(inputFileStream))
        Dim nsmgr As New XmlNamespaceManager(doc.NameTable)

        nsmgr.AddNamespace("o", "urn:schemas-microsoft-com:office:office")
        nsmgr.AddNamespace("x", "urn:schemas-microsoft-com:office:excel")
        nsmgr.AddNamespace("ss", "urn:schemas-microsoft-com:office:spreadsheet")

        Dim ds As New DataSet()

        For Each node As XmlNode In doc.DocumentElement.SelectNodes("//ss:Worksheet", nsmgr)
            Dim dt As New DataTable(node.Attributes("ss:Name").Value)
            ds.Tables.Add(dt)
            Dim rows As XmlNodeList = node.SelectNodes("ss:Table/ss:Row", nsmgr)
            If rows.Count > 0 Then
                Dim columns As New List(Of ColumnType)()
                Dim startIndex As Integer = 0

                If hasHeaders Then
                    For Each data As XmlNode In rows(0).SelectNodes("ss:Cell/ss:Data", nsmgr)
                        columns.Add(New ColumnType(GetType(String)))
                        'default to text
                        dt.Columns.Add(data.InnerText, GetType(String))
                    Next
                    startIndex += 1
                End If

                If autoDetectColumnType AndAlso rows.Count > 0 Then
                    Dim cells As XmlNodeList = rows(startIndex).SelectNodes("ss:Cell", nsmgr)
                    Dim actualCellIndex As Integer = 0
                    For cellIndex As Integer = 0 To cells.Count - 1
                        Dim cell As XmlNode = cells(cellIndex)
                        If cell.Attributes("ss:Index") IsNot Nothing Then
                            actualCellIndex = Integer.Parse(cell.Attributes("ss:Index").Value) - 1
                        End If

                        Dim autoDetectType As ColumnType = [getType](cell.SelectSingleNode("ss:Data", nsmgr))

                        If actualCellIndex >= dt.Columns.Count Then
                            dt.Columns.Add("Column" + actualCellIndex.ToString(), autoDetectType.type)
                            columns.Add(autoDetectType)
                        Else
                            dt.Columns(actualCellIndex).DataType = autoDetectType.type
                            columns(actualCellIndex) = autoDetectType
                        End If

                        actualCellIndex += 1
                    Next
                End If

                For i As Integer = startIndex To rows.Count - 1
                    Dim row As DataRow = dt.NewRow()
                    Dim cells As XmlNodeList = rows(i).SelectNodes("ss:Cell", nsmgr)
                    Dim actualCellIndex As Integer = 0
                    For cellIndex As Integer = 0 To cells.Count - 1
                        Dim cell As XmlNode = cells(cellIndex)
                        If cell.Attributes("ss:Index") IsNot Nothing Then
                            actualCellIndex = Integer.Parse(cell.Attributes("ss:Index").Value) - 1
                        End If

                        Dim data As XmlNode = cell.SelectSingleNode("ss:Data", nsmgr)

                        If actualCellIndex >= dt.Columns.Count Then
                            For a As Integer = dt.Columns.Count To actualCellIndex - 1
                                dt.Columns.Add("Column" + actualCellIndex.ToString(), GetType(String))
                                columns.Add(getDefaultType())
                            Next
                            Dim autoDetectType As ColumnType = [getType](cell.SelectSingleNode("ss:Data", nsmgr))
                            dt.Columns.Add("Column" + actualCellIndex.ToString(), GetType(String))
                            columns.Add(autoDetectType)
                        End If

                        If data IsNot Nothing Then
                            row(actualCellIndex) = data.InnerText.Trim()
                        End If
                        actualCellIndex += 1
                    Next

                    dt.Rows.Add(row)
                Next
            End If
        Next
        Return ds

        '<?xml version="1.0"?>
        '<?mso-application progid="Excel.Sheet"?>
        '<Workbook>
        ' <Worksheet ss:Name="Sheet1">
        '  <Table>
        '   <Row>
        '    <Cell><Data ss:Type="String">Item Number</Data></Cell>
        '    <Cell><Data ss:Type="String">Description</Data></Cell>
        '    <Cell ss:StyleID="s21"><Data ss:Type="String">Item Barcode</Data></Cell>
        '   </Row>
        ' </Worksheet>
        '</Workbook>
    End Function

    Structure ColumnType
        Public type As Type
        Private name As String
        Public Sub New(type As Type)
            Me.type = type
            Me.name = type.ToString().ToLower()
        End Sub
        Public Function ParseString(input As String) As Object
            If [String].IsNullOrEmpty(input) Then
                Return DBNull.Value
            End If
            Select Case type.ToString().ToLower()
                Case "system.datetime"
                    Return DateTime.Parse(input)
                Case "system.decimal"
                    Return Decimal.Parse(input)
                Case "system.boolean"
                    Return Boolean.Parse(input)
                Case Else
                    Return input
            End Select
        End Function
    End Structure

    Private Shared Function getDefaultType() As ColumnType
        Return New ColumnType(GetType([String]))
    End Function

    Private Shared Function [getType](data As XmlNode) As ColumnType
        Dim type As String = Nothing
        If data.Attributes("ss:Type") Is Nothing OrElse data.Attributes("ss:Type").Value Is Nothing Then
            type = ""
        Else
            type = data.Attributes("ss:Type").Value
        End If

        Select Case type
            Case "DateTime"
                Return New ColumnType(GetType(DateTime))
            Case "Boolean"
                Return New ColumnType(GetType([Boolean]))
            Case "Number"
                Return New ColumnType(GetType([Decimal]))
            Case ""
                Dim test2 As Decimal
                If data Is Nothing OrElse [String].IsNullOrEmpty(data.InnerText) OrElse Decimal.TryParse(data.InnerText, test2) Then
                    Return New ColumnType(GetType([Decimal]))
                Else
                    Return New ColumnType(GetType([String]))
                End If
            Case Else
                '"String"
                Return New ColumnType(GetType([String]))
        End Select
    End Function  
public static DataSet ImportExcelXML(Stream inputFileStream, bool hasHeaders, bool autoDetectColumnType)
{
	XmlDocument doc = new XmlDocument();
	doc.Load(new XmlTextReader(inputFileStream));
	XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);

	nsmgr.AddNamespace("o", "urn:schemas-microsoft-com:office:office");
	nsmgr.AddNamespace("x", "urn:schemas-microsoft-com:office:excel");
	nsmgr.AddNamespace("ss", "urn:schemas-microsoft-com:office:spreadsheet");

	DataSet ds = new DataSet();

	foreach (XmlNode node in doc.DocumentElement.SelectNodes("//ss:Worksheet", nsmgr)) {
		DataTable dt = new DataTable(node.Attributes("ss:Name").Value);
		ds.Tables.Add(dt);
		XmlNodeList rows = node.SelectNodes("ss:Table/ss:Row", nsmgr);
		if (rows.Count > 0) {
			List<ColumnType> columns = new List<ColumnType>();
			int startIndex = 0;

			if (hasHeaders) {
				foreach (XmlNode data in rows(0).SelectNodes("ss:Cell/ss:Data", nsmgr)) {
					columns.Add(new ColumnType(typeof(string)));
					//default to text
					dt.Columns.Add(data.InnerText, typeof(string));
				}
				startIndex += 1;
			}

			if (autoDetectColumnType && rows.Count > 0) {
				XmlNodeList cells = rows(startIndex).SelectNodes("ss:Cell", nsmgr);
				int actualCellIndex = 0;
				for (int cellIndex = 0; cellIndex <= cells.Count - 1; cellIndex++) {
					XmlNode cell = cells(cellIndex);
					if (cell.Attributes("ss:Index") != null) {
						actualCellIndex = int.Parse(cell.Attributes("ss:Index").Value) - 1;
					}

					ColumnType autoDetectType = getType(cell.SelectSingleNode("ss:Data", nsmgr));

					if (actualCellIndex >= dt.Columns.Count) {
						dt.Columns.Add("Column" + actualCellIndex.ToString(), autoDetectType.type);
						columns.Add(autoDetectType);
					} else {
						dt.Columns(actualCellIndex).DataType = autoDetectType.type;
						columns(actualCellIndex) = autoDetectType;
					}

					actualCellIndex += 1;
				}
			}

			for (int i = startIndex; i <= rows.Count - 1; i++) {
				DataRow row = dt.NewRow();
				XmlNodeList cells = rows(i).SelectNodes("ss:Cell", nsmgr);
				int actualCellIndex = 0;
				for (int cellIndex = 0; cellIndex <= cells.Count - 1; cellIndex++) {
					XmlNode cell = cells(cellIndex);
					if (cell.Attributes("ss:Index") != null) {
						actualCellIndex = int.Parse(cell.Attributes("ss:Index").Value) - 1;
					}

					XmlNode data = cell.SelectSingleNode("ss:Data", nsmgr);

					if (actualCellIndex >= dt.Columns.Count) {
						for (int a = dt.Columns.Count; a <= actualCellIndex - 1; a++) {
							dt.Columns.Add("Column" + actualCellIndex.ToString(), typeof(string));
							columns.Add(getDefaultType());
						}
						ColumnType autoDetectType = getType(cell.SelectSingleNode("ss:Data", nsmgr));
						dt.Columns.Add("Column" + actualCellIndex.ToString(), typeof(string));
						columns.Add(autoDetectType);
					}

					if (data != null) {
						row(actualCellIndex) = data.InnerText.Trim();
					}
					actualCellIndex += 1;
				}

				dt.Rows.Add(row);
			}
		}
	}
	return ds;

	//<?xml version="1.0"?>
	//<?mso-application progid="Excel.Sheet"?>
	//<Workbook>
	// <Worksheet ss:Name="Sheet1">
	//  <Table>
	//   <Row>
	//    <Cell><Data ss:Type="String">Item Number</Data></Cell>
	//    <Cell><Data ss:Type="String">Description</Data></Cell>
	//    <Cell ss:StyleID="s21"><Data ss:Type="String">Item Barcode</Data></Cell>
	//   </Row>
	// </Worksheet>
	//</Workbook>
}

struct ColumnType
{
	public Type type;
	private string name;
	public ColumnType(Type type)
	{
		this.type = type;
		this.name = type.ToString().ToLower();
	}
	public object ParseString(string input)
	{
		if (String.IsNullOrEmpty(input)) {
			return DBNull.Value;
		}
		switch (type.ToString().ToLower()) {
			case "system.datetime":
				return DateTime.Parse(input);
			case "system.decimal":
				return decimal.Parse(input);
			case "system.boolean":
				return bool.Parse(input);
			default:
				return input;
		}
	}
}

private static ColumnType getDefaultType()
{
	return new ColumnType(typeof(String));
}

private static ColumnType getType(XmlNode data)
{
	string type = null;
	if (data.Attributes("ss:Type") == null || data.Attributes("ss:Type").Value == null) {
		type = "";
	} else {
		type = data.Attributes("ss:Type").Value;
	}

	switch (type) {
		case "DateTime":
			return new ColumnType(typeof(DateTime));
		case "Boolean":
			return new ColumnType(typeof(Boolean));
		case "Number":
			return new ColumnType(typeof(Decimal));
		case "":
			decimal test2 = default(decimal);
			if (data == null || String.IsNullOrEmpty(data.InnerText) || decimal.TryParse(data.InnerText, test2)) {
				return new ColumnType(typeof(Decimal));
			} else {
				return new ColumnType(typeof(String));
			}
			break;
		default:
			//"String"
			return new ColumnType(typeof(String));
	}
}
 
Leave a comment

Posted by on July 6, 2012 in ASP.Net, C#, Excel, ExcelML, XLS, XML

 

Tags: , , , ,