Friday 4 July 2008

XmlCompare utility for NUnit tests

Recently I have run into the usual problem with unit testing systems based on xml data – how to compare the xml output with expected document? I downloaded the standard XmlDiffPatch utility from Microsoft, probably from here: http://msdn.microsoft.com/en-us/xml/bb190622.aspx

But the main problem with this solution is that the difference report is not very human-readable, and far too complicated for my needs. At my previous company I had access to a brilliant implementation created by Stefan, but that is no longer the case. That is why I had to write my own implementation; the solution, together with some unit tests, can be found here.

My implementation gives you:
(1) Nice error messages, including the difference type, values that are different and XPaths pointing to these differences. It is so much easier to figure out what went wrong!
(2) A callback when a difference is found, where you can decide to ignore the difference. This is useful if your xml contains timestamp or some other data that is not necessarily identical.

Xml namespaces are not implemented at all, so if you wish to use this tool for namespace-sensitive content, you will need to add support for them yourself.

XmlComparer is the main class. The constructor takes the expected xml (the base for the comparison):

/// <summary>
/// The parsed expected document; every call to Compare is checked against it.
/// </summary>
private readonly XmlDocument expectedDocument;

/// <summary>
/// Creates a comparer whose baseline is the given expected xml.
/// </summary>
/// <param name="expectedXml">Xml text that actual documents will be compared with.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="expectedXml"/> is null or an empty string.
/// </exception>
public XmlComparer(string expectedXml)
{
    bool nothingSupplied = expectedXml == null || expectedXml.Length == 0;
    if (nothingSupplied)
    {
        throw new ArgumentNullException("expectedXml");
    }

    // Parse first, then publish to the field.
    XmlDocument parsed = new XmlDocument();
    parsed.LoadXml(expectedXml);
    expectedDocument = parsed;
}

The public method Compare(actualXml) and the two private worker methods CompareElements and CompareAttributes do the job:

/// <summary>
/// Parses <paramref name="actualXml"/> and compares it, starting at the document
/// element, with the expected document given to the constructor.
/// </summary>
/// <param name="actualXml">Xml text produced by the code under test.</param>
/// <exception cref="XmlDifferentException">
/// <paramref name="actualXml"/> is null or empty, or a difference was reported
/// that no DifferenceFound subscriber marked as handled.
/// </exception>
public void Compare(string actualXml)
{
    bool nothingToCompare = actualXml == null || actualXml.Length == 0;
    if (nothingToCompare)
    {
        throw new XmlDifferentException("Actual xml was null or empty");
    }

    XmlDocument parsedActual = new XmlDocument();
    parsedActual.LoadXml(actualXml);

    XmlNode expectedRoot = expectedDocument.DocumentElement;
    XmlNode actualRoot = parsedActual.DocumentElement;
    CompareElements(expectedRoot, actualRoot);
}

/// <summary>
/// Compares two nodes position by position: name, node type, value, attributes
/// and then, recursively, their children at the same index.
/// </summary>
/// <param name="expected">Node from the expected document, or null if there is none at this position.</param>
/// <param name="actual">Node from the actual document, or null if there is none at this position.</param>
private void CompareElements(XmlNode expected, XmlNode actual)
{
    // A node missing on either side (or both) ends the comparison of this position.
    if (expected == null || actual == null)
    {
        if (expected != null)
        {
            HandleDifference(XmlDifferenceType.MissingNode, GetXPath(expected), null);
        }
        else if (actual != null)
        {
            HandleDifference(XmlDifferenceType.ExtraNode, null, GetXPath(actual));
        }
        return;
    }

    // The qualified names are compared, but the local names are what gets reported.
    if (!String.Equals(expected.Name, actual.Name))
    {
        HandleDifference(
            XmlDifferenceType.DifferentNode,
            GetXPath(expected),
            GetXPath(actual),
            expected.LocalName,
            actual.LocalName);
    }

    //TODO more details needed?
    if (expected.NodeType != actual.NodeType)
    {
        HandleDifference(
            XmlDifferenceType.DifferentNode,
            GetXPath(expected),
            GetXPath(actual),
            expected.NodeType.ToString(),
            actual.NodeType.ToString());
    }

    if (!String.Equals(expected.Value, actual.Value))
    {
        HandleDifference(
            XmlDifferenceType.NodeValue,
            GetXPath(expected),
            GetXPath(actual),
            expected.Value,
            actual.Value);
    }

    CompareAttributes(expected, actual);

    // Walk both child lists in parallel up to the longer one; indexing past the
    // end of the shorter list yields null, which the guard above reports.
    XmlNodeList expectedChildren = expected.ChildNodes;
    XmlNodeList actualChildren = actual.ChildNodes;
    int longest = Math.Max(expectedChildren.Count, actualChildren.Count);
    for (int index = 0; index < longest; index++)
    {
        CompareElements(expectedChildren[index], actualChildren[index]);
    }
}

/// <summary>
/// Compares the attributes of two nodes by name and reports missing attributes,
/// extra attributes and differing attribute values.
/// </summary>
/// <remarks>
/// Attributes for which IsSpecialAttribute returns true are skipped on both sides.
/// A node whose Attributes collection is null is treated as having no attributes,
/// so attributes present only on the expected side are reported as
/// MissingAttribute (path in the expected slot) and attributes present only on the
/// actual side as ExtraAttribute (path in the actual slot), whether or not the
/// other collection is null.
/// </remarks>
/// <param name="expected">Node from the expected document; must not be null.</param>
/// <param name="actual">Node from the actual document; must not be null.</param>
private void CompareAttributes(XmlNode expected, XmlNode actual)
{
    XmlAttributeCollection expectedAttributes = expected.Attributes;
    XmlAttributeCollection actualAttributes = actual.Attributes;

    // Same collection instance, or both null: nothing to compare.
    if (expectedAttributes == actualAttributes)
    {
        return;
    }

    // Pass 1: everything expected must exist in actual with the same value.
    if (expectedAttributes != null)
    {
        foreach (XmlAttribute expectedAttribute in expectedAttributes)
        {
            if (IsSpecialAttribute(expectedAttribute))
            {
                continue;
            }

            XmlAttribute actualAttribute =
                actualAttributes == null ? null : actualAttributes[expectedAttribute.Name];
            if (actualAttribute == null)
            {
                HandleDifference(XmlDifferenceType.MissingAttribute, GetXPath(expectedAttribute), null);
                continue;
            }

            if (actualAttribute.Value != expectedAttribute.Value)
            {
                HandleDifference(XmlDifferenceType.AttributeValue, GetXPath(expectedAttribute),
                    GetXPath(actualAttribute), expectedAttribute.Value, actualAttribute.Value);
            }
        }
    }

    // Pass 2: anything in actual that was not expected is an extra attribute.
    if (actualAttributes != null)
    {
        foreach (XmlAttribute actualAttribute in actualAttributes)
        {
            if (IsSpecialAttribute(actualAttribute))
            {
                continue;
            }

            XmlAttribute expectedAttribute =
                expectedAttributes == null ? null : expectedAttributes[actualAttribute.Name];
            if (expectedAttribute == null)
            {
                HandleDifference(XmlDifferenceType.ExtraAttribute, null, GetXPath(actualAttribute));
            }
        }
    }
}

So as you can see, I just compare elements and attributes one by one, ignoring namespaces in a rather brutal way:

/// <summary>Name (or prefix) that marks an attribute as a namespace declaration.</summary>
private const string XMLNS = "xmlns";

/// <summary>
/// Tells whether an attribute should be left out of the comparison because its
/// name or its prefix is "xmlns".
/// </summary>
/// <param name="attribute">Attribute to inspect; must not be null.</param>
/// <returns>true if the attribute's Name or Prefix equals "xmlns"; otherwise false.</returns>
private static bool IsSpecialAttribute(XmlAttribute attribute)
{
    return attribute.Name == XMLNS || attribute.Prefix == XMLNS;
}

The real power of this tool lies in the information that is contained in the error message and the callbacks. In the code above you probably noticed the GetXPath method I use to get an XPath. The trick here is that I always get an XPath with the element number in square brackets:

/// <summary>
/// Builds a positional path for a node, with every step carrying an index in
/// square brackets.
/// </summary>
/// <param name="node">Node to describe; may be null.</param>
/// <returns>
/// null for a null node; the owner element's path followed by "/@name" for an
/// attribute; the parent's path followed by "/text()" for a text node; otherwise
/// the path of the node itself.
/// </returns>
private static string GetXPath(XmlNode node)
{
    if (node == null)
    {
        return null;
    }

    StringBuilder path = new StringBuilder();

    if (node is XmlAttribute)
    {
        // Attributes hang off their owner element rather than a parent node.
        XmlAttribute attributeNode = (XmlAttribute)node;
        GetXPathForNodeRecursive(attributeNode.OwnerElement, path);
        path.AppendFormat("/@{0}", attributeNode.Name);
        return path.ToString();
    }

    if (node.NodeType == XmlNodeType.Text)
    {
        GetXPathForNodeRecursive(node.ParentNode, path);
        path.Append("/text()");
        return path.ToString();
    }

    GetXPathForNodeRecursive(node, path);
    return path.ToString();
}

/// <summary>
/// Appends "/name[n]" steps for the node and all of its ancestors below the
/// document node, outermost ancestor first.
/// </summary>
/// <param name="node">Node whose path is appended; must not be null.</param>
/// <param name="xPathSB">Builder that receives the path steps.</param>
private static void GetXPathForNodeRecursive(XmlNode node, StringBuilder xPathSB)
{
    // Emit the ancestors first so the steps come out in root-to-leaf order.
    XmlNode parent = node.ParentNode;
    if (parent != null && parent.NodeType != XmlNodeType.Document)
    {
        GetXPathForNodeRecursive(parent, xPathSB);
    }

    // 1-based position among preceding siblings that share this node's Name.
    int position = 1;
    for (XmlNode previous = node.PreviousSibling; previous != null; previous = previous.PreviousSibling)
    {
        if (previous.Name == node.Name)
        {
            position++;
        }
    }

    xPathSB.AppendFormat("/{0}[{1}]", node.LocalName, position);
}

The HandleDifference method is responsible for raising the event and throwing an exception if the difference was not handled by the user:

/// <summary>
/// Raised for every difference that is detected. The event arguments passed are
/// an XmlDifferentEventArgs instance; a subscriber sets its Handled flag to
/// suppress the exception for that difference.
/// </summary>
public event EventHandler DifferenceFound;

/// <summary>
/// Reports a difference that has no expected/actual values attached.
/// </summary>
private void HandleDifference(XmlDifferenceType differenceType, string expectedXPath, string actualXPath)
{
    HandleDifference(differenceType, expectedXPath, actualXPath, null, null);
}

/// <summary>
/// Raises DifferenceFound for the described difference and throws unless a
/// subscriber marked it as handled.
/// </summary>
/// <param name="differenceType">Kind of difference that was found.</param>
/// <param name="expectedXPath">Position in the expected document, or null.</param>
/// <param name="actualXPath">Position in the actual document, or null.</param>
/// <param name="valueExpected">Value found in the expected document, or null.</param>
/// <param name="valueActual">Value found in the actual document, or null.</param>
/// <exception cref="XmlDifferentException">
/// No subscriber set Handled to true; the message is the event arguments' ToString().
/// </exception>
private void HandleDifference(XmlDifferenceType differenceType, string expectedXPath, string actualXPath, string valueExpected, string valueActual)
{
    XmlDifferentEventArgs difference =
        new XmlDifferentEventArgs(differenceType, expectedXPath, actualXPath, valueExpected, valueActual);

    EventHandler subscribers = DifferenceFound;
    if (subscribers != null)
    {
        subscribers(this, difference);
    }

    if (difference.Handled)
    {
        return;
    }

    throw new XmlDifferentException(difference.ToString());
}

The type of the difference is described by the XmlDifferenceType enum, which is fairly self-explanatory:

/// <summary>
/// Kinds of difference the comparer can report. The numeric values are spelled
/// out and match the declaration order.
/// </summary>
public enum XmlDifferenceType
{
    /// <summary>Nodes at the same position differ in name or in node type.</summary>
    DifferentNode = 0,

    /// <summary>The actual document has a node where the expected one has none.</summary>
    ExtraNode = 1,

    /// <summary>The expected document has a node where the actual one has none.</summary>
    MissingNode = 2,

    /// <summary>The actual node carries an attribute the expected node does not have.</summary>
    ExtraAttribute = 3,

    /// <summary>The expected node carries an attribute the actual node does not have.</summary>
    MissingAttribute = 4,

    /// <summary>An attribute exists on both sides but with different values.</summary>
    AttributeValue = 5,

    /// <summary>Nodes at the same position have different values.</summary>
    NodeValue = 6
}

In the DifferenceFound event handler the user gets XmlDifferentEventArgs, which contains all the details about the difference. If the user wants to ignore the difference, she needs to set the Handled flag true.

/// <summary>
/// Describes a single difference found between the expected and the actual xml.
/// Everything except <see cref="Handled"/> is fixed at construction time.
/// </summary>
public class XmlDifferentEventArgs : EventArgs
{
    private readonly XmlDifferenceType differenceType;
    private readonly string expectedXPath;
    private readonly string actualXPath;
    private readonly string valueExpected;
    private readonly string valueActual;
    private bool handled;

    /// <summary>
    /// Creates arguments for a difference that has no values attached.
    /// </summary>
    public XmlDifferentEventArgs(XmlDifferenceType differenceType, string expectedXPath, string actualXPath)
        : this(differenceType, expectedXPath, actualXPath, null, null)
    {
    }

    /// <summary>
    /// Creates arguments carrying the positions and the values of a difference.
    /// </summary>
    /// <param name="differenceType">Kind of difference.</param>
    /// <param name="expectedXPath">Position in the expected document, or null.</param>
    /// <param name="actualXPath">Position in the actual document, or null.</param>
    /// <param name="valueExpected">Value in the expected document, or null.</param>
    /// <param name="valueActual">Value in the actual document, or null.</param>
    public XmlDifferentEventArgs(XmlDifferenceType differenceType, string expectedXPath, string actualXPath,string valueExpected, string valueActual)
    {
        this.differenceType = differenceType;
        this.expectedXPath = expectedXPath;
        this.actualXPath = actualXPath;
        this.valueExpected = valueExpected;
        this.valueActual = valueActual;
    }

    /// <summary>Kind of difference.</summary>
    public XmlDifferenceType DifferenceType
    {
        get { return this.differenceType; }
    }

    /// <summary>Position in the expected document; may be null.</summary>
    public string ExpectedXPath
    {
        get { return this.expectedXPath; }
    }

    /// <summary>Position in the actual document; may be null.</summary>
    public string ActualXPath
    {
        get { return this.actualXPath; }
    }

    /// <summary>Value in the expected document; may be null.</summary>
    public string ValueExpected
    {
        get { return this.valueExpected; }
    }

    /// <summary>Value in the actual document; may be null.</summary>
    public string ValueActual
    {
        get { return this.valueActual; }
    }

    /// <summary>
    /// Set to true by an event subscriber that wants the difference ignored.
    /// Starts out false.
    /// </summary>
    public bool Handled
    {
        get { return this.handled; }
        set { this.handled = value; }
    }

    /// <summary>
    /// Formats the difference as a three-line message; null values and paths are
    /// printed as empty strings.
    /// </summary>
    public override string ToString()
    {
        object[] parts = new object[]
        {
            this.differenceType,
            this.valueExpected ?? "",
            this.valueActual ?? "",
            this.expectedXPath ?? "",
            this.actualXPath ?? ""
        };
        return String.Format(
            "Difference type: {0}, expected: {1}, actual: {2}\r\nPosition in expected document: {3}\r\nPosition in actual document: {4}",
            parts);
    }
}


USAGE
To compare two xmls that should be identical, you can use the AssertAreEqual static method from CompareXml class:

/// <summary>
/// Compares <paramref name="actual"/> with <paramref name="expected"/> using a
/// fresh XmlComparer that has no DifferenceFound subscribers.
/// </summary>
/// <param name="expected">Expected xml text.</param>
/// <param name="actual">Actual xml text.</param>
public static void AssertAreEqual(string expected, string actual)
{
    new XmlComparer(expected).Compare(actual);
}

The method above will throw an exception at the first difference found. If you are expecting some differences, you should define your own event handler to ignore these differences. TODO

Please let me know if you find any bugs or have any comments about this tool.

Thursday 3 July 2008

Lazy nature of yield return

It is commonly believed that using yield return mechanism in .Net 2 is ‘equivalent’ to creating your own collection and returning it as an IEnumerable (ignoring performance, which I have never tested myself). However, today I realized that the behaviour is actually quite different. When you use yield return, your code to generate subsequent elements of the collection is executed as you need them. When you create your own collection first on the other hand, you generate all the elements first, and use them later. Let’s look at a simple example:

/// <summary>
/// Iterator that yields the single value 0. Because this is an iterator method,
/// the null check below does not run when the method is called; it runs on the
/// first MoveNext() of the returned sequence's enumerator.
/// </summary>
/// <param name="argument">Any string; null makes the first MoveNext() throw.</param>
/// <returns>A lazily evaluated sequence containing 0.</returns>
public IEnumerable ThrowExeptionForNullParameterYieldReturn(string argument)
{
    if (Object.ReferenceEquals(argument, null))
    {
        throw new ArgumentNullException("argument");
    }

    yield return 0;
}

/// <summary>
/// Calling the iterator method with null only creates the sequence object; the
/// sequence is never enumerated here.
/// </summary>
[Test]
public void ExceptionWithYieldReturnIsConsumed()
{
    IEnumerable neverEnumerated = ThrowExeptionForNullParameterYieldReturn(null);

    Assert.IsNotNull(neverEnumerated);
}

Do you think that this test will pass or fail? It seems quite obvious that it will fail with the ArgumentNullException, before we even get to the first yield return statement, yes? Actually, not at all! Methods that use yield return are not ordinary methods that simply execute when you call them. This test will pass and create a valid IEnumerable object, and none of the code in ThrowExeptionForNullParameterYieldReturn is going to execute. Try it yourself: place a breakpoint on the first line of the function, run the test in the debugger and notice that the breakpoint is not hit at all.

The code checking the argument is going to run in a lazy fashion, during the first MoveNext() call on the enumerator:

/// <summary>
/// The argument check inside the iterator method runs on the first MoveNext()
/// call, which is where the expected exception comes from.
/// </summary>
[Test, ExpectedException(
    ExceptionType = typeof(ArgumentNullException),
    ExpectedMessage = "Value cannot be null.\r\nParameter name: argument")]
public void ExceptionWithYieldReturnIsThrownInMoveNext()
{
    IEnumerator enumerator = ThrowExeptionForNullParameterYieldReturn(null).GetEnumerator();

    enumerator.MoveNext();
}

The test above passes, so now the exception we expected was really thrown. And when using the traditional approach with creating our own collection, we get the exception before we get hold of the IEnumerable, as expected:

/// <summary>
/// Eager counterpart of the yield-return version: the argument is validated and
/// the list is built before the method returns.
/// </summary>
/// <param name="argument">Any non-null string.</param>
/// <returns>A list containing the single value 0.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="argument"/> is null; thrown by the call itself.
/// </exception>
public IEnumerable ThrowExeptionForNullParameterList(string argument)
{
    if (argument == null)
    {
        throw new ArgumentNullException("argument");
    }

    // List needs a type argument, and the local is "result" (lower case).
    List<int> result = new List<int>();
    result.Add(0);
    return result;
}

/// <summary>
/// With the list-based implementation the call itself is expected to throw; the
/// result is never enumerated.
/// </summary>
[Test, ExpectedException(
    ExceptionType = typeof(ArgumentNullException),
    ExpectedMessage = "Value cannot be null.\r\nParameter name: argument")]
public void ExceptionWithListIsThrownAtTheBeginning()
{
    // The return value is deliberately discarded.
    ThrowExeptionForNullParameterList(null);
}

The fact that yield return results in a lazy solution is quite important, as it has great impact not only on validating parameters, but also if your function has some side-effects. Imagine logic like this:

/// <summary>
/// Lazily yields the customers whose EligibleOrThrow is true. Each customer is
/// evaluated only when the caller asks for the next element, so an exception
/// from EligibleOrThrow surfaces in the middle of the caller's enumeration.
/// </summary>
IEnumerable GetCustomers()
{
    foreach (Customer candidate in Customers)
    {
        if (!candidate.EligibleOrThrow)
        {
            continue;
        }

        yield return candidate;
    }
}

/// <summary>
/// Pays 10000 to each customer returned by GetCustomers, one at a time as the
/// sequence is enumerated.
/// </summary>
/// <remarks>
/// GetCustomers is an iterator, so eligibility is checked between payments. If a
/// check throws part-way through, the customers enumerated before it have
/// already been paid.
/// </remarks>
void Pay()
{
    IEnumerable eligibleCustomers = GetCustomers();
    foreach (Customer eligibleCustomer in eligibleCustomers)
    {
        // Pay the loop variable; "customer" is not defined in this scope.
        eligibleCustomer.Pay(10000);
    }
}

So, who did your system pay if one of the customer.EligibleOrThrow throws?