Skewness and Kurtosis can be calculated very efficiently with CLR User-Defined Aggregate functions (UDAs). As I mentioned in my previous blog, the formula for the skewness consists of sum of the deviations from the mean divided by standard deviation on the third degree and the formula for the kurtosis consists of sum of the deviations from the mean divided by standard deviation on the fourth degree. I start with some mathematics – expand the subtraction of the mean from the current value on the 3rd and 4th degree:

(x-µ) 3 = x3 – 3x2 µ + 3x µ2 – µ3

(x-µ) 4 = x4 – 4x3 µ + 6 x2µ2 – 4 xµ3 + µ4

Now from here is is easy – just use the fact that the sum is distributive over the product. Here is the C# code for both functions:

using System;

using System.Data;

using System.Data.SqlClient;

using System.Data.SqlTypes;

using Microsoft.SqlServer.Server;

[Serializable]

[SqlUserDefinedAggregate(

Format.Native,

IsInvariantToDuplicates = false,

IsInvariantToNulls = true,

IsInvariantToOrder = true,

IsNullIfEmpty = false)]

public struct Skew

{

private double rx;         // running sum of current values (x)

private double rx2;        // running sum of squared current values (x^2)

private double r2x;        // running sum of doubled current values (2x)

private double rx3;        // running sum of current values raised to power 3 (x^3)

private double r3x2;       // running sum of tripled squared current values (3x^2)

private double r3x;        // running sum of tripled current values (3x)

private Int64 rn;          // running count of rows

public void Init()

{

rx = 0;

rx2 = 0;

r2x = 0;

rx3 = 0;

r3x2 = 0;

r3x = 0;

rn = 0;

}

public void Accumulate(SqlDouble inpVal)

{

if (inpVal.IsNull)

{

return;

}

rx = rx + inpVal.Value;

rx2 = rx2 + Math.Pow(inpVal.Value, 2);

r2x = r2x + 2 * inpVal.Value;

rx3 = rx3 + Math.Pow(inpVal.Value, 3);

r3x2 = r3x2 + 3 * Math.Pow(inpVal.Value, 2);

r3x = r3x + 3 * inpVal.Value;

rn = rn + 1;

}

public void Merge(Skew Group)

{

this.rx = this.rx + Group.rx;

this.rx2 = this.rx2 + Group.rx2;

this.r2x = this.r2x + Group.r2x;

this.rx3 = this.rx3 + Group.rx3;

this.r3x2 = this.r3x2 + Group.r3x2;

this.r3x = this.r3x + Group.r3x;

this.rn = this.rn + Group.rn;

}

public SqlDouble Terminate()

{

double myAvg = (rx / rn);

double myStDev = Math.Pow((rx2 – r2x * myAvg + rn * Math.Pow(myAvg, 2)) / (rn – 1), 1d / 2d);

double mySkew = (rx3 – r3x2 * myAvg + r3x * Math.Pow(myAvg, 2) – rn * Math.Pow(myAvg, 3)) /

Math.Pow(myStDev,3) * rn / (rn – 1) / (rn – 2);

return (SqlDouble)mySkew;

}

}

[Serializable]

[SqlUserDefinedAggregate(

Format.Native,

IsInvariantToDuplicates = false,

IsInvariantToNulls = true,

IsInvariantToOrder = true,

IsNullIfEmpty = false)]

public struct Kurt

{

private double rx;         // running sum of current values (x)

private double rx2;        // running sum of squared current values (x^2)

private double r2x;        // running sum of doubled current values (2x)

private double rx4;        // running sum of current values raised to power 4 (x^4)

private double r4x3;       // running sum of quadrupled current values raised to power 3 (4x^3)

private double r6x2;       // running sum of squared current values multiplied by 6 (6x^2)

private double r4x;        // running sum of quadrupled current values (4x)

private Int64 rn;          // running count of rows

public void Init()

{

rx = 0;

rx2 = 0;

r2x = 0;

rx4 = 0;

r4x3 = 0;

r6x2 = 0;

r4x = 0;

rn = 0;

}

public void Accumulate(SqlDouble inpVal)

{

if (inpVal.IsNull)

{

return;

}

rx = rx + inpVal.Value;

rx2 = rx2 + Math.Pow(inpVal.Value, 2);

r2x = r2x + 2 * inpVal.Value;

rx4 = rx4 + Math.Pow(inpVal.Value, 4);

r4x3 = r4x3 + 4 * Math.Pow(inpVal.Value, 3);

r6x2 = r6x2 + 6 * Math.Pow(inpVal.Value, 2);

r4x = r4x + 4 * inpVal.Value;

rn = rn + 1;

}

public void Merge(Kurt Group)

{

this.rx = this.rx + Group.rx;

this.rx2 = this.rx2 + Group.rx2;

this.r2x = this.r2x + Group.r2x;

this.rx4 = this.rx4 + Group.rx4;

this.r4x3 = this.r4x3 + Group.r4x3;

this.r6x2 = this.r6x2 + Group.r6x2;

this.r4x = this.r4x + Group.r4x;

this.rn = this.rn + Group.rn;

}

public SqlDouble Terminate()

{

double myAvg = (rx / rn);

double myStDev = Math.Pow((rx2 – r2x * myAvg + rn * Math.Pow(myAvg, 2)) / (rn – 1), 1d / 2d);

double myKurt = (rx4 – r4x3 * myAvg + r6x2 * Math.Pow(myAvg, 2) – r4x * Math.Pow(myAvg, 3) + rn * Math.Pow(myAvg, 4)) /

Math.Pow(myStDev, 4) * rn * (rn + 1) / (rn – 1) / (rn – 2) / (rn – 3) –

3 * Math.Pow((rn – 1), 2) / (rn – 2) / (rn – 3);

return (SqlDouble)myKurt;

}

}

And finally the T-SQL code to check the functions:

— Skewness and Kurtosis with UDAs

GO

EXEC sp_configure ‘clr enabled’, 1;

RECONFIGURE WITH OVERRIDE;

GO

CREATE ASSEMBLY DescriptiveStatistics

FROM yourpathyourdllname.dll’

WITH PERMISSION_SET = SAFE;

GO

— Create the aggregates

CREATE AGGREGATE dbo.Skew(@s float)

RETURNS float

EXTERNAL NAME DescriptiveStatistics.Skew;

GO

CREATE AGGREGATE dbo.Kurt(@s float)

RETURNS float

EXTERNAL NAME DescriptiveStatistics.Kurt;

GO

— Test

SELECT dbo.Skew(Age), dbo.Kurt(Age)

FROM vTargetMail

GO

— Clean-up

DROP AGGREGATE dbo.Skew

DROP AGGREGATE dbo.Kurt

DROP ASSEMBLY DescriptiveStatistics

EXEC sp_configure ‘clr enabled’, 0;

RECONFIGURE WITH OVERRIDE;

GO