Skewness and Kurtosis can be calculated very efficiently with CLR User-Defined Aggregate functions (UDAs). As I mentioned in my previous blog, the formula for the skewness consists of sum of the deviations from the mean divided by standard deviation on the third degree and the formula for the kurtosis consists of sum of the deviations from the mean divided by standard deviation on the fourth degree. I start with some mathematics – expand the subtraction of the mean from the current value on the 3rd and 4th degree:
(x-µ) 3 = x3 – 3x2 µ + 3x µ2 – µ3
(x-µ) 4 = x4 – 4x3 µ + 6 x2µ2 – 4 xµ3 + µ4
Now from here is is easy – just use the fact that the sum is distributive over the product. Here is the C# code for both functions:
using System;
using System.Data;
using System.Data.SqlClient;
using System.Data.SqlTypes;
using Microsoft.SqlServer.Server;
[Serializable]
[SqlUserDefinedAggregate(
Format.Native,
IsInvariantToDuplicates = false,
IsInvariantToNulls = true,
IsInvariantToOrder = true,
IsNullIfEmpty = false)]
public struct Skew
{
private double rx; // running sum of current values (x)
private double rx2; // running sum of squared current values (x^2)
private double r2x; // running sum of doubled current values (2x)
private double rx3; // running sum of current values raised to power 3 (x^3)
private double r3x2; // running sum of tripled squared current values (3x^2)
private double r3x; // running sum of tripled current values (3x)
private Int64 rn; // running count of rows
public void Init()
{
rx = 0;
rx2 = 0;
r2x = 0;
rx3 = 0;
r3x2 = 0;
r3x = 0;
rn = 0;
}
public void Accumulate(SqlDouble inpVal)
{
if (inpVal.IsNull)
{
return;
}
rx = rx + inpVal.Value;
rx2 = rx2 + Math.Pow(inpVal.Value, 2);
r2x = r2x + 2 * inpVal.Value;
rx3 = rx3 + Math.Pow(inpVal.Value, 3);
r3x2 = r3x2 + 3 * Math.Pow(inpVal.Value, 2);
r3x = r3x + 3 * inpVal.Value;
rn = rn + 1;
}
public void Merge(Skew Group)
{
this.rx = this.rx + Group.rx;
this.rx2 = this.rx2 + Group.rx2;
this.r2x = this.r2x + Group.r2x;
this.rx3 = this.rx3 + Group.rx3;
this.r3x2 = this.r3x2 + Group.r3x2;
this.r3x = this.r3x + Group.r3x;
this.rn = this.rn + Group.rn;
}
public SqlDouble Terminate()
{
double myAvg = (rx / rn);
double myStDev = Math.Pow((rx2 – r2x * myAvg + rn * Math.Pow(myAvg, 2)) / (rn – 1), 1d / 2d);
double mySkew = (rx3 – r3x2 * myAvg + r3x * Math.Pow(myAvg, 2) – rn * Math.Pow(myAvg, 3)) /
Math.Pow(myStDev,3) * rn / (rn – 1) / (rn – 2);
return (SqlDouble)mySkew;
}
}
[Serializable]
[SqlUserDefinedAggregate(
Format.Native,
IsInvariantToDuplicates = false,
IsInvariantToNulls = true,
IsInvariantToOrder = true,
IsNullIfEmpty = false)]
public struct Kurt
{
private double rx; // running sum of current values (x)
private double rx2; // running sum of squared current values (x^2)
private double r2x; // running sum of doubled current values (2x)
private double rx4; // running sum of current values raised to power 4 (x^4)
private double r4x3; // running sum of quadrupled current values raised to power 3 (4x^3)
private double r6x2; // running sum of squared current values multiplied by 6 (6x^2)
private double r4x; // running sum of quadrupled current values (4x)
private Int64 rn; // running count of rows
public void Init()
{
rx = 0;
rx2 = 0;
r2x = 0;
rx4 = 0;
r4x3 = 0;
r6x2 = 0;
r4x = 0;
rn = 0;
}
public void Accumulate(SqlDouble inpVal)
{
if (inpVal.IsNull)
{
return;
}
rx = rx + inpVal.Value;
rx2 = rx2 + Math.Pow(inpVal.Value, 2);
r2x = r2x + 2 * inpVal.Value;
rx4 = rx4 + Math.Pow(inpVal.Value, 4);
r4x3 = r4x3 + 4 * Math.Pow(inpVal.Value, 3);
r6x2 = r6x2 + 6 * Math.Pow(inpVal.Value, 2);
r4x = r4x + 4 * inpVal.Value;
rn = rn + 1;
}
public void Merge(Kurt Group)
{
this.rx = this.rx + Group.rx;
this.rx2 = this.rx2 + Group.rx2;
this.r2x = this.r2x + Group.r2x;
this.rx4 = this.rx4 + Group.rx4;
this.r4x3 = this.r4x3 + Group.r4x3;
this.r6x2 = this.r6x2 + Group.r6x2;
this.r4x = this.r4x + Group.r4x;
this.rn = this.rn + Group.rn;
}
public SqlDouble Terminate()
{
double myAvg = (rx / rn);
double myStDev = Math.Pow((rx2 – r2x * myAvg + rn * Math.Pow(myAvg, 2)) / (rn – 1), 1d / 2d);
double myKurt = (rx4 – r4x3 * myAvg + r6x2 * Math.Pow(myAvg, 2) – r4x * Math.Pow(myAvg, 3) + rn * Math.Pow(myAvg, 4)) /
Math.Pow(myStDev, 4) * rn * (rn + 1) / (rn – 1) / (rn – 2) / (rn – 3) –
3 * Math.Pow((rn – 1), 2) / (rn – 2) / (rn – 3);
return (SqlDouble)myKurt;
}
}
And finally the T-SQL code to check the functions:
— Skewness and Kurtosis with UDAs
USE AdventureWorksDW;
GO
EXEC sp_configure ‘clr enabled’, 1;
RECONFIGURE WITH OVERRIDE;
GO
— Load CS Assembly
CREATE ASSEMBLY DescriptiveStatistics
FROM ‘yourpathyourdllname.dll’
WITH PERMISSION_SET = SAFE;
GO
— Create the aggregates
CREATE AGGREGATE dbo.Skew(@s float)
RETURNS float
EXTERNAL NAME DescriptiveStatistics.Skew;
GO
CREATE AGGREGATE dbo.Kurt(@s float)
RETURNS float
EXTERNAL NAME DescriptiveStatistics.Kurt;
GO
— Test
SELECT dbo.Skew(Age), dbo.Kurt(Age)
FROM vTargetMail
GO
— Clean-up
DROP AGGREGATE dbo.Skew
DROP AGGREGATE dbo.Kurt
DROP ASSEMBLY DescriptiveStatistics
EXEC sp_configure ‘clr enabled’, 0;
RECONFIGURE WITH OVERRIDE;
GO