The next thing you need in order to start creating data mining models with DMX in SSMS besides a data source is a data source view. For a UDM cube, it can be very complicated. For data mining models you usually need a singled table or two related tables only. I will create two versions of the procedure, i.e. two overloaded methods in a class. One will create a data source view with a single table, and the other one a data source view with two related tables, with relation limited to a single column only. As I mentioned, this should be enough for most of the cases; anyway, if you need more complex data source view, you can always expand existing methods or add another one.
I will use similar technique as I used for the procedure that creates a data source, which is described in my previous blog. You will have to be in the context of another AS database when executing the procedure. Besides that, the procedure will check whether the data source referred exists.
Data source view is presented in relational way, as collection of related tables. You create the schema very simply – you are actually creating an ADO.NET DataSet. Data source view inherits the schema from the DataSet. So, I will use DataAdapter to fill the schema of the DataTables in the DataSet, and then add a DataRelation to the Relations collection of the DataSet. The code:
using System;
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Data.OleDb;
using AMO = Microsoft.AnalysisServices;
using ADOMD = Microsoft.AnalysisServices.AdomdServer;
namespace ServerProcs
{
public class ASProcs
{
// variant with a single table
public static void CreateDataSourceView(string asDbName, string inpRds, string dsvName, string ownName1, string tblName1)
// Call in DMX:
// Call ServerProcs.CreateDataSourceView(ASDatabaseName, Existing_DataSource, New_DataSourceView,
// Table1_Schema, Table1_Name)
// Example:
// Call ServerProcs.CreateDataSourceView(‘TestDb’, ‘pubs’, ‘pubs’,
// ‘dbo’, ‘sales’)
{
AMO.Server currSvr = new AMO.Server();
currSvr.CaptureXml = false;
currSvr.Connect(“DataSource=” + ADOMD.Context.CurrentServerID + “;”);
if (asDbName == ADOMD.Context.CurrentDatabaseName)
{
throw new ArgumentException(” Can’t update current database! “);
}
// Data source must exist
AMO.Database targetDb = currSvr.Databases.GetByName(asDbName);
if (!targetDb.DataSources.Contains(inpRds))
{
throw new ArgumentException(” Create Data Source first! “);
}
AMO.RelationalDataSource existingRds = (AMO.RelationalDataSource)targetDb.DataSources.GetByName(inpRds);
// New data source view
AMO.DataSourceView newDsv = new AMO.DataSourceView(dsvName, dsvName);
if (targetDb.DataSourceViews.Contains(dsvName))
{
targetDb.DataSourceViews.Remove(dsvName);
}
// Schema is created with ADO.NET DataSet
OleDbDataAdapter relAdapter = new OleDbDataAdapter(“”, existingRds.ConnectionString);
DataSet myDataSet = new DataSet();
relAdapter.SelectCommand.CommandText = “SELECT * FROM ” + ownName1 + “.” + tblName1;
relAdapter.FillSchema(myDataSet, SchemaType.Mapped, tblName1);
// Schema is then mapped to the data source view
targetDb.DataSourceViews.Add(newDsv.ID);
targetDb.DataSourceViews[dsvName].DataSourceID = existingRds.ID;
targetDb.DataSourceViews[dsvName].Schema = myDataSet;
// Update the database
targetDb.Update(AMO.UpdateOptions.ExpandFull);
}
// overloaded variant with two tables connected with a foreign key on a single column
public static void CreateDataSourceView(string asDbName, string inpRds, string dsvName,
string ownName1, string tblName1, string ownName2, string tblName2, string colName)
// Call in DMX:
// Call ServerProcs.CreateDataSourceView(ASDatabaseName, Existing_DataSource, New_DataSourceView,
// Table1_Schema, Table1_Name, Table2_Schema, Table2_Name, FK_Column)
// Table1 is the parent (one side) table, Table2 is the child (many side) table
// Example:
// Call ServerProcs.CreateDataSourceView(‘TestDb’, ‘Northwind’, ‘Northwind’,
// ‘dbo’, ‘Customers’, ‘dbo’, ‘Orders’, ‘CustomerID’)
{
AMO.Server currSvr = new AMO.Server();
currSvr.CaptureXml = false;
currSvr.Connect(“DataSource=” + ADOMD.Context.CurrentServerID + “;”);
if (asDbName == ADOMD.Context.CurrentDatabaseName)
{
throw new ArgumentException(” Can’t update current database! “);
}
AMO.Database targetDb = currSvr.Databases.GetByName(asDbName);
if (!targetDb.DataSources.Contains(inpRds))
{
throw new ArgumentException(” Create Data Source first! “);
}
AMO.RelationalDataSource existingRds = (AMO.RelationalDataSource)targetDb.DataSources.GetByName(inpRds);
AMO.DataSourceView newDsv = new AMO.DataSourceView(dsvName, dsvName);
if (targetDb.DataSourceViews.Contains(dsvName))
{
targetDb.DataSourceViews.Remove(dsvName);
}
OleDbDataAdapter relAdapter = new OleDbDataAdapter(“”, existingRds.ConnectionString);
DataSet myDataSet = new DataSet();
relAdapter.SelectCommand.CommandText = “SELECT * FROM ” + ownName1 + “.” + tblName1;
relAdapter.FillSchema(myDataSet, SchemaType.Mapped, tblName1);
relAdapter.SelectCommand.CommandText = “SELECT * FROM ” + ownName2 + “.” + tblName2;
relAdapter.FillSchema(myDataSet, SchemaType.Mapped, tblName2);
myDataSet.Relations.Add(myDataSet.Tables[tblName1].Columns[colName],
myDataSet.Tables[tblName2].Columns[colName]);
targetDb.DataSourceViews.Add(newDsv.ID);
targetDb.DataSourceViews[dsvName].DataSourceID = existingRds.ID;
targetDb.DataSourceViews[dsvName].Schema = myDataSet;
targetDb.Update(AMO.UpdateOptions.ExpandFull);
}
}
}
Now it is very easy to create the data source view. here is the DMX code for both cases:
— Create a data source view
Call ServerProcs.CreateDataSourceView(‘TestDb’,
‘AdventureWorksDW’, ‘AdventureWorksDW’,
‘dbo’, ‘vTargetMail’);
— Another data source view, this time with two tables
Call ServerProcs.CreateDataSourceView(‘TestDb’,
‘AdventureWorksDW’, ‘AdventureWorksDW1’,
‘dbo’, ‘vAssocSeqOrders’,
‘dbo’, ‘vAssocSeqLineItems’,
‘OrderNumber’);
Finally, you have use the context of your target database by selecting it from the drop-down list in SSMS, and you can start creating data mining models:
— Create a model with structure
CREATE MINING MODEL [NBSample]
(
CustomerKey LONG KEY,
Gender TEXT DISCRETE,
[Number Cars Owned] LONG DISCRETE,
[Bike Buyer] LONG DISCRETE PREDICT
)
USING Microsoft_Naive_Bayes;
— Process the structure
INSERT INTO [NBSample_Structure]
(CustomerKey, Gender, [Number Cars Owned],[Bike Buyer])
OPENQUERY([AdventureWorksDW],
‘SELECT CustomerKey, Gender, [NumberCarsOwned], [BikeBuyer]
FROM [vTargetMail]’);
— Browse the model
SELECT *
FROM [NBSample].CONTENT;
— Prediction Join
— Singleton query
SELECT [NBSample].[Bike Buyer],
PredictHistogram([Bike Buyer])
FROM [NBSample] NATURAL PREDICTION JOIN
(SELECT ‘M’ AS [Gender],
0 AS [Number Cars Owned]) AS t;
— Openquery
SELECT t.[LastName],
t.[FirstName],
[NBSample].[Bike Buyer],
PredictProbability([Bike Buyer]) AS NBPredictProbability,
t.[Gender],
t.[NumberCarsOwned]
FROM [NBSample] PREDICTION JOIN
OPENQUERY([AdventureWorksDW],
‘SELECT [LastName],
[FirstName],
[Gender],
[NumberCarsOwned]
FROM [dbo].[vTargetMail]’) AS t
ON [NBSample].[Gender] = t.[Gender] AND
[NBSample].[Number Cars Owned] = t.[NumberCarsOwned];
That’s it – we are done. You can browse your model from SSMS with data mining viewers and you will see the model is perfect, just like it would be created with BIDS.