One of the ones I have always been miffed about is FIRST. How many times are you neck-deep in an aggregate query and you just don't care what data is in a certain column, since all the results will be the same anyway? Using MAX() might seem perfectly reasonable, but if there are minute differences in the data you can end up "mixing and matching" against different rows. Consider the following situation:
FirstName | LastName | MiddleInitial | EmployeeID | Paycheck | PayDate |
---|---|---|---|---|---|
John | Smith | A | 12345 | $2,000.00 | Jan 25, 2013 |
JOHN | SMITH | ALBERT | 12345 | $2,000.00 | Jan 19, 2013 |
MAX() will examine the data in the column and return the maximum value, but you can get odd results.
select MAX(firstname), MAX(lastname), MAX(middleinital) from #foo;
select MAX(firstname), MAX(lastname), MAX(middleinital) from #foo;
John | Smith | ALBERT |
As you can see here, it's more or less the same data, but it can be jarring. This gets to be a bigger deal when dealing with columns that can depend on each other, like Address1 and Address2; different data can be in these, and together they represent a consistent, usable mailing address. Individually, however, they may give and end result that is really confusing:
Employeeid | Address1 | Address2 |
12345 | 25 East Nowhere | Suite 001A |
12345 | 25 East Nowhere St. Suite 001A | Attention: Bill |
...try THAT with MAX()...
So what to do? The answer is, FIRST. Grab the first row that has data in that column. The CLR code looks like this:
using System;
using System.Data.SqlTypes;
using Microsoft.SqlServer.Server;
[Serializable]
[Microsoft.SqlServer.Server.SqlUserDefinedAggregate(Format.UserDefined, MaxByteSize = -1)]
public struct First : IBinarySerialize
{
public string first;
public bool HasRows;
public void Init()
{
HasRows = false;
first = null;
}
public void Accumulate(SqlString Value)
{
HasRows = true;
if (!Value.IsNull && first == null)
{
first = Value.Value;
}
}
public void Merge(First Group)
{
HasRows = Group.HasRows;
if (Group.first != null) first = Group.first;
}
public SqlString Terminate()
{
if (!HasRows) return SqlString.Null;
return new SqlString(first);
}
public void Read(System.IO.BinaryReader r)
{
HasRows = r.ReadBoolean();
var isNull = r.ReadBoolean();
first = isNull ? null : r.ReadString();
}
public void Write(System.IO.BinaryWriter w)
{
w.Write(HasRows);
var isNull = (first == null);
w.Write(isNull);
if (!isNull) w.Write(first);
}
}
So what to do? The answer is, FIRST. Grab the first row that has data in that column. The CLR code looks like this:
using System;
using System.Data.SqlTypes;
using Microsoft.SqlServer.Server;
[Serializable]
[Microsoft.SqlServer.Server.SqlUserDefinedAggregate(Format.UserDefined, MaxByteSize = -1)]
public struct First : IBinarySerialize
{
public string first;
public bool HasRows;
public void Init()
{
HasRows = false;
first = null;
}
public void Accumulate(SqlString Value)
{
HasRows = true;
if (!Value.IsNull && first == null)
{
first = Value.Value;
}
}
public void Merge(First Group)
{
HasRows = Group.HasRows;
if (Group.first != null) first = Group.first;
}
public SqlString Terminate()
{
if (!HasRows) return SqlString.Null;
return new SqlString(first);
}
public void Read(System.IO.BinaryReader r)
{
HasRows = r.ReadBoolean();
var isNull = r.ReadBoolean();
first = isNull ? null : r.ReadString();
}
public void Write(System.IO.BinaryWriter w)
{
w.Write(HasRows);
var isNull = (first == null);
w.Write(isNull);
if (!isNull) w.Write(first);
}
}
How about a bitwise OR aggregation? Looking for any 1 in a group of bits...
using System;
using System.Data.SqlTypes;
using Microsoft.SqlServer.Server;
[Serializable]
[Microsoft.SqlServer.Server.SqlUserDefinedAggregate(Format.Native)]
public struct GroupOR
{
public bool hasRows;
public bool hasTrue;
public void Init()
{
hasRows = false;
hasTrue = false;
}
public void Accumulate(SqlBoolean Value)
{
hasRows = true;
if (Value.IsTrue) hasTrue = true;
}
public void Merge(GroupOR Group)
{
hasTrue = Group.hasTrue;
hasRows = Group.hasRows;
}
public SqlBoolean Terminate()
{
if (!hasRows) return SqlBoolean.Null;
if (hasTrue) return SqlBoolean.One;
return SqlBoolean.Zero;
}
}
...likewise, there is a bitwise AND... where all bits MUST be one...
using System;
using System.Data.SqlTypes;
using Microsoft.SqlServer.Server;
[Serializable]
[Microsoft.SqlServer.Server.SqlUserDefinedAggregate(Format.Native)]
public struct GroupAND
{
public bool hasRows;
public bool hasFalse;
public bool hasNull;
public void Init()
{
hasRows = false;
hasFalse = false;
hasNull = false;
}
public void Accumulate(SqlBoolean Value)
{
hasRows = true;
if (Value.IsFalse) hasFalse = true;
if (Value.IsNull) hasNull = true;
}
public void Merge(GroupAND Group)
{
hasFalse = Group.hasFalse;
hasNull = Group.hasNull;
hasRows = Group.hasRows;
}
public SqlBoolean Terminate()
{
if (!hasRows) return SqlBoolean.Null;
if (hasFalse || hasNull) return SqlBoolean.Zero;
return SqlBoolean.One;
}
}
You get the idea. I use these kinds of functions every day.
No comments:
Post a Comment