This SQL CLR string splitter handles multi-character delimiters, ignores consecutive delimiters as well as pre and post delimiters... and best of all, it won't get itself stuck in an infinite loop.
using System.Collections;
using System.Data.SqlTypes;
using Microsoft.SqlServer.Server;
namespace SqlClrCommon
{
/// <summary>
/// SQL CLR String Splitting Function
/// </summary>
/// <example>
/// <code>
/// -- Split a pipe-delimeted string:
/// select * from dbo.Split(N'name|age|something', '|');
///
/// -- Using the functions against data in a table.
/// create table #temp( id int, stringData varchar(100));
///
/// insert into #temp values (1, 'one,two,three');
/// insert into #temp values (2, 'Apple,Banana,Pear');
///
/// select y.id, fn.string
/// from #temp as y
/// cross apply dbo.Split (y.stringData, ',') as fn;
/// drop table #temp;
///
/// -- Split an existing table column that has multiple
/// -- coid's for a single NPINumber:
/// select top 100 t.NPINumber,
/// split.item as coid
/// from dbo.zStage_ProviderDictionary as t
/// cross apply dbo.Split (t.FacCOID, '|') as split
/// where FacCOID like '%|%'
/// order by t.NPINumber;
/// </code>
/// </example>
/// <remarks>
/// Hat tips:
/// - http://sqlblog.com/blogs/adam_machanic/archive/2009/04/28/sqlclr-string-splitting-part-2-even-faster-even-more-scalable.aspx
/// - https://www.mssqltips.com/sqlservertip/1665/sql-server-clr-and-sql-split-functions-to-parse-a-delimited-string/
/// </remarks>
public partial class SqlFunctions
{
[SqlFunction(
Name = "Split",
FillRowMethodName = "FillRowSplit",
TableDefinition = "item nvarchar(4000)")
]
public static IEnumerator Split(
[SqlFacet(MaxSize = -1)] SqlChars field,
[SqlFacet(MaxSize = 255)] SqlChars delimiter)
{
return (
field.IsNull || delimiter.IsNull
? new SplitEnumerator(
field: new char[0],
delimiter: new char[0])
: new SplitEnumerator(
field: field.Value,
delimiter: delimiter.Value)
);
}
private static void FillRowSplit(object obj, out SqlString item)
{
item = new SqlString(obj as string);
}
private class SplitEnumerator : IEnumerator
{
private int _lastPos;
private int _nextPos;
private readonly char[] _field;
private readonly char[] _delimiterChars;
private readonly int _fieldLength;
private readonly byte _delimiterLength;
private readonly bool _isSingleCharacterDelimiter;
public SplitEnumerator(char[] field, char[] delimiter)
{
_field = field;
_fieldLength = field.Length;
_delimiterChars = delimiter;
_delimiterLength = (byte) (delimiter.Length);
_isSingleCharacterDelimiter = (_delimiterLength == 1);
_lastPos = 0;
_nextPos = _delimiterLength*-1;
}
public object Current
{
get
{
return new string(_field, _lastPos, _nextPos - _lastPos);
}
}
public bool MoveNext()
{
if (_nextPos >= _fieldLength)
{
return false;
}
_lastPos = _nextPos + _delimiterLength;
for (int i = _lastPos; i < _fieldLength; i++)
{
bool matches = true;
// optimize for single-character delimiters:
if (_isSingleCharacterDelimiter)
{
if (_field[i] != _delimiterChars[0])
{
matches = false;
}
}
else
{
for (byte j = 0; j < _delimiterLength; j++)
{
if (i + j >= _fieldLength || _field[i + j] != _delimiterChars[j])
{
matches = false;
break;
}
}
}
if (matches)
{
_nextPos = i;
// handle consecutive delimiters:
if (_nextPos - _lastPos > 0)
{
return true;
}
i += (_delimiterLength - 1);
_lastPos += _delimiterLength;
}
}
_lastPos = _nextPos + _delimiterLength;
_nextPos = _fieldLength;
return _nextPos - _lastPos > 0;
}
public void Reset()
{
_lastPos = 0;
_nextPos = _delimiterLength*-1;
}
}
}
}