Skip to main content

This SQL CLR string splitter handles multi-character delimiters, ignores consecutive delimiters as well as pre and post delimiters... and best of all, it won't get itself stuck in an infinite loop.

using System.Collections;
using System.Data.SqlTypes;
using Microsoft.SqlServer.Server;

namespace SqlClrCommon
{
    /// <summary>
    /// SQL CLR String Splitting Function
    /// </summary>
    /// <example>
    ///     <code>
    ///         -- Split a pipe-delimeted string:
    ///         select * from dbo.Split(N'name|age|something', '|');
    ///
    ///         -- Using the functions against data in a table.
    ///         create table #temp( id int, stringData varchar(100));
    ///
    ///         insert into #temp values (1, 'one,two,three');
    ///         insert into #temp values (2, 'Apple,Banana,Pear');
    ///
    ///         select y.id, fn.string
    ///         from #temp as y
    ///         cross apply dbo.Split (y.stringData, ',') as fn;
    ///         drop table #temp;
    ///
    ///         -- Split an existing table column that has multiple
    ///         -- coid's for a single NPINumber:
    ///         select top 100 t.NPINumber,
    ///            split.item as coid
    ///         from dbo.zStage_ProviderDictionary as t
    ///         cross apply dbo.Split (t.FacCOID, '|') as split
    ///         where FacCOID like '%|%'
    ///         order by t.NPINumber;
    ///     </code>
    /// </example>
    /// <remarks>
    /// Hat tips:
    /// - http://sqlblog.com/blogs/adam_machanic/archive/2009/04/28/sqlclr-string-splitting-part-2-even-faster-even-more-scalable.aspx
    /// - https://www.mssqltips.com/sqlservertip/1665/sql-server-clr-and-sql-split-functions-to-parse-a-delimited-string/
    /// </remarks>
    public partial class SqlFunctions
    {
        [SqlFunction(
            Name = "Split",
            FillRowMethodName = "FillRowSplit",
            TableDefinition = "item nvarchar(4000)")
        ]
        public static IEnumerator Split(
            [SqlFacet(MaxSize = -1)] SqlChars field,
            [SqlFacet(MaxSize = 255)] SqlChars delimiter)
        {
            return (
                field.IsNull || delimiter.IsNull
                    ? new SplitEnumerator(
                        field: new char[0],
                        delimiter: new char[0])
                    : new SplitEnumerator(
                        field: field.Value,
                        delimiter: delimiter.Value)
                );
        }

        private static void FillRowSplit(object obj, out SqlString item)
        {
            item = new SqlString(obj as string);
        }

        private class SplitEnumerator : IEnumerator
        {
            private int _lastPos;
            private int _nextPos;

            private readonly char[] _field;
            private readonly char[] _delimiterChars;

            private readonly int _fieldLength;
            private readonly byte _delimiterLength;
            private readonly bool _isSingleCharacterDelimiter;

            public SplitEnumerator(char[] field, char[] delimiter)
            {
                _field = field;
                _fieldLength = field.Length;

                _delimiterChars = delimiter;
                _delimiterLength = (byte) (delimiter.Length);

                _isSingleCharacterDelimiter = (_delimiterLength == 1);

                _lastPos = 0;
                _nextPos = _delimiterLength*-1;
            }

            public object Current
            {
                get
                {
                    return new string(_field, _lastPos, _nextPos - _lastPos);
                }
            }

            public bool MoveNext()
            {
                if (_nextPos >= _fieldLength)
                {
                    return false;
                }

                _lastPos = _nextPos + _delimiterLength;

                for (int i = _lastPos; i < _fieldLength; i++)
                {
                    bool matches = true;

                    // optimize for single-character delimiters:
                    if (_isSingleCharacterDelimiter)
                    {
                        if (_field[i] != _delimiterChars[0])
                        {
                            matches = false;
                        }
                    }
                    else
                    {
                        for (byte j = 0; j < _delimiterLength; j++)
                        {
                            if (i + j >= _fieldLength || _field[i + j] != _delimiterChars[j])
                            {
                                matches = false;
                                break;
                            }
                        }
                    }

                    if (matches)
                    {
                        _nextPos = i;

                        // handle consecutive delimiters:
                        if (_nextPos - _lastPos > 0)
                        {
                            return true;
                        }

                        i += (_delimiterLength - 1);
                        _lastPos += _delimiterLength;
                    }
                }

                _lastPos = _nextPos + _delimiterLength;
                _nextPos = _fieldLength;

                return _nextPos - _lastPos > 0;
            }

            public void Reset()
            {
                _lastPos = 0;
                _nextPos = _delimiterLength*-1;
            }
        }
    }
}