Static C# class with useful string manipulation methods.
#region Disclaimer/Info
///////////////////////////////////////////////////////////////////////////////////////////////////
// Subtext WebLog
//
// Subtext is an open source weblog system that is a fork of the .TEXT
// weblog system.
//
// For updated news and information please visit http://subtextproject.com/
// Subtext is hosted at Google Code at http://code.google.com/p/subtext/
// The development mailing list is at subtext@googlegroups.com
//
// This project is licensed under the BSD license. See the License.txt file for more information.
///////////////////////////////////////////////////////////////////////////////////////////////////
#endregion
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text.RegularExpressions;
using System.Web;
using Subtext.Framework.Properties;
// adapted from namespace Haack.Text
namespace Subtext.Framework.Text
{
/// <summary>
/// Static class with useful string manipulation methods.
/// </summary>
public static class StringHelper
{
// Matches text consisting solely of one or more ASCII digits.
// [0-9] is used instead of \d so non-ASCII Unicode digits are rejected, and \z is used
// instead of $ so that a trailing newline ("123\n") is not accepted as numeric.
private static readonly Regex NumericRegex = new Regex(@"^[0-9]+\z", RegexOptions.Compiled);
// Matches a run of one or more non-word characters; used as the delimiter between words.
private static readonly Regex SplitWordsRegex = new Regex(@"\W+", RegexOptions.Compiled);
/// <summary>
/// Converts an empty string to null; any other value is returned unchanged.
/// </summary>
/// <param name="s">The string to inspect. May be null.</param>
/// <returns>null when <paramref name="s"/> is null or empty; otherwise <paramref name="s"/>.</returns>
public static string NullIfEmpty(this string s)
{
    return String.IsNullOrEmpty(s) ? null : s;
}
/// <summary>
/// Removes up to <paramref name="occurrenceCount"/> occurrences of <paramref name="textToRemove"/>
/// from <paramref name="original"/>, working from the start of the string.
/// </summary>
/// <param name="original">The text to remove from.</param>
/// <param name="textToRemove">The text to remove. An empty value removes nothing.</param>
/// <param name="occurrenceCount">Maximum number of occurrences to remove. Zero or less removes nothing.</param>
/// <param name="comparison">The comparison used to locate <paramref name="textToRemove"/>.</param>
/// <returns>The text with the located occurrences removed.</returns>
/// <exception cref="ArgumentNullException">Thrown if original or textToRemove is null.</exception>
public static string Remove(this string original, string textToRemove, int occurrenceCount,
                            StringComparison comparison)
{
    if (original == null)
    {
        throw new ArgumentNullException("original");
    }
    if (textToRemove == null)
    {
        throw new ArgumentNullException("textToRemove");
    }
    // An empty search text "matches" at index 0 without consuming anything; treating it
    // as a match would never make progress, so there is nothing to remove.
    if (textToRemove.Length == 0)
    {
        return original;
    }

    string result = original;
    for (int removed = 0; removed < occurrenceCount; removed++)
    {
        int matchStart = result.IndexOf(textToRemove, comparison);
        if (matchStart < 0)
        {
            break;
        }
        // Clamp the end of the match: a culture-sensitive comparison can match fewer
        // characters than textToRemove.Length.
        int matchEnd = Math.Min(matchStart + textToRemove.Length, result.Length);
        result = result.Substring(0, matchStart) + result.Substring(matchEnd);
    }
    return result;
}
/// <summary>
/// Collapses every run of consecutive occurrences of the specified character
/// into a single occurrence. So "--" (or "---") becomes "-" if the character is '-'.
/// </summary>
/// <param name="text">The text to process.</param>
/// <param name="character">The character whose consecutive repeats are removed.</param>
/// <returns>The text with each run of <paramref name="character"/> reduced to one.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown if text is null, or if character is the null character (char.MinValue).
/// </exception>
public static string RemoveDoubleCharacter(this string text, char character)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }
    // The null character is treated as "no character supplied" and rejected.
    if (character == char.MinValue)
    {
        throw new ArgumentNullException("character");
    }

    // The result can never be longer than the input.
    var buffer = new char[text.Length];
    int written = 0;
    bool previousWasTarget = false;
    foreach (char current in text)
    {
        bool isTarget = current == character;
        // Skip the character only when it repeats the target immediately before it.
        if (!(isTarget && previousWasTarget))
        {
            buffer[written] = current;
            written++;
        }
        previousWasTarget = isTarget;
    }
    return new string(buffer, 0, written);
}
/// <summary>
/// Splits the text into words, using runs of non-word characters as delimiters.
/// </summary>
/// <param name="source">The text to split.</param>
/// <returns>The words in order of appearance; never contains empty entries.</returns>
/// <exception cref="ArgumentNullException">Thrown if source is null.</exception>
public static IEnumerable<string> SplitIntoWords(this string source)
{
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }
    // Regex.Split yields an empty entry when the text starts or ends with a delimiter
    // (e.g. trailing punctuation), so empty entries are filtered out rather than
    // relying on trimming whitespace only.
    return SplitWordsRegex.Split(source)
        .Where(word => word.Length > 0)
        .ToArray();
}
/// <summary>
/// Converts text to pascal case by upper-casing the first character of every
/// space-separated word and joining the words together without the spaces.
/// </summary>
/// <remarks>
/// Only the space character separates words. The remaining characters of each word,
/// including punctuation, are left as they are.
/// </remarks>
/// <param name="text">The text to convert.</param>
/// <returns>The pascal-cased text.</returns>
/// <exception cref="ArgumentNullException">Thrown if text is null.</exception>
public static string ToPascalCase(this string text)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }
    if (text.Length == 0)
    {
        return text;
    }

    string[] capitalized = text.Split(' ')
        .Select(word => word.Length == 0
            ? word
            : char.ToUpper(word[0], CultureInfo.InvariantCulture) + word.Substring(1))
        .ToArray();
    return string.Join(string.Empty, capitalized);
}
/// <summary>
/// Returns a string containing a specified number of characters from the left side of a string.
/// </summary>
/// <param name="str">String from which the leftmost characters are returned. May be null.</param>
/// <param name="length">How many characters to return. Must not be negative. If 0, a
/// zero-length string ("") is returned. If greater than or equal to the number of
/// characters in str, the entire string is returned.</param>
/// <returns>null if str is null; otherwise the leftmost characters of str.</returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown if str is not null and length is less than 0.</exception>
public static string Left(this string str, int length)
{
    // Unlike Right, a null input is passed through rather than rejected.
    if (str == null)
    {
        return null;
    }
    // Validate explicitly instead of relying on Substring to reject a negative length.
    if (length < 0)
    {
        throw new ArgumentOutOfRangeException("length", length, "Length must not be negative.");
    }
    if (length >= str.Length)
    {
        return str;
    }
    return str.Substring(0, length);
}
/// <summary>
/// Returns a string containing a specified number of characters from the right side of a string.
/// </summary>
/// <param name="original">String from which the rightmost characters are returned.</param>
/// <param name="length">How many characters to return. Must not be negative. If 0, a
/// zero-length string ("") is returned. If greater than or equal to the number of
/// characters in original, the entire string is returned.</param>
/// <returns>The rightmost characters of original.</returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown if length is less than 0.</exception>
/// <exception cref="ArgumentNullException">Thrown if original is null.</exception>
public static string Right(this string original, int length)
{
    if (original == null)
    {
        throw new ArgumentNullException("original");
    }
    if (length < 0)
    {
        throw new ArgumentOutOfRangeException("length", length,
                                              Resources.ArgumentOutOfRange_LengthMustNotBeNegative);
    }

    int available = original.Length;
    if (available == 0 || length == 0)
    {
        return String.Empty;
    }
    return length < available
        ? original.Substring(available - length)
        : original;
}
/// <summary>
/// Returns every character of a string that follows the first occurrence of
/// another string, located with an invariant-culture comparison.
/// </summary>
/// <param name="original">String from which the trailing characters are returned.</param>
/// <param name="search">The string whose end marks the start of the characters to
/// return. If it is not found, the whole string is returned.</param>
/// <returns>The text after the first occurrence of search.</returns>
/// <exception cref="ArgumentNullException">Thrown if original or search is null.</exception>
public static string RightAfter(this string original, string search)
{
    const StringComparison defaultComparison = StringComparison.InvariantCulture;
    return original.RightAfter(search, defaultComparison);
}
/// <summary>
/// Returns every character of a string that follows the first occurrence of
/// another string.
/// </summary>
/// <param name="original">String from which the trailing characters are returned.</param>
/// <param name="search">The string whose end marks the start of the characters to
/// return. If it is empty, longer than original, or not found, the whole string is returned.</param>
/// <param name="comparisonType">The comparison used to locate search (e.g. case sensitivity).</param>
/// <returns>The text after the first occurrence of search.</returns>
/// <exception cref="ArgumentNullException">Thrown if original or search is null.</exception>
public static string RightAfter(this string original, string search, StringComparison comparisonType)
{
    if (original == null)
    {
        throw new ArgumentNullException("original");
    }
    if (search == null)
    {
        throw new ArgumentNullException("search");
    }

    // Shortcut: an empty or over-long search text is never looked up.
    bool searchCannotMatch = search.Length == 0 || search.Length > original.Length;
    int matchStart = searchCannotMatch ? -1 : original.IndexOf(search, 0, comparisonType);
    if (matchStart < 0)
    {
        return original;
    }

    int charsAfterMatch = original.Length - (matchStart + search.Length);
    return original.Right(charsAfterMatch);
}
/// <summary>
/// Returns every character of a string that follows the last occurrence of
/// another string, located with an invariant-culture comparison.
/// </summary>
/// <param name="original">String from which the trailing characters are returned.</param>
/// <param name="search">The string whose end marks the start of the characters to
/// return. If it is not found, the whole string is returned.</param>
/// <returns>The text after the last occurrence of search.</returns>
/// <exception cref="ArgumentNullException">Thrown if original or search is null.</exception>
public static string RightAfterLast(this string original, string search)
{
    // original.Length is read below to compute the start index, so null must be
    // rejected here to surface the documented ArgumentNullException.
    if (original == null)
    {
        throw new ArgumentNullException("original");
    }
    return RightAfterLast(original, search, original.Length - 1, StringComparison.InvariantCulture);
}
/// <summary>
/// Returns every character of a string that follows the last occurrence of
/// another string.
/// </summary>
/// <param name="original">String from which the trailing characters are returned.</param>
/// <param name="search">The string whose end marks the start of the characters to
/// return. If it is empty, longer than original, or not found, the whole string is returned.</param>
/// <param name="startIndex">The position at which the backward search starts.</param>
/// <param name="comparisonType">The comparison used to locate search (e.g. case sensitivity).</param>
/// <returns>The text after the last occurrence of search.</returns>
/// <exception cref="ArgumentNullException">Thrown if original or search is null.</exception>
public static string RightAfterLast(this string original, string search, int startIndex,
                                    StringComparison comparisonType)
{
    if (original == null)
    {
        throw new ArgumentNullException("original");
    }
    if (search == null)
    {
        throw new ArgumentNullException("search");
    }

    // Shortcut: an empty or over-long search text is never looked up.
    bool searchCannotMatch = search.Length == 0 || search.Length > original.Length;
    if (searchCannotMatch)
    {
        return original;
    }

    int lastMatchStart = original.LastIndexOf(search, startIndex, comparisonType);
    if (lastMatchStart < 0)
    {
        return original;
    }

    int charsAfterMatch = original.Length - (lastMatchStart + search.Length);
    return original.Right(charsAfterMatch);
}
/// <summary>
/// Returns every character of a string that precedes the first occurrence of
/// another string, located with an invariant-culture comparison.
/// </summary>
/// <param name="str">String from which the leading characters are returned.</param>
/// <param name="search">The string whose beginning marks the end of the characters
/// to return. If it is not found, the whole string is returned.</param>
/// <returns>The text before the first occurrence of search.</returns>
/// <exception cref="ArgumentNullException">Thrown if str or search is null.</exception>
// Declared as an extension method ("this") so it can be called like its sibling
// overloads; existing static-style calls keep working.
public static string LeftBefore(this string str, string search)
{
    return LeftBefore(str, search, StringComparison.InvariantCulture);
}
/// <summary>
/// Returns every character of a string that precedes the first occurrence of
/// another string.
/// </summary>
/// <param name="original">String from which the leading characters are returned.</param>
/// <param name="search">The string whose beginning marks the end of the characters
/// to return. If it is empty, longer than original, or not found, the whole string is returned.</param>
/// <param name="comparisonType">The comparison used to locate search (e.g. case sensitivity).</param>
/// <returns>The text before the first occurrence of search.</returns>
/// <exception cref="ArgumentNullException">Thrown if original or search is null.</exception>
public static string LeftBefore(this string original, string search, StringComparison comparisonType)
{
    if (original == null)
    {
        throw new ArgumentNullException("original");
    }
    if (search == null)
    {
        throw new ArgumentNullException("search");
    }

    // Shortcut: an empty or over-long search text is never looked up.
    bool searchCannotMatch = search.Length == 0 || search.Length > original.Length;
    int matchStart = searchCannotMatch ? -1 : original.IndexOf(search, 0, comparisonType);
    if (matchStart < 0)
    {
        return original;
    }

    // Keep everything that precedes the match.
    return original.Substring(0, matchStart);
}
/// <summary>
/// Returns true if the specified container string contains the contained
/// string, using the given comparison.
/// </summary>
/// <param name="container">The string to search in.</param>
/// <param name="contained">The string to search for.</param>
/// <param name="comparison">The comparison to use (e.g. case sensitivity).</param>
/// <returns><c>true</c> if contained occurs in container; otherwise, <c>false</c>.</returns>
public static bool Contains(this string container, string contained, StringComparison comparison)
{
    int position = container.IndexOf(contained, comparison);
    return position >= 0;
}
/// <summary>
/// Determines whether the specified text is numeric... or to be more precise,
/// whether it is made up of digits only (an unsigned integer), as decided by
/// the class-level NumericRegex pattern.
/// </summary>
/// <param name="text">The text to test.</param>
/// <returns>
/// <c>true</c> if the specified text is numeric; otherwise, <c>false</c>.
/// </returns>
public static bool IsNumeric(this string text)
{
    Match match = NumericRegex.Match(text);
    return match.Success;
}
/// <summary>
/// Encodes text for use inside a mailto link.
/// </summary>
/// <remarks>
/// The steps are, in order: RemoveHtml (helper defined elsewhere; presumably strips
/// markup), replace double quotes with single quotes, HTML-attribute-encode,
/// URL-encode, and finally turn every '+' of the encoded result back into a space.
/// </remarks>
/// <param name="s">The text to encode.</param>
/// <returns>The encoded text.</returns>
public static string EncodeForMailTo(this string s)
{
    string withoutMarkup = s.RemoveHtml();
    string withoutDoubleQuotes = withoutMarkup.Replace("\"", "'");
    string attributeEncoded = HttpUtility.HtmlAttributeEncode(withoutDoubleQuotes);
    string urlEncoded = HttpUtility.UrlEncode(attributeEncoded);
    return urlEncoded.Replace("+", " ");
}
/// <summary>
/// Encodes text for use as the body of a mailto link: every Environment.NewLine
/// is replaced with "%0A" and the result is passed through EncodeForMailTo.
/// </summary>
/// <param name="body">The body text to encode.</param>
/// <returns>The encoded body.</returns>
public static string EncodeMailToBody(this string body)
{
    // NOTE(review): the "%0A" inserted here is subsequently run through EncodeForMailTo,
    // whose URL-encoding step presumably escapes the '%' again (giving "%250A") -
    // confirm that line breaks survive as intended.
    string withEncodedNewLines = body.Replace(Environment.NewLine, "%0A");
    return withEncodedNewLines.EncodeForMailTo();
}
/// <summary>
/// Formats a string by splitting it into literal and expression parts,
/// evaluating each part against <paramref name="source"/> and concatenating the results.
/// </summary>
/// <param name="format">The format string.</param>
/// <param name="source">The object each expression is evaluated against.</param>
/// <returns>The concatenation of all evaluated parts.</returns>
/// <exception cref="ArgumentNullException">Thrown if format is null.</exception>
public static string NamedFormat(this string format, object source)
{
    if (format == null)
    {
        throw new ArgumentNullException("format");
    }

    string[] evaluatedParts = format.SplitFormat()
        .Select(expression => expression.Eval(source))
        .ToArray();
    return String.Join(string.Empty, evaluatedParts);
}
/// <summary>
/// Lazily splits a format string into its parts: literal text (LiteralFormat)
/// and brace-delimited expressions (FormatExpression), in order of appearance.
/// </summary>
/// <param name="format">The format string to split.</param>
/// <returns>The sequence of parts making up the format string.</returns>
private static IEnumerable<ITextExpression> SplitFormat(this string format)
{
    // Index of the closing brace of the previously emitted expression (-1 = none yet).
    int exprEndIndex = -1;
    int expStartIndex;
    do
    {
        expStartIndex = format.IndexOfExpressionStart(exprEndIndex + 1);
        if (expStartIndex < 0)
        {
            // No further expression: everything after the last end brace is literal.
            if (exprEndIndex + 1 < format.Length)
            {
                yield return new LiteralFormat(
                    format.Substring(exprEndIndex + 1));
            }
            break;
        }

        if (expStartIndex - exprEndIndex - 1 > 0)
        {
            // Literal text between the previous expression and the next start brace.
            yield return new LiteralFormat(format.Substring(exprEndIndex + 1
                                                            , expStartIndex - exprEndIndex - 1));
        }

        int endBraceIndex = format.IndexOfExpressionEnd(expStartIndex + 1);
        if (endBraceIndex < 0)
        {
            // Rest of string, no end brace (could be an invalid expression).
            yield return new FormatExpression(format.Substring(expStartIndex));
            // The remainder has been consumed. Without stopping here exprEndIndex never
            // advances, so the loop would find the same start brace again and yield the
            // same parts forever.
            yield break;
        }

        exprEndIndex = endBraceIndex;
        // Everything from start brace to end brace, inclusive.
        yield return new FormatExpression(format.Substring(expStartIndex
                                                           , endBraceIndex - expStartIndex + 1));
    } while (expStartIndex > -1);
}
/// <summary>
/// Finds the index of the next '{' at or after <paramref name="startIndex"/> that is
/// not immediately followed by another '{'. A "{{" pair is skipped as a whole.
/// </summary>
/// <param name="format">The format string to scan.</param>
/// <param name="startIndex">The index at which scanning starts.</param>
/// <returns>The index of the start brace, or -1 if there is none.</returns>
static int IndexOfExpressionStart(this string format, int startIndex)
{
    int searchFrom = startIndex;
    while (true)
    {
        int braceIndex = format.IndexOf('{', searchFrom);
        if (braceIndex == -1)
        {
            return -1;
        }

        // Peek ahead: a doubled brace does not start an expression.
        bool isDoubled = braceIndex + 1 < format.Length && format[braceIndex + 1] == '{';
        if (!isDoubled)
        {
            return braceIndex;
        }
        searchFrom = braceIndex + 2;
    }
}
/// <summary>
/// Finds the index of the '}' that ends an expression, scanning from
/// <paramref name="startIndex"/>.
/// </summary>
/// <remarks>
/// For each '}' found, the '}' characters directly following it are counted. When that
/// count is odd the whole run is skipped and scanning resumes after it; when it is even
/// (including zero) the index of the first '}' of the run is returned.
/// </remarks>
/// <param name="format">The format string to scan.</param>
/// <param name="startIndex">The index at which scanning starts.</param>
/// <returns>The index of the end brace, or -1 if there is none.</returns>
static int IndexOfExpressionEnd(this string format, int startIndex)
{
    int searchFrom = startIndex;
    while (true)
    {
        int firstBrace = format.IndexOf('}', searchFrom);
        if (firstBrace == -1)
        {
            return -1;
        }

        // Count the closing braces that directly follow the one just found.
        int followingBraces = 0;
        int probe = firstBrace + 1;
        while (probe < format.Length && format[probe] == '}')
        {
            followingBraces++;
            probe++;
        }

        if (followingBraces % 2 == 0)
        {
            return firstBrace;
        }
        searchFrom = firstBrace + followingBraces + 1;
    }
}
/// <summary>
/// Returns a new string with the last character removed.
/// If the string ends with \r\n, both characters are removed.
/// A null or empty string is returned unchanged.
/// </summary>
/// <remarks>
/// "string\r\n".chop #=> "string"
/// "string\n\r".chop #=> "string\n"
/// "string\n".chop #=> "string"
/// "string".chop #=> "strin"
/// "x".chop.chop #=> ""
/// </remarks>
/// <param name="text">The text to chop.</param>
/// <returns>The text without its last character (or without its trailing \r\n).</returns>
public static string Chop(this string text)
{
    if (String.IsNullOrEmpty(text))
    {
        return text;
    }

    // The last character always goes.
    int keep = text.Length - 1;
    // A '\r' directly in front of a trailing '\n' goes with it.
    if (text[keep] == '\n' && keep > 0 && text[keep - 1] == '\r')
    {
        keep--;
    }
    return text.Substring(0, keep);
}
/// <summary>
/// Removes a trailing line ending from the text. Equivalent to calling the
/// three-argument overload without a separator and with an ordinal comparison.
/// </summary>
/// <param name="text">The text to chomp.</param>
/// <returns>The text without its trailing line ending.</returns>
public static string Chomp(this string text)
{
    const string noSeparator = null;
    return Chomp(text, noSeparator, StringComparison.Ordinal);
}
/// <summary>
/// Returns a new string with a trailing line ending removed ("\n", "\r" or "\r\n")
/// and then, if a separator is supplied and the remaining text ends with it,
/// with that separator removed as well. A null or empty string is returned unchanged.
/// </summary>
/// <remarks>
/// "hello".chomp #=> "hello"
/// "hello\n".chomp #=> "hello"
/// "hello\r\n".chomp #=> "hello"
/// "hello\n\r".chomp #=> "hello\n"
/// "hello\r".chomp #=> "hello"
/// "hello \n there".chomp #=> "hello \n there"
/// "hello".chomp("llo") #=> "he"
/// </remarks>
/// <param name="text">The text to chomp.</param>
/// <param name="separator">Optional trailing text to remove. Ignored when null or empty.</param>
/// <param name="comparisonType">The comparison used to test for the separator.</param>
/// <returns>The text without the trailing line ending and separator.</returns>
public static string Chomp(this string text, string separator, StringComparison comparisonType)
{
    if (String.IsNullOrEmpty(text))
    {
        return text;
    }

    // Drop at most one trailing '\n', then at most one trailing '\r'.
    int end = text.Length;
    if (end > 0 && text[end - 1] == '\n')
    {
        end--;
    }
    if (end > 0 && text[end - 1] == '\r')
    {
        end--;
    }
    string trimmed = text.Substring(0, end);

    bool hasSeparator = !String.IsNullOrEmpty(separator);
    if (hasSeparator && trimmed.EndsWith(separator, comparisonType))
    {
        return trimmed.Substring(0, trimmed.Length - separator.Length);
    }
    return trimmed;
}
/// <summary>
/// Converts a boolean to the text "Yes" (true) or "No" (false).
/// </summary>
/// <param name="value">The value to convert.</param>
/// <returns>"Yes" when value is true; otherwise "No".</returns>
public static string ToYesNo(this bool value)
{
    if (value)
    {
        return "Yes";
    }
    return "No";
}
}
}
// ------------------
using System;
using System.Collections.Generic;
using System.Linq;
using MbUnit.Framework;
using Subtext.Framework.Text;
namespace UnitTests.Subtext.Framework.Text
{
/// <summary>
/// Unit tests for the string extension methods in StringHelper.
/// </summary>
[TestFixture]
public class StringHelperTests
{
    [Test]
    public void Remove_PassingInTextWithRepeatingSequenceAndOccurrenceCountOfOne_RemovesFirstOccurrence()
    {
        //act
        string result = "foo/bar/foo".Remove("Foo", 1, StringComparison.OrdinalIgnoreCase);

        //assert
        Assert.AreEqual("/bar/foo", result);
    }

    [Test]
    public void Remove_PassingInTextWithRepeatingSequenceAndOccurrenceCountOfTwo_RemovesAllOccurrences()
    {
        //act
        string result = "foo/bar/foo".Remove("Foo", 2, StringComparison.OrdinalIgnoreCase);

        //assert
        Assert.AreEqual("/bar/", result);
    }

    [Test]
    public void Remove_PassingInTextWithRepeatingSequenceAndOccurrenceCountOfFour_RemovesAllOccurrences()
    {
        //act
        string result = "foo/bar/foo".Remove("Foo", 4, StringComparison.OrdinalIgnoreCase);

        //assert
        Assert.AreEqual("/bar/", result);
    }

    [RowTest]
    [Row("Blah..Blah", '.', "Blah.Blah")]
    [Row("Blah...Blah", '.', "Blah.Blah")]
    [Row("Blah....Blah", '.', "Blah.Blah")]
    [Row("Blah- -Blah", '-', "Blah- -Blah")]
    [Row("Blah--Blah", '.', "Blah--Blah")]
    public void CanRemoveDoubleCharacter(string text, char character, string expected)
    {
        Assert.AreEqual(expected, text.RemoveDoubleCharacter(character));
    }

    [Test]
    public void RemoveDoubleCharacter_WithNullCharacter_ThrowsArgumentNullException()
    {
        UnitTestHelper.AssertThrowsArgumentNullException(
            () => "6 bdy.RemoveDoubleCharacter(e)".RemoveDoubleCharacter(Char.MinValue)
            );
    }

    /// <summary>
    /// Tests that we can properly pascal case text.
    /// </summary>
    /// <remarks>
    /// Does not remove punctuation.
    /// </remarks>
    /// <param name="original">The text to convert.</param>
    /// <param name="expected">The expected pascal-cased text.</param>
    [RowTest]
    [Row("", "")]
    [Row("a", "A")]
    [Row("A", "A")]
    [Row("A B", "AB")]
    [Row("a bee keeper's dream.", "ABeeKeeper'sDream.")]
    public void PascalCaseTests(string original, string expected)
    {
        Assert.AreEqual(expected, original.ToPascalCase());
    }

    [Test]
    public void PascalCaseThrowsArgumentNullException()
    {
        UnitTestHelper.AssertThrowsArgumentNullException(() =>
            StringHelper.ToPascalCase(null)
            );
    }

    [RowTest]
    [Row("BLAH Tast", "a", 6, StringComparison.Ordinal)]
    [Row("BLAH Tast", "a", 2, StringComparison.OrdinalIgnoreCase)]
    public void IndexOfHandlesCaseSensitivity(string source, string search, int expectedIndex,
                                              StringComparison comparison)
    {
        Assert.AreEqual(expectedIndex, source.IndexOf(search, comparison),
                        "Did not find the string '{0}' at the index {1}", search, expectedIndex);
    }

    [RowTest]
    [Row("Blah/Default.aspx", "Default.aspx", "Blah/", StringComparison.Ordinal)]
    [Row("Blah/Default.aspx", "default.aspx", "Blah/", StringComparison.OrdinalIgnoreCase)]
    [Row("Blah/Default.aspx", "default.aspx", "Blah/Default.aspx", StringComparison.Ordinal)]
    public void LeftBeforeOfHandlesCaseSensitivity(string source, string search, string expected,
                                                   StringComparison comparison)
    {
        Assert.AreEqual(expected, source.LeftBefore(search, comparison),
                        "Truncating did not return the correct result.");
    }

    [Test]
    public void SplitIntoWords_WithStringContainingSpaces_SplitsIntoWords()
    {
        //arrange, act
        IEnumerable<string> words = "this is a test".SplitIntoWords().ToList();

        //assert
        Assert.AreEqual(4, words.Count());
        Assert.AreEqual("this", words.First());
        Assert.AreEqual("is", words.ElementAt(1));
        Assert.AreEqual("a", words.ElementAt(2));
        Assert.AreEqual("test", words.ElementAt(3));
    }

    [Test]
    public void SplitIntoWords_WithStringContainingTabsAndDoubleSpaces_SplitsIntoWords()
    {
        //arrange, act
        IEnumerable<string> words = " this \t is\ta test \t".SplitIntoWords().ToList();

        //assert
        Assert.AreEqual(4, words.Count());
        Assert.AreEqual("this", words.First());
        Assert.AreEqual("is", words.ElementAt(1));
        Assert.AreEqual("a", words.ElementAt(2));
        Assert.AreEqual("test", words.ElementAt(3));
    }

    /*
    "string\r\n".chop #=> "string"
    "string\n\r".chop #=> "string\n"
    "string\n".chop #=> "string"
    "string".chop #=> "strin"
    "x".chop.chop #=> ""
    */

    [Test]
    public void Chop_WithStringEndingWithWindowsNewLine_ReturnsStringWithoutNewline()
    {
        Assert.AreEqual("string", "string\r\n".Chop());
    }

    [Test]
    public void Chop_WithStringEndingWithSlashR_OnlyChopsSlashR()
    {
        Assert.AreEqual("string\n", "string\n\r".Chop());
    }

    [Test]
    public void Chop_WithStringEndingWithNewline_ChopsNewline()
    {
        Assert.AreEqual("string", "string\n".Chop());
    }

    [Test]
    public void Chop_WithStringEndingWithLetter_ReturnsStringWithoutLastLetter()
    {
        Assert.AreEqual("strin", "string".Chop());
    }

    [Test]
    public void Chop_WithOneLetter_ReturnsEmptyString()
    {
        Assert.AreEqual(string.Empty, "x".Chop());
    }

    /*
    "hello".chomp #=> "hello"
    "hello\n".chomp #=> "hello"
    "hello\r\n".chomp #=> "hello"
    "hello\n\r".chomp #=> "hello\n"
    "hello\r".chomp #=> "hello"
    "hello \n there".chomp #=> "hello \n there"
    "hello".chomp("llo") #=> "he"
    */

    [Test]
    public void Chomp_WithStringNotEndingWithDefaultSeparator_ReturnsString()
    {
        Assert.AreEqual("hello", "hello".Chomp());
    }

    // The three tests below exercise Chomp (the method their names refer to) rather than
    // Chop, so that Chomp's line-ending handling is actually covered.
    [Test]
    public void Chomp_WithStringEndingWithNewline_ChopsNewline()
    {
        Assert.AreEqual("hello", "hello\n".Chomp());
    }

    [Test]
    public void Chomp_WithStringEndingWithWindowsNewLine_ReturnsStringWithoutNewline()
    {
        Assert.AreEqual("hello", "hello\r\n".Chomp());
    }

    [Test]
    public void Chomp_WithStringEndingWithSlashNSlashR_OnlyChopsSlashR()
    {
        Assert.AreEqual("hello\n", "hello\n\r".Chomp());
    }

    [Test]
    public void Chomp_WithStringEndingWithSlashR_OnlyChopsSlashR()
    {
        Assert.AreEqual("hello", "hello\r".Chomp());
    }

    [Test]
    public void Chomp_WithSeparator_ChopsSeparator()
    {
        Assert.AreEqual("he", "hello".Chomp("llo", StringComparison.Ordinal));
    }

    [Test]
    public void Chomp_WithSeparatorButStringNotEndingWithSeparator_LeavesStringAlone()
    {
        Assert.AreEqual("hello world", "hello world".Chomp("llo", StringComparison.Ordinal));
    }
}
}