Click here to Skip to main content
15,886,689 members
Articles / Programming Languages / C# 4.0

The ReplaceMany Method

Rate me:
Please Sign up or sign in to vote.
4.93/5 (40 votes)
16 Jan 2012CPOL5 min read 65.3K   1.4K   60  
Performing many replacements in a string at one time.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Runtime.InteropServices;

namespace System
{
    public static class ReplaceManyExtensions
    {
        /// <summary>
        /// Compares two arrays of bytes and returns 0 if they are equal up to the specified index.
        /// </summary>
        [DllImport("msvcrt.dll", CallingConvention = CallingConvention.Cdecl)]
        private static extern unsafe int memcmp(byte* b1, byte* b2, int count);
        [DllImport("msvcrt.dll", CallingConvention = CallingConvention.Cdecl)]
        private static extern unsafe int memcpy(byte* dest, byte* src, int count);
 
        /// <summary>
        /// Extends an array to a new size by allocating a new array and copying data from the first one.
        /// </summary>
        /// <param name="array">An saray to extend.</param>
        /// <param name="oldSize">Number of elements to copy from the original array. It must be less or equal than the array's length.</param>
        /// <param name="newSize">Size of an array after expanding.</param>
        public static unsafe char[] ExpandArray(char[] array, int oldSize, int newSize)
        {
            if (oldSize > newSize || oldSize > array.Length)
                throw new ArgumentOutOfRangeException();
            char[] bigger;
            bigger = new char[newSize];
            fixed (char* bpt = bigger, apt = array)
                memcpy((byte*)bpt, (byte*)apt, oldSize << 1);
            return bigger;
        }
        /// <summary>
        /// Replaces each occurance of each string in the oldValues array with a corresponding string in the newValues array.
        /// </summary>
        /// <param name="str">A string to process</param>
        /// <param name="oldValues">A string to search for.</param>
        /// <param name="newValues">The replacements</param>
        /// <returns>A string after replacements made.</returns>
        public static unsafe string ReplaceMany(this string str, string[] oldValues, string[] newValues)
        {
            return ReplaceMany(str, oldValues, newValues, 0);
        }
        /// <summary>
        /// Replaces each occurance of each string in the oldValues array with a corresponding string in the newValues array.
        /// </summary>
        /// <param name="str">A string to process</param>
        /// <param name="oldValues">A string to search for.</param>
        /// <param name="newValues">The replacements</param>
        /// <param name="resultStringLength">(Optional, default is str.Length*2) A suggested length of a return string. If the result
        /// string was longer than the value of this paramenter, a buffer reallocation would occur. It is better to pass
        /// too big value than too small.</param>
        /// <returns>A string after replacements made.</returns>
        public static unsafe string ReplaceMany(this string str, string[] oldValues, string[] newValues, int resultStringLength)
        {
            int oldCount = oldValues.Length;
            int newCount = newValues.Length;
            if (oldCount != newCount)
                throw new ArgumentException("Each old value must match exactly one new value");
            for (int i = 0; i < oldCount; i++)
            {
                if (string.IsNullOrEmpty(oldValues[i]))
                    throw new ArgumentException("Old value may not be null or empty.", "oldValues");
                if (newValues[i] == null)
                    throw new ArgumentException("New value may be null.", "newValues");
            }
            // cheating for a small number of replacement strings
            /*if (oldCount == 1)
                return str.Replace(oldValues[0], newValues[0]);
            if (oldCount == 2)
                return str.Replace(oldValues[0], newValues[0]).Replace(oldValues[1], newValues[1]);*/
            
            int strLen = str.Length;
            int buildSpace = resultStringLength == 0 ? strLen << 1 : resultStringLength;
            // A hand-made StringBuilder is here
            char[] buildArr = new char[buildSpace];        
            // cached pinned pointers
            GCHandle buildHandle = GCHandle.Alloc(buildArr, GCHandleType.Pinned);
            GCHandle[] oldHandles = new GCHandle[oldCount];
            GCHandle[] newHandles = new GCHandle[newCount];
            int* newLens = stackalloc int[newCount];
            int* oldLens = stackalloc int[newCount];
            char** oldPtrs = stackalloc char*[newCount];
            char** newPtrs = stackalloc char*[newCount];
            // other caches
            for (int i = 0; i < oldCount; i++)
            {
                oldHandles[i] = GCHandle.Alloc(oldValues[i], GCHandleType.Pinned);
                newHandles[i] = GCHandle.Alloc(newValues[i], GCHandleType.Pinned);
                oldPtrs[i] = (char*)oldHandles[i].AddrOfPinnedObject();
                newPtrs[i] = (char*)newHandles[i].AddrOfPinnedObject();
                newLens[i] = newValues[i].Length;
                oldLens[i] = oldValues[i].Length;
            }
            int buildIndex = 0;
            fixed (char* _strFix = str)
            {
                char* build = (char*)buildHandle.AddrOfPinnedObject();
                char* pBuild = build;
                char* pStr = _strFix;
                char* endStr = pStr + strLen;
                char* copyStartPos = pStr;
                while (pStr != endStr)
                {
                    bool find = false;
                    for (int i = 0; i < oldCount; ++i)
                    {
                        int oldValLen = *(oldLens+i);
                        // if a string to find does not exceed the original string
                        if (oldValLen > 0 && pStr + oldValLen <= endStr)
                        {
                            char* _oldFix = *(oldPtrs + i);
                            if (*pStr == *_oldFix) // check the first char
                            {
                                // compare the rest. First, compare the second character
                                find = oldValLen == 1;
                                if (!find)
                                {
                                    if (*(pStr + 1) == *(_oldFix + 1))
                                        find = oldValLen == 2
                                        // use a native memcmp function.
                                        || 0 == memcmp((byte*)(pStr + 2), (byte*)(_oldFix + 2), (oldValLen - 2) << 1);
                                }
                                
                                if (find)
                                {
                                    int newValLen = newLens[i];
                                    char* newFix = newPtrs[i];
                                    int copyLen = (int)(pStr - copyStartPos);
                                    // allocate new space if needed.
                                    if (buildIndex + newValLen + copyLen > buildSpace)
                                    {
                                        buildHandle.Free();
                                        int oldSpace = buildSpace;
                                        buildSpace = Math.Max((int)(buildIndex + newValLen + copyLen), buildSpace << 1);
                                        buildArr = ExpandArray(buildArr, oldSpace, buildSpace);
                                        buildHandle = GCHandle.Alloc(buildArr, GCHandleType.Pinned);
                                        build = (char*)buildHandle.AddrOfPinnedObject();
                                        pBuild = build + buildIndex;
                                    }
                                    // if there is a part from the original string to copy, then do it.
                                    if (copyLen > 0)
                                    {
                                        memcpy((byte*)(pBuild), (byte*)copyStartPos, copyLen << 1);
                                        buildIndex += copyLen;
                                        pBuild = build + buildIndex;
                                    }
                                    // append the replacement to the builder
                                    memcpy((byte*)(pBuild), (byte*)newFix, newValLen << 1);
                                    pBuild += newValLen;
                                    buildIndex += newValLen;
                                    pStr += oldValLen;
                                    copyStartPos = pStr;
                                    // this is redutant, but brings more determinism to method's behaviour.
                                    break;
                                }
                            }
                        }
                    }
                    // if not found, just increment the pointer within the main string
                    if (!find)
                        pStr++;
                }
                // if there is a part from the original string to copy, then do it.
                if (copyStartPos != pStr)
                {
                    int copyLen = (int)(pStr - copyStartPos);
                    // again, allocate new space if needed
                    if (buildIndex + copyLen > buildSpace)
                    {
                        buildHandle.Free();
                        int oldSpace = buildSpace;
                        buildSpace = Math.Max((int)(buildIndex + copyLen), buildSpace << 1);
                        buildArr = ExpandArray(buildArr, oldSpace, buildSpace);
                        buildHandle = GCHandle.Alloc(buildArr, GCHandleType.Pinned);
                        build = (char*)buildHandle.AddrOfPinnedObject();
                        pBuild = build + buildIndex;
                    }
                    // append the ending
                    memcpy((byte*)(pBuild), (byte*)copyStartPos, copyLen << 1);
                    buildIndex += copyLen;
                }
            }
            // unpin string handles
            for (int i = 0; i < newCount; i++)
            {
                oldHandles[i].Free();
                newHandles[i].Free();
            }
            buildHandle.Free();
            return new string(buildArr, 0, buildIndex);
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Software Developer
Poland Poland
My name is Jacek. Currently, I am a Java/kotlin developer. I like C# and Monthy Python's sense of humour.

Comments and Discussions