Skip to main content

MySQL Fuzzy Text Searching Using the SOUNDEX Function

-- ======================================================================
-- MySQL Fuzzy Text Searching Using the SOUNDEX Function
--
-- The Soundex() function is only one of several ways to perform fuzzy
-- searches on your MySQL data.
--
-- http://www.databasejournal.com/features/mysql/mysql-fuzzy-text-searching-using-the-soundex-function.html
-- ======================================================================

-- ----------------------------------------------------------------------
-- soundex_match
--
-- Soundex match a word in a multi-word string
--
-- @needle......: The word you are looking for.
-- @haystack....: The block of text that you are searching.
-- @splitChar...: The whitespace character that'll split the
--                string into single words. Generally it is
--                the space(' ').
--
-- If any word in the haystack sounds similar to the needle,
-- the function will return 1. Otherwise it returns 0.
-- ----------------------------------------------------------------------
drop function if exists soundex_match;
create function soundex_match(
    needle    varchar(128),
    haystack  text,
    splitChar varchar(1)
)
returns tinyint(4)
deterministic
    begin
        declare spacePos int;
        declare searchLen int default 0;
        declare tempStr text default haystack;
        declare tmp text default '';
        declare soundx1 varchar(64) default '';
        declare soundx2 varchar(64) default '';

        set searchLen = length(haystack);
        set spacePos = locate(splitChar, tempStr);
        set soundx1 = soundex(needle);

        while searchLen > 0 do
            if spacePos = 0
            then
                set tmp = tempStr;
                select soundex(tmp)
                into soundx2;
                if soundx1 = soundx2
                then
                    return 1;
                else
                    return 0;
                end if;
            else
                set tmp = substr(tempStr, 1, spacePos - 1);
                set soundx2 = soundex(tmp);
                if soundx1 = soundx2
                then
                    return 1;
                end if;

                set tempStr = substr(tempStr, spacePos + 1);
                set searchLen = length(tempStr);
            end if;

            set spacePos = locate(splitChar, tempStr);
        end while;

        return 0;
    end;

-- ----------------------------------------------------------------------
-- soundex_match_all
--
-- Soundex match multiple terms against multiple words
--
-- @needle......: The word you are looking for.
-- @haystack....: The block of text that you are searching.
-- @splitChar...: The whitespace character that'll split the
--                string into single words. Generally it is
--                the space(' ').
--
-- If any word in the haystack sounds similar to the needle,
-- the function will return 1. Otherwise it returns 0.
-- ----------------------------------------------------------------------
drop function if exists soundex_match_all;
create function soundex_match_all(
    needle    varchar(128),
    haystack  text,
    splitChar varchar(1)
)
returns tinyint(4)
deterministic
    begin
        /* find the first instance of the splitting character */
        declare comma int default 0;
        declare word text;

        set comma = LOCATE(splitChar, needle);
        set word = TRIM(needle);

        if LENGTH(haystack) = 0
        then
            return 0;
        elseif comma = 0
            then
                /* one word search term */
                return soundex_match(word, haystack, splitChar);
        end if;

        set word = trim(substr(needle, 1, comma));

        /* Insert each split variable into the word variable */
        repeat
            if soundex_match(word, haystack, splitChar) = 0
            then
                return 0;
            end if;

            /* get the next word */
            set needle = trim(substr(needle, comma));
            set comma = LOCATE(splitChar, needle);
            if comma = 0
            then
                /* last word */
                return soundex_match(needle, haystack, splitChar);
            end if;

            set word = trim(substr(needle, 1, comma));
        until length(word) = 0
        end repeat;

        return 0;
    end;