// JavaScript function to remove HTML formatting from SharePoint Rich Text Editor content.
/**
 * Remove Microsoft Word formatting from SharePoint Rich Text Editor content.
 *
 * Content pasted from Word into SharePoint 2010's Rich Text and Enhanced Rich
 * Text editors arrives with a large number of Word-specific tags, attributes
 * and inline styles. Cleaning these up by hand is slow, so this function
 * strips them with a fixed sequence of regular-expression replacements.
 *
 * The steps run in order and later steps rely on earlier ones:
 *   1. drop Word's <o:p> markers and Word-only CSS declarations;
 *   2. drop face/class/style/lang attributes;
 *   3. unwrap attribute-less <span>/<font> wrappers and drop XML/namespaced tags;
 *   4. replace headings with their text followed by <br>;
 *   5. remove elements that ended up empty;
 *   6. turn the remaining <p> and <font> elements into <div> and drop size
 *      attributes.
 *
 * This is a best-effort regex cleaner, not an HTML parser or a sanitizer; do
 * not rely on it to make untrusted markup safe.
 *
 * Source: https://maxderungs.com/blog/remove-html-formatting-from-sharepoint-rich-text-editor-content-with-javascript
 *
 * @param {string} str HTML fragment to clean. null/undefined yield "".
 * @returns {string} The cleaned HTML fragment.
 */
function CleanWordHTML(str) {
  if (str === null || str === undefined) {
    return "";
  }

  // Work on a local copy instead of reassigning the parameter.
  var html = String(str);

  // --- 1. Word paragraph markers and Word-only CSS declarations ------------
  html = html.replace(/<o:p>\s*<\/o:p>/g, "");
  html = html.replace(/<o:p>.*?<\/o:p>/g, " ");
  html = html.replace(/\s*mso-[^:]+:[^;"]+;?/gi, "");
  html = html.replace(/\s*MARGIN: 0cm 0cm 0pt\s*;/gi, "");
  html = html.replace(/\s*MARGIN: 0cm 0cm 0pt\s*"/gi, '"');
  html = html.replace(/\s*TEXT-INDENT: 0cm\s*;/gi, "");
  html = html.replace(/\s*TEXT-INDENT: 0cm\s*"/gi, '"');
  html = html.replace(/\s*TEXT-ALIGN: [^\s;]+;?"/gi, '"');
  html = html.replace(/\s*PAGE-BREAK-BEFORE: [^\s;]+;?"/gi, '"');
  html = html.replace(/\s*FONT-VARIANT: [^\s;]+;?"/gi, '"');
  html = html.replace(/\s*tab-stops:[^;"]*;?/gi, "");
  html = html.replace(/\s*tab-stops:[^"]*/gi, "");

  // --- 2. Presentational attributes ----------------------------------------
  html = html.replace(/\s*face="[^"]*"/gi, "");
  html = html.replace(/\s*face=[^ >]*/gi, "");
  html = html.replace(/\s*FONT-FAMILY:[^;"]*;?/gi, "");
  html = html.replace(/<(\w[^>]*) class=([^ |>]*)([^>]*)/gi, "<$1$3");
  html = html.replace(/<(\w[^>]*) style="([^\"]*)"([^>]*)/gi, "<$1$3");
  html = html.replace(/\s*style="\s*"/gi, "");

  // --- 3. Wrapper elements and XML/namespaced tags --------------------------
  // A span holding only a (possibly non-breaking) space collapses to a space.
  html = html.replace(/<SPAN\s*[^>]*>\s*[ \u00a0]\s*<\/SPAN>/gi, " ");
  html = html.replace(/<SPAN\s*[^>]*><\/SPAN>/gi, "");
  html = html.replace(/<(\w[^>]*) lang=([^ |>]*)([^>]*)/gi, "<$1$3");
  // [\s\S] rather than "." so wrappers whose content spans lines are unwrapped too.
  html = html.replace(/<SPAN\s*>([\s\S]*?)<\/SPAN>/gi, "$1");
  html = html.replace(/<FONT\s*>([\s\S]*?)<\/FONT>/gi, "$1");
  html = html.replace(/<\\?\?xml[^>]*>/gi, "");
  html = html.replace(/<\/?\w+:[^>]*>/gi, "");

  // --- 4. Headings: keep the text, end it with a line break -----------------
  html = html.replace(/<H\d>\s*<\/H\d>/gi, "");
  html = html.replace(/<H[1-6][^>]*>/gi, "");
  // Remove this replace to take out the breaks where heading tags were.
  html = html.replace(/<\/H\d>/gi, "<br>");

  // --- 5. Elements left empty by the steps above ----------------------------
  html = html.replace(/<(U|I|STRIKE)>[ \u00a0]<\/\1>/gi, " ");
  // The closing tag must be spelled out in full: an unparenthesised "|" would
  // split the whole pattern and delete every "B>" in the document.
  html = html.replace(/<B>[ \u00a0]<\/B>/gi, "");

  // Removing an empty element can leave its parent empty, so repeat until the
  // markup stops changing. Every replacement shortens the string, so this
  // always terminates.
  var emptyElement = /<([^\s>]+)[^>]*>\s*<\/\1>/g;
  var previous;
  do {
    previous = html;
    html = html.replace(emptyElement, "");
  } while (html !== previous);

  // --- 6. Normalise remaining block/inline wrappers to <div> ----------------
  // Require ">" or whitespace right after the tag name so <PRE>/<PARAM> are
  // not mistaken for <P>. Unmatched optional groups substitute as "".
  html = html.replace(/<P(\s[^>]*)?>([\s\S]*?)<\/P>/gi, "<div$1>$2</div>");
  html = html.replace(/<FONT(\s[^>]*)?>([\s\S]*?)<\/FONT>/gi, "<div$1>$2</div>");

  // Drop size attributes, but only inside a tag so the word "size" in visible
  // text is left alone.
  html = html.replace(
    /<([^>]*?)\s+size\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)/gi,
    "<$1"
  );

  return html;
}