HTML Entity Encoder / Decoder

Audience Level

Beginner and above

Entity Encoder

Summary

Converts a text string into decimal entities. E.g.: "hello" becomes "&#104;&#101;&#108;&#108;&#111;"

Instructions

Invoke as getTextToEntities(stringToEncode, partialEncodeOnly), where "partialEncodeOnly" is either true (leave letters, digits, "_" and whitespace unencoded, and encode everything else) or false (encode every character).

Example

getTextToEntities("Fish&chips", true) returns "Fish&#38;chips"
getTextToEntities("Fish&chips", false) returns "&#70;&#105;&#115;&#104;&#38;&#99;&#104;&#105;&#112;&#115;"

Source Code

/**
 * Converts a text string into decimal HTML character references
 * (e.g. "&" becomes "&#38;").
 *
 * Encoding is done per Unicode code point: a UTF-16 surrogate pair is
 * emitted as ONE reference (U+1F600 becomes "&#128512;"), because references
 * to individual surrogates are not valid HTML. An unpaired surrogate has
 * no code point of its own and is emitted as its raw code-unit value.
 *
 * @param {string} strPlainText - Text to encode. Coerced with String().
 * @param {boolean} blnPartialEncodeOnly - When truthy, word characters
 *     ([A-Za-z0-9_]) and whitespace are left as-is and only the remaining
 *     characters are encoded. When falsy, every character is encoded.
 * @returns {string} The encoded text.
 */
function getTextToEntities(strPlainText, blnPartialEncodeOnly) {
    var strSource       = String(strPlainText);
    var arrEncoded      = [];
    // Characters kept verbatim in partial mode: \w (letters, digits, "_")
    // and \s (ALL whitespace, including newlines - not just space/tab).
    var objKeepRegExp   = /[\w\s]/;

    for (var i = 0, j = strSource.length; i < j; ++i) {
        var strChar = strSource.charAt(i);
        var intChar = strSource.charCodeAt(i);

        // A high surrogate followed by a low surrogate encodes a single
        // code point above U+FFFF; combine them and consume both units.
        if (intChar >= 0xD800 && intChar <= 0xDBFF && i + 1 < j) {
            var intLow = strSource.charCodeAt(i + 1);
            if (intLow >= 0xDC00 && intLow <= 0xDFFF) {
                intChar = (intChar - 0xD800) * 0x400 + (intLow - 0xDC00) + 0x10000;
                strChar += strSource.charAt(i + 1);
                ++i;
            }
        }

        if (blnPartialEncodeOnly && objKeepRegExp.test(strChar)) {
            arrEncoded.push(strChar);
        }
        else {
            arrEncoded.push("&#" + intChar + ";");
        }
    }
    return arrEncoded.join("");
}

Entity Decoder

Summary

Converts a string of decimal entities (or a mixed string of entities and plain text) into plain text.

Example

getEntitiesToText("&#104;&#101;&#108;&#108;&#111;") returns "hello"

Source Code

/**
 * Converts a string of decimal entities (or a mixed string of entities
 * and plain text) into plain text.
 *
 * The input is decoded in a single left-to-right pass, so text produced
 * by decoding is never decoded a second time ("&#38;#104;" yields the
 * literal text "&#104;", not "h").
 *
 * @param {string} strEncodedText - Text to decode. Coerced with String().
 * @returns {string} The decoded text. References above U+10FFFF are not
 *     valid code points and are left unchanged.
 */
function getEntitiesToText(strEncodedText) {
    var strData = String(strEncodedText);

    // One global replace with a callback: no shared lastIndex state to go
    // stale between iterations and no per-entity RegExp construction.
    return strData.replace(/&#(\d+);/g, function (strEntity, strDigits) {
        var intCode = parseInt(strDigits, 10);

        if (intCode > 0x10FFFF) {
            // Out of Unicode range (also covers absurdly long digit runs).
            return strEntity;
        }
        if (intCode > 0xFFFF) {
            // String.fromCharCode truncates to 16 bits, so build the
            // UTF-16 surrogate pair for astral code points by hand.
            intCode -= 0x10000;
            return String.fromCharCode(
                0xD800 + (intCode >> 10),
                0xDC00 + (intCode & 0x3FF)
            );
        }
        return String.fromCharCode(intCode);
    });
}

Update

Kirk Schneider writes: Hi Andrew, I just wanted to thank you for posting the Entity Decoder. It saved us the time of having to come up with a regexp to handle this. After working with your routine for a while, we realized that there was a much simpler way of doing this using your regexp. Below is a rewritten version of your function which runs more quickly and also downloads more quickly to the client side.

/**
 * Compact entity decoder: converts decimal entities in a string (which
 * may also contain plain text) into the characters they represent.
 *
 * The replacement MUST be a function. Passing String.fromCharCode("$1")
 * evaluates fromCharCode once, up front, on the literal text "$1"
 * (NaN, hence U+0000) instead of on each matched number.
 *
 * @param {string} strEncodedText - Text to decode. Coerced with String().
 * @returns {string} The decoded text. References above U+10FFFF are left
 *     unchanged.
 */
function getEntitiesToText(strEncodedText) {
    return String(strEncodedText).replace(/&#(\d+);/g, function (strEntity, strDigits) {
        var intCode = Number(strDigits);

        if (intCode > 0x10FFFF) {
            // Not a valid Unicode code point; keep the original text.
            return strEntity;
        }
        if (intCode <= 0xFFFF) {
            return String.fromCharCode(intCode);
        }
        // Above the BMP: fromCharCode would truncate to 16 bits, so emit
        // the equivalent UTF-16 surrogate pair.
        intCode -= 0x10000;
        return String.fromCharCode(0xD800 + (intCode >> 10), 0xDC00 + (intCode & 0x3FF));
    });
}

Advertisement

Feedback

Voting Panel
Could this be improved?
or
Did you find any bugs?
or
Rate this script: (0 = poor, 5 = very good)
Answers are anonymous, only the combined totals are stored. Uses cookies.