Javascript Character Classes

I have put together the beginnings of a JavaScript character class script. The idea was to create a testing platform and build the character class handler at the same time. It started from the need to build a scanner/parser, for which I was looking for functions such as IsDigit, IsLetter, IsLetterOrDigit and other similar functions.

Function List

The list of functions in this implementation is:

  • IsControl
  • IsPunctuation
  • IsDigit
  • IsLetter
  • IsLetterOrDigit
  • IsWhiteSpace
  • IsNoWSControl
  • IsUnicode

These functions are not 100% Unicode compatible, but there is always room for improvement. I used the ASCII chart from the Codepage section of the Wikipedia entry for UTF-8.

The IsControl function

IsControl:function(c){
    var cc = c.charCodeAt(0);
    if ( ( cc >= 00 && cc <= 0x1F ) ||
        ( cc == 0x7F ) ){
        return true ;
    }
    return false ;
},

The IsPunctuation function

IsPunctuation:function(c){
    var cc = c.charCodeAt(0);
    if ( ( cc >= 20 && cc <= 0x2F ) ||
        ( cc >= 0x3A && cc <= 0x40 ) ||
        ( cc >= 0x5B && cc <= 0x60 ) ||
        ( cc >= 0x7B && cc <= 0x7E ) ){
            return true ;
        }
        return false ;
    }
}

The IsDigit function

IsDigit: function(c){
    var cc = c.charCodeAt(0);
    if ( cc >= 0x30 && cc <= 0x39 ){
        return true ;
    }
    return false ;
}

The IsLetter function

IsLetter: function(c){
    var cc = c.charCodeAt(0);
    if ( ( cc >= 0x41 && cc <= 0x5A ) ||
        ( cc >= 0x61 && cc <= 0x7A ) ){
        return true ;
    }
    return false ;
}

The IsLetterOrDigit function

IsLetterOrDigit: function(c){
    var cc = c.charCodeAt(0);
    if ( ( cc >= 0x30 && cc <= 0x39 ) ||
        ( cc >= 0x41 && cc <= 0x5A ) ||
        ( cc >= 0x61 && cc <= 0x7A ) ){
        return true ;
    }
    return false ;
}

The IsWhiteSpace function

IsWhiteSpace: function(c){
    var cc = c.charCodeAt(0);
    if ( ( cc >= 0x0009 && cc <= 0x000D ) ||
        ( cc == 0x0020 ) ||
        ( cc == 0x0085 ) ||
        ( cc == 0x00A0 ) ){
        return true ;
    }
    return false ;
}

The Code Descriptions

As you can tell from the previous functions, there is a definite pattern being followed. Although there may be much more efficient ways to write this in terms of space and performance, that is not what this article is about. The basic idea of these functions is to get the character code at index 0 of the string that is passed in and check it against predefined ranges. Note that there is no type checking in these functions; that is left up to the developer using them.

The Testing Framework

/**
 * Counts how many character codes in 0..254 Char.IsControl accepts and
 * compares the total with the expected 33 (0x00-0x1F plus 0x7F).
 *
 * @returns {boolean} true when exactly 33 characters were accepted.
 */
function TestIsControl(){
    var tr = 0 ;
    // `i` is declared locally; assigning an undeclared `i` leaks a global
    // and throws a ReferenceError in strict mode.
    for ( var i = 0 ; i < 255 ; i++ ){
        if ( Char.IsControl ( String.fromCharCode ( i ) ) ){
            tr ++ ;
        }
    }
    return tr == 33 ;
}

/**
 * Counts how many character codes in 0..254 Char.IsPunctuation accepts and
 * compares the total with the expected 33: 16 codes in 0x20-0x2F, 7 in
 * 0x3A-0x40, 6 in 0x5B-0x60 and 4 in 0x7B-0x7E.
 *
 * @returns {boolean} true when exactly 33 characters were accepted.
 */
function TestIsPunctuation(){
    var tr = 0 ;
    // `i` is declared locally; assigning an undeclared `i` leaks a global
    // and throws a ReferenceError in strict mode.
    for ( var i = 0 ; i < 255 ; i++ ){
        // Exercise IsPunctuation itself; calling IsControl here only
        // repeated the control-character test.
        if ( Char.IsPunctuation ( String.fromCharCode ( i ) ) ){
            tr ++ ;
        }
    }
    return tr == 33 ;
}

/**
 * Counts how many character codes in 0..254 Char.IsLetterOrDigit accepts
 * and compares the total with the expected 62 (two 26-letter alphabets
 * plus 10 digits).
 *
 * @returns {boolean} true when exactly 62 characters were accepted.
 */
function TestIsLetterOrDigit(){
    var tr = 0 ;
    // `i` is declared locally; assigning an undeclared `i` leaks a global
    // and throws a ReferenceError in strict mode.
    for ( var i = 0 ; i < 255 ; i++ ){
        if ( Char.IsLetterOrDigit ( String.fromCharCode ( i ) ) ){
            tr ++ ;
        }
    }
    return tr == ( 26 * 2 ) + 10 ;
}

/**
 * Counts how many character codes in 0..254 Char.IsLetter accepts and
 * compares the total with the expected 52 (two 26-letter alphabets).
 *
 * @returns {boolean} true when exactly 52 characters were accepted.
 */
function TestIsLetter(){
    var tr = 0 ;
    // `i` is declared locally; assigning an undeclared `i` leaks a global
    // and throws a ReferenceError in strict mode.
    for ( var i = 0 ; i < 255 ; i++ ){
        if ( Char.IsLetter ( String.fromCharCode ( i ) ) ){
            tr ++ ;
        }
    }
    return tr == 26 * 2 ;
}

/**
 * Counts how many character codes in 0..254 Char.IsDigit accepts and
 * compares the total with the expected 10.
 *
 * @returns {boolean} true when exactly 10 characters were accepted.
 */
function TestIsDigit(){
    var tr = 0 ;
    // `i` is declared locally; assigning an undeclared `i` leaks a global
    // and throws a ReferenceError in strict mode.
    for ( var i = 0 ; i < 255 ; i++ ){
        if ( Char.IsDigit ( String.fromCharCode ( i ) ) ){
            tr ++ ;
        }
    }
    return tr == 10 ;
}

/**
 * Counts how many character codes in 0..254 Char.IsWhiteSpace accepts and
 * compares the total with the expected 8 (0x09-0x0D, 0x20, 0x85, 0xA0).
 *
 * @returns {boolean} true when exactly 8 characters were accepted.
 */
function TestIsWS(){
    var tr = 0 ;
    // `i` is declared locally; assigning an undeclared `i` leaks a global
    // and throws a ReferenceError in strict mode.
    for ( var i = 0 ; i < 255 ; i++ ){
        if ( Char.IsWhiteSpace ( String.fromCharCode ( i ) ) ){
            tr ++ ;
        }
    }

    return tr == 8 ;
}

/**
 * Counts how many character codes in 0..254 Char.IsNoWSControl accepts and
 * compares the total with the expected 28 (the 33 control codes minus the
 * five codes 0x09-0x0D).
 *
 * @returns {boolean} true when exactly 28 characters were accepted.
 */
function TestIsNoWSControl(){
    var tr = 0 ;
    // `i` is declared locally; assigning an undeclared `i` leaks a global
    // and throws a ReferenceError in strict mode.
    for ( var i = 0 ; i < 255 ; i++ ){
        if ( Char.IsNoWSControl ( String.fromCharCode ( i ) ) ){
            tr ++ ;
        }
    }
    return tr == 28 ;
}

/**
 * Minimal selector helper: wraps an element id so HTML can be appended to
 * that element.
 *
 * @param {string} obj Selector such as "#TestHarness"; the first character
 *     is dropped and the remainder is used as the element id.
 * @returns {{context: string, append: function(string): void}} Wrapper whose
 *     `append` adds markup to the end of the element's innerHTML.
 */
// Declared with `var`: assigning an undeclared `$` creates an implicit
// global and throws a ReferenceError in strict mode.
var $ = function(obj){
    var _this = {
        context:"",
        append:function(strHtml){
            // Looked up on every call, so the element only needs to exist
            // by the time append() runs.
            var target = document.getElementById ( this.context ) ;
            target.innerHTML += strHtml ;
        }
    };
    _this.context = obj.substring(1);
    return _this ;
};
// Once the window has loaded, runs each character-class test and appends one
// <li> per result to the element with id "TestHarness".
window.onload = function() {
    // Label / test-function pairs, in display order.
    var checks = [
        [ "Control Test=", TestIsControl ],
        [ "Punctuation Test=", TestIsPunctuation ],
        [ "Letter Or Digit Test =", TestIsLetterOrDigit ],
        [ "Letter Test =", TestIsLetter ],
        [ "Digit Test =", TestIsDigit ],
        [ "WhiteSpace Test =", TestIsWS ],
        [ "NoWS Control Test =", TestIsNoWSControl ]
    ];
    for ( var k = 0 ; k < checks.length ; k++ ){
        // A string literal cannot span physical lines, so the line breaks and
        // tab that surrounded each <li> are written as escapes.
        $("#TestHarness").append( "\n\t<li>" + checks[k][0] + checks[k][1]() + "</li>\n" ) ;
    }
};

Test Framework Descriptions

The test framework follows a pattern just like the character class functions: each test generates the characters for the character codes from 0 to 254 (the loops stop before 255) and passes each one to the character class function under test. If the result is positive we add 1 to the count of positive values. Then we simply check the total against the expected total.

The $ Dollar function

The dollar function is a helper that is used to merely grab the id of our output div and append html to it.

The OnLoad function

The onload function is run when the window has completed loading for us. ( NOTE: This may not be supported on all platforms ). An alternative to this function is the document.readyState property tested in a timeout.

The Complete Class

/**
 * Char: ASCII-oriented character-class predicates.
 *
 * Every predicate looks only at the first character of the string `c` and
 * returns a boolean. No type checking is performed on `c`.
 */
var Char =  (function(){
    var _this = {
        // Control codes 0x00-0x1F plus DEL (0x7F). The bound is written 0x00
        // because the legacy octal literal `00` is rejected in strict mode.
        IsControl:function(c){
            var cc = c.charCodeAt(0);
            return ( cc >= 0x00 && cc <= 0x1F ) || ( cc == 0x7F ) ;
        },
        // Control codes as in IsControl, excluding the codes 0x09-0x0D.
        IsNoWSControl:function(c){
            var cc = c.charCodeAt(0);
            if ( ( cc >= 0x00 && cc <= 0x1F ) || ( cc == 0x7F ) ){
                return !( cc >= 0x09 && cc <= 0x0D ) ;
            }
            return false ;
        },
        // Codes 0x20-0x2F, 0x3A-0x40, 0x5B-0x60 and 0x7B-0x7E. The first
        // bound is hexadecimal 0x20; the decimal literal 20 (0x14) let
        // control codes 0x14-0x1F through.
        IsPunctuation:function(c){
            var cc = c.charCodeAt(0);
            return ( cc >= 0x20 && cc <= 0x2F ) ||
                ( cc >= 0x3A && cc <= 0x40 ) ||
                ( cc >= 0x5B && cc <= 0x60 ) ||
                ( cc >= 0x7B && cc <= 0x7E ) ;
        },
        // ASCII digits, upper-case letters or lower-case letters.
        IsLetterOrDigit: function(c){
            var cc = c.charCodeAt(0);
            return ( cc >= 0x30 && cc <= 0x39 ) ||
                ( cc >= 0x41 && cc <= 0x5A ) ||
                ( cc >= 0x61 && cc <= 0x7A ) ;
        },
        // ASCII digits 0x30-0x39.
        IsDigit: function(c){
            var cc = c.charCodeAt(0);
            return cc >= 0x30 && cc <= 0x39 ;
        },
        // ASCII letters 0x41-0x5A and 0x61-0x7A.
        IsLetter: function(c){
            var cc = c.charCodeAt(0);
            return ( cc >= 0x41 && cc <= 0x5A ) ||
                ( cc >= 0x61 && cc <= 0x7A ) ;
        },
        // Codes 0x09-0x0D, 0x20, 0x85, 0xA0 and the listed code points
        // above 0xFF.
        IsWhiteSpace: function(c){
            var cc = c.charCodeAt(0);
            return ( cc >= 0x0009 && cc <= 0x000D ) ||
                ( cc == 0x0020 ) ||
                ( cc == 0x0085 ) ||
                ( cc == 0x00A0 ) ||
                ( cc == 0x1680 ) ||
                ( cc == 0x180E ) ||
                ( cc >= 0x2000 && cc <= 0x200A ) ||
                ( cc == 0x2028 ) ||
                ( cc == 0x2029 ) ||
                ( cc == 0x202F ) ||
                ( cc == 0x205F ) ||
                ( cc == 0x3000 ) ;
        },
        // Tab, LF, CR, 0x20-0xD7FF, 0xE000-0xFFFD or 0x10000-0x10FFFF.
        IsUnicode: function(c){
            // charCodeAt never returns more than 0xFFFF, which made the
            // 0x10000-0x10FFFF branch unreachable. codePointAt combines a
            // surrogate pair into one code point; fall back to charCodeAt
            // where codePointAt is unavailable.
            var cc = ( typeof c.codePointAt === "function" )
                ? c.codePointAt(0)
                : c.charCodeAt(0);
            return ( cc == 0x0009 ) ||
                ( cc == 0x000A ) ||
                ( cc == 0x000D ) ||
                ( cc >= 0x0020 && cc <= 0xD7FF ) ||
                ( cc >= 0xE000 && cc <= 0xFFFD ) ||
                ( cc >= 0x10000 && cc <= 0x10FFFF ) ;
        }
    };
    return _this ;
})();
/**
 * Counts how many character codes in 0..254 Char.IsControl accepts and
 * compares the total with the expected 33 (0x00-0x1F plus 0x7F).
 *
 * @returns {boolean} true when exactly 33 characters were accepted.
 */
function TestIsControl(){
    var tr = 0 ;
    // The loop condition uses a real `<` (the listing had the HTML entity),
    // and `i` is declared locally instead of leaking a global.
    for ( var i = 0 ; i < 255 ; i++ ){
        if ( Char.IsControl ( String.fromCharCode ( i ) ) ){
            tr ++ ;
        }
    }
    return tr == 33 ;
}

/**
 * Counts how many character codes in 0..254 Char.IsPunctuation accepts and
 * compares the total with the expected 33: 16 codes in 0x20-0x2F, 7 in
 * 0x3A-0x40, 6 in 0x5B-0x60 and 4 in 0x7B-0x7E.
 *
 * @returns {boolean} true when exactly 33 characters were accepted.
 */
function TestIsPunctuation(){
    var tr = 0 ;
    // The loop condition uses a real `<` (the listing had the HTML entity),
    // and `i` is declared locally instead of leaking a global.
    for ( var i = 0 ; i < 255 ; i++ ){
        // Exercise IsPunctuation itself; calling IsControl here only
        // repeated the control-character test.
        if ( Char.IsPunctuation ( String.fromCharCode ( i ) ) ){
            tr ++ ;
        }
    }
    return tr == 33 ;
}

/**
 * Counts how many character codes in 0..254 Char.IsLetterOrDigit accepts
 * and compares the total with the expected 62 (two 26-letter alphabets
 * plus 10 digits).
 *
 * @returns {boolean} true when exactly 62 characters were accepted.
 */
function TestIsLetterOrDigit(){
    var tr = 0 ;
    // The loop condition uses a real `<` (the listing had the HTML entity),
    // and `i` is declared locally instead of leaking a global.
    for ( var i = 0 ; i < 255 ; i++ ){
        if ( Char.IsLetterOrDigit ( String.fromCharCode ( i ) ) ){
            tr ++ ;
        }
    }
    return tr == ( 26 * 2 ) + 10 ;
}

/**
 * Counts how many character codes in 0..254 Char.IsLetter accepts and
 * compares the total with the expected 52 (two 26-letter alphabets).
 *
 * @returns {boolean} true when exactly 52 characters were accepted.
 */
function TestIsLetter(){
    var tr = 0 ;
    // The loop condition uses a real `<` (the listing had the HTML entity),
    // and `i` is declared locally instead of leaking a global.
    for ( var i = 0 ; i < 255 ; i++ ){
        if ( Char.IsLetter ( String.fromCharCode ( i ) ) ){
            tr ++ ;
        }
    }
    return tr == 26 * 2 ;
}

/**
 * Counts how many character codes in 0..254 Char.IsDigit accepts and
 * compares the total with the expected 10.
 *
 * @returns {boolean} true when exactly 10 characters were accepted.
 */
function TestIsDigit(){
    var tr = 0 ;
    // The loop condition uses a real `<` (the listing had the HTML entity),
    // and `i` is declared locally instead of leaking a global.
    for ( var i = 0 ; i < 255 ; i++ ){
        if ( Char.IsDigit ( String.fromCharCode ( i ) ) ){
            tr ++ ;
        }
    }
    return tr == 10 ;
}

/**
 * Counts how many character codes in 0..254 Char.IsWhiteSpace accepts and
 * compares the total with the expected 8 (0x09-0x0D, 0x20, 0x85, 0xA0).
 *
 * @returns {boolean} true when exactly 8 characters were accepted.
 */
function TestIsWS(){
    var tr = 0 ;
    // The loop condition uses a real `<` (the listing had the HTML entity),
    // and `i` is declared locally instead of leaking a global.
    for ( var i = 0 ; i < 255 ; i++ ){
        if ( Char.IsWhiteSpace ( String.fromCharCode ( i ) ) ){
            tr ++ ;
        }
    }

    return tr == 8 ;
}

/**
 * Counts how many character codes in 0..254 Char.IsNoWSControl accepts and
 * compares the total with the expected 28 (the 33 control codes minus the
 * five codes 0x09-0x0D).
 *
 * @returns {boolean} true when exactly 28 characters were accepted.
 */
function TestIsNoWSControl(){
    var tr = 0 ;
    // The loop condition uses a real `<` (the listing had the HTML entity),
    // and `i` is declared locally instead of leaking a global.
    for ( var i = 0 ; i < 255 ; i++ ){
        if ( Char.IsNoWSControl ( String.fromCharCode ( i ) ) ){
            tr ++ ;
        }
    }
    return tr == 28 ;
}

/**
 * Minimal selector helper: wraps an element id so HTML can be appended to
 * that element.
 *
 * @param {string} obj Selector such as "#TestHarness"; the first character
 *     is dropped and the remainder is used as the element id.
 * @returns {{context: string, append: function(string): void}} Wrapper whose
 *     `append` adds markup to the end of the element's innerHTML.
 */
// Declared with `var`: assigning an undeclared `$` creates an implicit
// global and throws a ReferenceError in strict mode.
var $ = function(obj){
    var _this = {
        context:"",
        append:function(strHtml){
            // Looked up on every call, so the element only needs to exist
            // by the time append() runs.
            var target = document.getElementById ( this.context ) ;
            target.innerHTML += strHtml ;
        }
    };
    _this.context = obj.substring(1);
    return _this ;
};
// Once the window has loaded, runs each character-class test and appends one
// <li> per result to the element with id "TestHarness".
window.onload = function() {
    // Label / test-function pairs, in display order.
    var checks = [
        [ "Control Test=", TestIsControl ],
        [ "Punctuation Test=", TestIsPunctuation ],
        [ "Letter Or Digit Test =", TestIsLetterOrDigit ],
        [ "Letter Test =", TestIsLetter ],
        [ "Digit Test =", TestIsDigit ],
        [ "WhiteSpace Test =", TestIsWS ],
        [ "NoWS Control Test =", TestIsNoWSControl ]
    ];
    for ( var k = 0 ; k < checks.length ; k++ ){
        // A string literal cannot span physical lines, so the line breaks and
        // tab that surrounded each <li> are written as escapes.
        $("#TestHarness").append( "\n\t<li>" + checks[k][0] + checks[k][1]() + "</li>\n" ) ;
    }
};

Final Notes

The only remaining thing that is needed is the html skeleton enabling the testing framework which merely needs to include the javascript and have a div with the id TestHarness.

ttessier

About ttessier

Professional Developer and Operator of SwhistleSoft
This entry was posted in Javascript Development and tagged , , , , , , , , , . Bookmark the permalink.

Leave a Reply

Your email address will not be published. Required fields are marked *