Some boundary foundations

This commit is contained in:
斟酌 鵬兄 2016-03-30 08:39:13 +08:00
parent 564aef86aa
commit a7d2e80f4a
5 changed files with 288 additions and 1 deletions

View File

@ -313,7 +313,11 @@
this.__cMovement = true;
// Word boundary
this.__comp( kCode, function(){
debug.Info( "Word boundary" );
var WordMatch = analyzer.wordAt( ccur.aPos );
debug.Info( "Word: "
+ ccur.feeder.content.substring( WordMatch.open, WordMatch.close + 1 )
);
}, W );
this.__comp( kCode, function(){
debug.Info( "Bracket boundary [" );

View File

@ -4,6 +4,9 @@
/** @type {System.Debug} */
var debug = __import( "System.Debug" );
/** @type {Components.Vim.Syntax.Word} */
var Word = ns[ NS_INVOKE ]( "Word" );
var TOK_OPEN = 0;
var TOK_CLOSED = 1;
var TOK_LEVEL = 2;
@ -247,6 +250,24 @@
}
};
Analyzer.prototype.wordAt = function( p )
{
var c = this.__feeder.content;
var Len = c.length;
var i = p, j = p;
var word = new Word( c[ p ] );
if( 0 < p ) while( word.test( c[ -- i ] ) );
if( p < Len ) while( word.test( c[ ++ j ] ) );
var tMatch = new TokenMatch();
tMatch.__open = i + 1;
tMatch.__close = j - 1;
return tMatch;
};
var TokenMatch = function()
{
this.__open = -1;

View File

@ -0,0 +1,255 @@
(function(){
var ns = __namespace( "Components.Vim.Syntax" );
var KINGDOMS = [
[ // Numbers
[ 0x0030, 0x0039 ]
]
,
[ // Latin
[ 0x0041, 0x005A ], [ 0x0061, 0x007A ] // Basic Latin
, [ 0x00C0, 0x00FF ] // Latin-1 Supplement
, [ 0x0100, 0x017F ] // Latin Extended-A
, [ 0x0180, 0x024F ] // Latin Extended-B
// Latin-2 supplement
, [ 0x1D00, 0x1D7F ] // Phonetic Extensions
, [ 0x1D80, 0x1DBF ] // Phonetic Extensions Supplement
, [ 0x1DC0, 0x1DFF ] // Combining Diacritical Marks Supplement
, [ 0x1E00, 0x1EFF ] // Latin extended additional
, [ 0x1F00, 0x1FFF ] // Greek Extended
]
,
[
, [ 0x0250, 0x02AF ] // IPA Extensions
, [ 0x02B0, 0x02FF ] // Spacing Modifier Letters
, [ 0x0300, 0x036F ] // Combining Diacritical Marks
, [ 0x0370, 0x03FF ] // Greek and Coptic
, [ 0x0400, 0x04FF ] // Cyrillic
, [ 0x0500, 0x052F ] // Cyrillic Supplement
, [ 0x0530, 0x058F ] // Armenian
]
,
[ // Aramaic Scripts
[ 0x0590, 0x05FF ] // Hebrew
, [ 0x0600, 0x06FF ] // Arabic
, [ 0x0700, 0x074F ] // Syriac
, [ 0x0750, 0x077F ] // Arabic Supplement
, [ 0x0780, 0x07BF ] // Thaana
, [ 0x07C0, 0x07FF ] // N'Ko
, [ 0x0800, 0x083F ] // Samaritan
, [ 0x0840, 0x085F ] // Mandaic
, [ 0x08A0, 0x08FF ] // Arabic Extended-A
]
,
[ // Brahmic scripts
[ 0x0900, 0x097F ] // Devanagari
, [ 0x0980, 0x09FF ] // Bengali
, [ 0x0A00, 0x0A7F ] // Gurmukhi
, [ 0x0A80, 0x0AFF ] // Gujarati
, [ 0x0B00, 0x0B7F ] // Oriya
, [ 0x0B80, 0x0BFF ] // Tamil
, [ 0x0C00, 0x0C7F ] // Telugu
, [ 0x0C80, 0x0CFF ] // Kannada
, [ 0x0D00, 0x0D7F ] // Malayalam
, [ 0x0D80, 0x0DFF ] // Sinhala
, [ 0x0E00, 0x0E7F ] // Thai
, [ 0x0E80, 0x0EFF ] // Lao
, [ 0x0F00, 0x0FFF ] // Tibetan
, [ 0x1000, 0x109F ] // Myanmar
, [ 0x10A0, 0x10FF ] // Georgian
, [ 0x1100, 0x11FF ] // Hangul Jamo
, [ 0x1200, 0x137F ] // Ethiopic
, [ 0x1380, 0x139F ] // Ethiopic Supplement
, [ 0x13A0, 0x13FF ] // Cherokee
, [ 0x1400, 0x167F ] // Unified Canadian Aboriginal Syllabics
, [ 0x1680, 0x169F ] // Ogham
, [ 0x16A0, 0x16FF ] // Runic
]
,
[ // Philippine scripts
[ 0x1700, 0x171F ] // Tagalog
, [ 0x1720, 0x173F ] // Hanunoo
, [ 0x1740, 0x175F ] // Buhid
, [ 0x1760, 0x177F ] // Tagbanwa
, [ 0x1780, 0x17FF ] // Khmer
, [ 0x1800, 0x18AF ] // Mongolian
, [ 0x18B0, 0x18FF ] // Unified Canadian Aboriginal Syllabics Extended
, [ 0x1900, 0x194F ] // Limbu
]
,
[ // Tai scripts
[ 0x1950, 0x197F ] // Tai Le
, [ 0x1980, 0x19DF ] // Tai Lue
, [ 0x19E0, 0x19FF ] // Khmer Symbols
, [ 0x1A00, 0x1A1F ] // Buginese
, [ 0x1A20, 0x1AAF ] // Tai Tham
, [ 0x1AB0, 0x1AFF ] // Combining Diacritical Marks Extended
, [ 0x1B00, 0x1B7F ] // Balinese
, [ 0x1B80, 0x1BBF ] // Sundanese
, [ 0x1BC0, 0x1BFF ] // Batak
, [ 0x1C00, 0x1C4F ] // Lepcha
, [ 0x1C50, 0x1C7F ] // Ol Chiki
, [ 0x1CC0, 0x1CCF ] // Sundanese Supplement
, [ 0x1CD0, 0x1CFF ] // Vedic Extensions
]
,
[ // Symbols
[ 0x2000, 0x206F ] // General Punctuation
, [ 0x2070, 0x209F ] // Superscripts and Subscripts
, [ 0x20A0, 0x20CF ] // Currency Symbols
, [ 0x20D0, 0x20FF ] // Combining Diacritical Marks for Symbols
, [ 0x2100, 0x214F ] // Letterlike Symbols
, [ 0x2150, 0x218F ] // Number Forms
, [ 0x2190, 0x21FF ] // Arrows
, [ 0x2200, 0x22FF ] // Mathematical Operators
, [ 0x2300, 0x23FF ] // Miscellaneous Technical
, [ 0x2400, 0x243F ] // Control Pictures
, [ 0x2440, 0x245F ] // Optical Character Recognition
, [ 0x2460, 0x24FF ] // Enclosed Alphanumerics
, [ 0x2500, 0x257F ] // Box Drawing
, [ 0x2580, 0x259F ] // Block Elements
, [ 0x25A0, 0x25FF ] // Geometric Shapes
, [ 0x2600, 0x26FF ] // Miscellaneous Symbols
, [ 0x2700, 0x27BF ] // Dingbats
, [ 0x27C0, 0x27EF ] // Miscellaneous Mathematical Symbols-A
, [ 0x27F0, 0x27FF ] // Supplemental Arrows-A
, [ 0x2800, 0x28FF ] // Braille Patterns
, [ 0x2900, 0x297F ] // Supplemental Arrows-B
, [ 0x2980, 0x29FF ] // Miscellaneous Mathematical Symbols-B
, [ 0x2A00, 0x2AFF ] // Supplemental Mathematical Operators
, [ 0x2B00, 0x2BFF ] // Miscellaneous Symbols and Arrows
, [ 0x2C00, 0x2C5F ] // Glagolitic
, [ 0x2C60, 0x2C7F ] // Latin Extended-C
, [ 0x2C80, 0x2CFF ] // Coptic
, [ 0x2D00, 0x2D2F ] // Georgian Supplement
, [ 0x2D30, 0x2D7F ] // Tifinagh
, [ 0x2D80, 0x2DDF ] // Ethiopic Extended
, [ 0x2DE0, 0x2DFF ] // Cyrillic Extended-A
, [ 0x2E00, 0x2E7F ] // Supplemental Punctuation
]
,
[ // CJK scripts and symbols
, [ 0x2E80, 0x2EFF ] // CJK Radicals Supplement
, [ 0x2F00, 0x2FDF ] // Kangxi Radicals
, [ 0x2FF0, 0x2FFF ] // Ideographic Description Characters
, [ 0x3000, 0x303F ] // CJK Symbols and Punctuation
, [ 0x3040, 0x309F ] // Hiragana
, [ 0x30A0, 0x30FF ] // Katakana
, [ 0x3100, 0x312F ] // Bopomofo
, [ 0x3130, 0x318F ] // Hangul Compatibility Jamo
, [ 0x3190, 0x319F ] // Kanbun
, [ 0x31A0, 0x31BF ] // Bopomofo Extended
, [ 0x31C0, 0x31EF ] // CJK Strokes
, [ 0x31F0, 0x31FF ] // Katakana Phonetic Extensions
, [ 0x3200, 0x32FF ] // Enclosed CJK Letters and Months
, [ 0x3300, 0x33FF ] // CJK Compatibility
, [ 0x3400, 0x4DBF ] // CJK Unified Ideographs Extension A
, [ 0x4DC0, 0x4DFF ] // Yijing Hexagram Symbols
, [ 0x4E00, 0x9FFF ] // CJK Unified Ideographs
, [ 0xA000, 0xA48F ] // Yi Syllables
, [ 0xA490, 0xA4CF ] // Yi Radicals
, [ 0xA4D0, 0xA4FF ] // Lisu
, [ 0xA500, 0xA63F ] // Vai
, [ 0xA640, 0xA69F ] // Cyrillic Extended-B
, [ 0xA6A0, 0xA6FF ] // Bamum
, [ 0xA700, 0xA71F ] // Modifier Tone Letters
, [ 0xA720, 0xA7FF ] // Latin Extended-D
, [ 0xA800, 0xA82F ] // Syloti Nagri
, [ 0xA830, 0xA83F ] // Common Indic Number Forms
, [ 0xA840, 0xA87F ] // Phags-pa
, [ 0xA880, 0xA8DF ] // Saurashtra
, [ 0xA8E0, 0xA8FF ] // Devanagari Extended
, [ 0xA900, 0xA92F ] // Kayah Li
, [ 0xA930, 0xA95F ] // Rejang
, [ 0xA960, 0xA97F ] // Hangul Jamo Extended-A
, [ 0xA980, 0xA9DF ] // Javanese
, [ 0xA9E0, 0xA9FF ] // Myanmar Extended-B
, [ 0xAA00, 0xAA5F ] // Cham
, [ 0xAA60, 0xAA7F ] // Myanmar Extended-A
, [ 0xAA80, 0xAADF ] // Tai Viet
, [ 0xAAE0, 0xAAFF ] // Meetei Mayek Extensions
, [ 0xAB00, 0xAB2F ] // Ethiopic Extended-A
, [ 0xAB30, 0xAB6F ] // Latin Extended-E
, [ 0xAB70, 0xABBF ] // Cherokee Supplement
, [ 0xABC0, 0xABFF ] // Meetei Mayek
, [ 0xAC00, 0xD7AF ] // Hangul Syllables
, [ 0xD7B0, 0xD7FF ] // Hangul Jamo Extended-B
]
,
[ // Surrogates
, [ 0xD800, 0xDBFF ] // High Surrogates
, [ 0xDC00, 0xDFFF ] // Low Surrogates
, [ 0xE000, 0xF8FF ] // Private Use Area
, [ 0xF900, 0xFAFF ] // CJK Compatibility Ideographs
, [ 0xFB00, 0xFB4F ] // Alphabetic Presentation Forms
, [ 0xFB50, 0xFDFF ] // Arabic Presentation Forms-A
, [ 0xFE00, 0xFE0F ] // Variation Selectors
, [ 0xFE10, 0xFE1F ] // Vertical Forms
, [ 0xFE20, 0xFE2F ] // Combining Half Marks
, [ 0xFE30, 0xFE4F ] // CJK Compatibility Forms
, [ 0xFE50, 0xFE6F ] // Small Form Variants
, [ 0xFE70, 0xFEFF ] // Arabic Presentation Forms-B
, [ 0xFF00, 0xFFEF ] // Halfwidth and Fullwidth Forms
, [ 0xFFF0, 0xFFFF ] // Specials
]
,
[ // Symbols
// Basic Latin
[ 0x0021, 0x002F ], [ 0x003A, 0x0040 ], [ 0x005B, 0x0060 ], [ 0x007B, 0x007E ],
// C1 Controls and Latin-1 Supplement (Extended ASCII)
[ 0x00A1, 0x00AC ], [ 0x00AE, 0x00BF ],
]
];
var NUM_KINGDOM = KINGDOMS.length;
var START = 0;
var END = 1;
var GetKingdom = function( Char )
{
var charCode = Char.charCodeAt( 0 );
for( var i = 0; i < NUM_KINGDOM; i ++ )
{
var kingdom = KINGDOMS[ i ];
var zLen = kingdom.length;
for( var j = 0; j < zLen; j ++ )
{
var zone = kingdom[ j ];
if( zone[ START ] <= charCode && charCode <= zone[ END ] )
{
return kingdom;
}
}
}
return [];
};
var Word = function( p )
{
this.kingdom = GetKingdom( p );
};
Word.prototype.test = function( p )
{
if( p == undefined || p == null || !p.charCodeAt ) return false;
var kingdom = this.kingdom;
var zLen = kingdom.length;
var charCode = p.charCodeAt( 0 );
for( var j = 0; j < zLen; j ++ )
{
var zone = kingdom[ j ];
if( zone[ START ] <= charCode && charCode <= zone[ END ] )
{
return true;
}
}
return false;
};
ns[ NS_EXPORT ]( EX_CLASS, "Word", Word );
})();

View File

@ -4,4 +4,6 @@ Components.Vim.Syntax.Analyzer = function(){};
/** @type Function */
Components.Vim.Syntax.Analyzer.bracketAt;
/** @type Function */
Components.Vim.Syntax.Analyzer.wordAt;
/** @type Function */
Components.Vim.Syntax.Analyzer.quoteAt;

View File

@ -0,0 +1,5 @@
/** @constructor */
Components.Vim.Syntax.Word = function(){};
/** @type Function */
Components.Vim.Syntax.Word.test;