]> git.r.bdr.sh - rbdr/dotfiles/blob
a379989775737af8d8c56e68b1d1cec0bc97a08e
[rbdr/dotfiles] /
1 // Taken from: https://github.com/walling/unorm/blob/master/lib/unorm.js
2
3 /*
4 * UnicodeNormalizer 1.0.0
5 * Copyright (c) 2008 Matsuza
6 * Dual licensed under the MIT (MIT-LICENSE.txt) and
7 * GPL (GPL-LICENSE.txt) licenses.
8 * $Date: 2008-06-05 16:44:17 +0200 (Thu, 05 Jun 2008) $
9 * $Rev: 13309 $
10 */
11
12 'use strict';
13
// Package helpers and the Unicode data table
var primitiveSet = require('../../../object/primitive-set');
var validValue = require('../../../object/valid-value');
var data = require('./_data');

var floor = Math.floor;

// Names of the normalization forms accepted by the exported function
var forms = primitiveSet('NFC', 'NFD', 'NFKC', 'NFKD');

// Feature triple given to code points without an own entry:
// [decomposition, flags (low byte read as canonical class), composition map]
var DEFAULT_FEATURE = [null, 0, {}];

// A 256-slot hit counter must exceed this value before results get cached
var CACHE_THRESHOLD = 10;

// Constants for the arithmetic (rule based) handling of Hangul
var SBase = 0xAC00;
var LBase = 0x1100;
var VBase = 0x1161;
var TBase = 0x11A7;
var LCount = 19;
var VCount = 21;
var TCount = 28;
var NCount = VCount * TCount;
var SCount = LCount * NCount;

// Lookup cache keyed by code point, and its per-slot hit counters
var cache = {};
var cacheCounter = [];

// Forward declarations, assigned further down in this module
var UChar, i, fromCache, fromData, fromCpOnly, fromRuleBasedJamo, fromCpFilter;
var strategies, UCharIterator, RecursDecompIterator, DecompIterator;
var CompIterator, createIterator, normalize;
28
/**
 * A single code point paired with its normalization feature data.
 *
 * @param {number} cp - code point value, stored as `codepoint`
 * @param {Array|null} feature - feature triple, stored as `feature`;
 *   when falsy it is filled in on demand by `prepFeature`
 */
UChar = function (cp, feature) {
	this.codepoint = cp;
	this.feature = feature;
};
33
// Strategies

// Start every one of the 256 hit counters at zero
i = 0;
while (i <= 0xFF) {
	cacheCounter[i] = 0;
	++i;
}

/**
 * Strategy: answer from the cache when possible, otherwise delegate to `next`.
 *
 * A delegated result that carries a feature bumps the counter selected by
 * bits 8-15 of the code point; once that counter exceeds CACHE_THRESHOLD the
 * result is stored in the cache.
 *
 * @param {Function} next - following strategy in the chain
 * @param {number} cp - code point to resolve
 * @param {boolean} needFeature - forwarded to `next`
 * @returns {UChar}
 */
fromCache = function (next, cp, needFeature) {
	var hit = cache[cp], slot;
	if (hit) return hit;

	hit = next(cp, needFeature);
	if (!hit.feature) return hit;

	slot = (cp >> 8) & 0xFF;
	cacheCounter[slot] += 1;
	if (cacheCounter[slot] > CACHE_THRESHOLD) cache[cp] = hit;
	return hit;
};
47
/**
 * Terminal strategy: read the feature from `UChar.udata`.
 *
 * The table is indexed first by `cp & 0xFF00` and then by the code point
 * itself; a missing entry yields DEFAULT_FEATURE.
 *
 * @param {Function} next - unused, this is the last link of the chain
 * @param {number} cp - code point to resolve
 * @param {boolean} needFeature - unused
 * @returns {UChar}
 */
fromData = function (next, cp, needFeature) {
	var bucket = UChar.udata[cp & 0xFF00];
	var feature = bucket ? bucket[cp] : undefined;
	return new UChar(cp, feature || DEFAULT_FEATURE);
};
/**
 * Strategy: when no feature is requested, return a bare UChar (feature
 * `null`) without consulting the rest of the chain.
 *
 * @param {Function} next - following strategy in the chain
 * @param {number} cp - code point to resolve
 * @param {boolean} needFeature - truthy to delegate to `next`
 * @returns {UChar}
 */
fromCpOnly = function (next, cp, needFeature) {
	if (needFeature) return next(cp, needFeature);
	return new UChar(cp, null);
};
55
/**
 * Strategy: compute features arithmetically for Hangul.
 *
 * Handles two ranges and delegates everything else to `next`:
 *  - [LBase, LBase + LCount): the feature holds only a composition map
 *    from each of the VCount vowels to the resulting LV syllable.
 *  - [SBase, SBase + SCount): the feature holds the syllable's
 *    decomposition (LV + T, or L + V for an LV syllable) and, for LV
 *    syllables, a composition map from each trailing consonant to the LVT
 *    syllable.
 *
 * @param {Function} next - following strategy in the chain
 * @param {number} cp - code point to resolve
 * @param {boolean} needFeature - forwarded to `next`
 * @returns {UChar}
 */
fromRuleBasedJamo = function (next, cp, needFeature) {
	var compositions, base, i, SIndex, TIndex, feature, j;

	// Both handled ranges are half-open, so the upper bound of the syllable
	// block is excluded with `<=`: SBase + SCount is the first code point
	// past the last syllable and must not get a computed decomposition
	if (cp < LBase || (LBase + LCount <= cp && cp < SBase) ||
			(SBase + SCount <= cp)) {
		return next(cp, needFeature);
	}

	if (LBase <= cp && cp < LBase + LCount) {
		compositions = {};
		base = (cp - LBase) * VCount;
		for (i = 0; i < VCount; ++i) {
			compositions[VBase + i] = SBase + TCount * (i + base);
		}
		// No decomposition and no flags, only the composition map
		return new UChar(cp, [null, 0, compositions]);
	}

	SIndex = cp - SBase;
	TIndex = SIndex % TCount;
	feature = [];
	if (TIndex !== 0) {
		// LVT syllable: splits into its LV syllable and the trailing consonant
		feature[0] = [SBase + SIndex - TIndex, TBase + TIndex];
	} else {
		// LV syllable: splits into L and V, and may absorb a trailing consonant
		feature[0] = [LBase + floor(SIndex / NCount), VBase +
			floor((SIndex % NCount) / TCount)];
		feature[2] = {};
		for (j = 1; j < TCount; ++j) {
			feature[2][TBase + j] = cp + j;
		}
	}
	return new UChar(cp, feature);
};
88
/**
 * Strategy: short-circuit code points below 60 and those strictly between
 * 13311 and 42607 with DEFAULT_FEATURE; delegate all others to `next`.
 * NOTE(review): presumably these ranges never carry normalization data -
 * confirm against the data table.
 *
 * @param {Function} next - following strategy in the chain
 * @param {number} cp - code point to resolve
 * @param {boolean} needFeature - forwarded to `next`
 * @returns {UChar}
 */
fromCpFilter = function (next, cp, needFeature) {
	if (cp < 60 || (cp > 13311 && cp < 42607)) {
		return new UChar(cp, DEFAULT_FEATURE);
	}
	return next(cp, needFeature);
};
93
// Lookup chain, tried from left to right
strategies = [fromCpFilter, fromCache, fromCpOnly, fromRuleBasedJamo, fromData];

/**
 * Resolve a code point to a UChar by running it through the strategy chain.
 * Each strategy receives the following one as `next`; the last receives null.
 *
 * @param {number} cp - code point to resolve
 * @param {boolean} [needFeature] - truthy when the feature must be populated
 * @returns {UChar}
 */
UChar.fromCharCode = (function () {
	var chain = null, idx;
	var link = function (strategy, next) {
		return function (cp, needFeature) { return strategy(next, cp, needFeature); };
	};
	// Wrap from the tail so that the first strategy ends up outermost
	for (idx = strategies.length - 1; idx >= 0; --idx) {
		chain = link(strategies[idx], chain);
	}
	return chain;
}());
99
/** True when `cp` lies in the range 0xD800-0xDBFF (inclusive). */
UChar.isHighSurrogate = function (cp) {
	return 0xD800 <= cp && cp <= 0xDBFF;
};

/** True when `cp` lies in the range 0xDC00-0xDFFF (inclusive). */
UChar.isLowSurrogate = function (cp) {
	return 0xDC00 <= cp && cp <= 0xDFFF;
};
102
/**
 * Make sure `this.feature` is populated, resolving it through
 * `UChar.fromCharCode` with the feature requested when it is still missing.
 */
UChar.prototype.prepFeature = function () {
	if (this.feature) return;
	this.feature = UChar.fromCharCode(this.codepoint, true).feature;
};
108
/**
 * Render the code point as a string: a single UTF-16 unit below 0x10000,
 * otherwise a high/low surrogate pair.
 *
 * @returns {string}
 */
UChar.prototype.toString = function () {
	var cp = this.codepoint, offset;
	if (cp < 0x10000) return String.fromCharCode(cp);
	offset = cp - 0x10000;
	return String.fromCharCode(
		0xD800 + floor(offset / 0x400),
		0xDC00 + (offset % 0x400)
	);
};
115
/**
 * Decomposition stored in slot 0 of the feature.
 *
 * @returns {Array|null} list of code points, or null when slot 0 is falsy
 */
UChar.prototype.getDecomp = function () {
	var decomposition;
	this.prepFeature();
	decomposition = this.feature[0];
	return decomposition ? decomposition : null;
};
120
/**
 * Test bit 8 of the feature flags (slot 1).
 *
 * @returns {boolean|number} false when the flags are falsy, otherwise the
 *   masked value (non-zero when the bit is set)
 */
UChar.prototype.isCompatibility = function () {
	var flags;
	this.prepFeature();
	flags = this.feature[1];
	if (!flags) return false;
	return flags & (1 << 8);
};
/**
 * Test bit 9 of the feature flags (slot 1).
 *
 * @returns {boolean|number} false when the flags are falsy, otherwise the
 *   masked value (non-zero when the bit is set)
 */
UChar.prototype.isExclude = function () {
	var flags;
	this.prepFeature();
	flags = this.feature[1];
	if (!flags) return false;
	return flags & (1 << 9);
};
/**
 * Canonical class, kept in the low byte of the feature flags (slot 1).
 *
 * @returns {number} the low byte, or 0 when the flags are falsy
 */
UChar.prototype.getCanonicalClass = function () {
	var flags;
	this.prepFeature();
	flags = this.feature[1];
	if (!flags) return 0;
	return flags & 0xff;
};
/**
 * Look up what this character composes to when followed by `following`,
 * using the composition map in slot 2 of the feature.
 *
 * @param {UChar} following - the character that comes next
 * @returns {UChar|null} the composite, or null when there is no map or no
 *   entry for `following`
 */
UChar.prototype.getComposite = function (following) {
	var compositions, composed;
	this.prepFeature();
	compositions = this.feature[2];
	if (!compositions) return null;
	composed = compositions[following.codepoint];
	if (!composed) return null;
	return UChar.fromCharCode(composed);
};
140
/**
 * Iterator yielding the code points of a string as UChar objects.
 *
 * @param {string} str - text to walk through
 */
UCharIterator = function (str) {
	this.str = str;
	this.cursor = 0;
};

/**
 * Produce the next character. A high surrogate immediately followed by a
 * low surrogate is combined into one code point; any other unit is passed
 * through as is.
 *
 * @returns {UChar|null} null once the string is exhausted (the string
 *   reference is dropped at that point)
 */
UCharIterator.prototype.next = function () {
	var text = this.str, cp, trail;
	if (!text || this.cursor >= text.length) {
		this.str = null;
		return null;
	}
	cp = text.charCodeAt(this.cursor++);
	if (UChar.isHighSurrogate(cp) && this.cursor < text.length) {
		trail = text.charCodeAt(this.cursor);
		if (UChar.isLowSurrogate(trail)) {
			cp = (cp - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000;
			++this.cursor;
		}
	}
	return UChar.fromCharCode(cp);
};
158
/**
 * Iterator that fully (recursively) decomposes each character of `it`.
 *
 * @param {Object} it - source iterator yielding UChar objects
 * @param {boolean} cano - when truthy, characters flagged as compatibility
 *   are left undecomposed
 */
RecursDecompIterator = function (it, cano) {
	this.it = it;
	this.canonical = cano;
	this.resBuf = [];
};

/**
 * Produce the next decomposed character, refilling the buffer from the
 * source iterator whenever it runs empty.
 *
 * @returns {UChar|null} null once the source is exhausted
 */
RecursDecompIterator.prototype.next = function () {
	var canonicalOnly = this.canonical, expand, source;

	// Flatten a character into the characters it ultimately decomposes to
	expand = function (uchar) {
		var parts = uchar.getDecomp(), flat, idx;
		if (!parts || (canonicalOnly && uchar.isCompatibility())) return [uchar];
		flat = [];
		for (idx = 0; idx < parts.length; ++idx) {
			// Append in place; `concat` would return a new array and leave
			// `flat` untouched
			flat.push.apply(flat, expand(UChar.fromCharCode(parts[idx])));
		}
		return flat;
	};

	if (this.resBuf.length === 0) {
		source = this.it.next();
		if (!source) return null;
		this.resBuf = expand(source);
	}
	return this.resBuf.shift();
};
188
/**
 * Iterator that reorders the characters of `it` by canonical class.
 *
 * @param {Object} it - source iterator yielding UChar objects
 */
DecompIterator = function (it) {
	this.it = it;
	this.resBuf = [];
};

/**
 * Produce the next character in reordered sequence.
 *
 * When the buffer is empty, characters are read up to and including the
 * next one of canonical class 0 (or the end of input). Each character of a
 * non-zero class is inserted behind the last buffered character whose class
 * is not greater than its own, which keeps equal classes in input order.
 *
 * @returns {UChar|undefined} undefined once the source is exhausted
 */
DecompIterator.prototype.next = function () {
	var pending = this.resBuf, current, ccc, pos;
	if (pending.length === 0) {
		for (;;) {
			current = this.it.next();
			if (!current) break;
			ccc = current.getCanonicalClass();
			if (ccc === 0) {
				// A class 0 character always goes last and ends the run
				pending.push(current);
				break;
			}
			pos = pending.length;
			while (pos > 0 && pending[pos - 1].getCanonicalClass() > ccc) --pos;
			pending.splice(pos, 0, current);
		}
	}
	return pending.shift();
};
214
/**
 * Iterator that composes the characters of `it`.
 *
 * @param {Object} it - source iterator yielding UChar objects
 */
CompIterator = function (it) {
	this.it = it;
	this.procBuf = [];
	this.resBuf = [];
	this.lastClass = null;
};

/**
 * Produce the next composed character.
 *
 * `procBuf` holds the run being worked on, headed by its first character.
 * An incoming character is merged into that head when a composite exists
 * and the previously kept class is lower than its own (or is 0); otherwise
 * it is appended, and an appended class 0 character first moves the
 * finished run to `resBuf`.
 *
 * @returns {UChar|undefined} undefined once the source is exhausted
 */
CompIterator.prototype.next = function () {
	var incoming, merged, ccc;
	while (this.resBuf.length === 0) {
		incoming = this.it.next();
		if (!incoming) {
			// End of input: release whatever is still being worked on
			this.resBuf = this.procBuf;
			this.procBuf = [];
			break;
		}
		if (this.procBuf.length === 0) {
			this.lastClass = incoming.getCanonicalClass();
			this.procBuf.push(incoming);
			continue;
		}
		merged = this.procBuf[0].getComposite(incoming);
		ccc = incoming.getCanonicalClass();
		if (merged && (this.lastClass < ccc || this.lastClass === 0)) {
			this.procBuf[0] = merged;
			continue;
		}
		if (ccc === 0) {
			this.resBuf = this.procBuf;
			this.procBuf = [];
		}
		this.lastClass = ccc;
		this.procBuf.push(incoming);
	}
	return this.resBuf.shift();
};
252
/**
 * Build the iterator pipeline for a normalization form.
 *
 * Every form decomposes and reorders; NFD and NFC pass `true` as the
 * `cano` argument of RecursDecompIterator, NFKD and NFKC pass `false`.
 * NFC and NFKC additionally wrap the result in a CompIterator.
 *
 * @param {string} mode - one of "NFD", "NFKD", "NFC", "NFKC"
 * @param {string} str - text to normalize
 * @returns {Object} iterator whose `next()` yields UChar objects
 * @throws {RangeError} when `mode` is not one of the four forms
 */
createIterator = function (mode, str) {
	var canonical = (mode === "NFD") || (mode === "NFC");
	var composing = (mode === "NFC") || (mode === "NFKC");
	var pipeline;

	if (!canonical && mode !== "NFKD" && mode !== "NFKC") {
		// Throw a real error object (not a bare string) so that callers get
		// a stack trace and can rely on `instanceof Error`
		throw new RangeError(mode + " is invalid");
	}

	pipeline = new DecompIterator(
		new RecursDecompIterator(new UCharIterator(str), canonical)
	);
	return composing ? new CompIterator(pipeline) : pipeline;
};
/**
 * Normalize `str` to the given form by draining the matching iterator
 * pipeline and joining the resulting characters.
 *
 * @param {string} mode - normalization form name, passed to createIterator
 * @param {string} str - text to normalize
 * @returns {string}
 */
normalize = function (mode, str) {
	var it = createIterator(mode, str), out = "", uchar = it.next();
	while (uchar) {
		out += uchar.toString();
		uchar = it.next();
	}
	return out;
};
279
280 /* Unicode data */
281 UChar.udata = data;
282
283 module.exports = function (/*form*/) {
284 var str = String(validValue(this)), form = arguments[0];
285 if (form === undefined) form = 'NFC';
286 else form = String(form);
287 if (!forms[form]) throw new RangeError('Invalid normalization form: ' + form);
288 return normalize(form, str);
289 };