unison

Fork of Unison, a bi-directional file synchronization tool
git clone git://git.laack.co/unison.git
Log | Files | Refs | README | LICENSE

unicode.ml (34037B)


      1 
      2 open Unicode_tables
      3 
      4 exception Invalid
      5 
      6 let fail () = raise Invalid
      7 
      8 let get s i = Char.code (String.unsafe_get s i)
      9 let set s i v = String.unsafe_set s i (Char.unsafe_chr v)
     10 
     11 (****)
     12 
     13 let hangul_sbase = 0xAC00
     14 let hangul_lbase = 0x1100
     15 let hangul_vbase = 0x1161
     16 let hangul_tbase = 0x11A7
     17 
     18 let hangul_scount = 11172
     19 let hangul_lcount = 19
     20 let hangul_vcount = 21
     21 let hangul_tcount = 28
     22 let hangul_ncount = hangul_vcount * hangul_tcount
     23 
     24 let set_char_3 s i c =
     25   set s i (c lsr 12 + 0xE0);
     26   set s (i + 1) ((c lsr 6) land 0x3f + 0x80);
     27   set s (i + 2) (c land 0x3f + 0x80)
     28 
     29 let rec norm s i l s' j =
     30   if i < l then begin
     31     let c = get s i in
     32     if c < 0x80 then begin
     33       set s' j (get ascii_lower c);
     34       norm s (i + 1) l s' (j + 1)
     35     end else if c < 0xE0 then begin
     36       (* 80 - 7FF *)
     37       if c < 0xc2 || i + 1 >= l then raise Invalid;
     38       let c1 = get s (i + 1) in
     39       if c1 land 0xc0 <> 0x80 then raise Invalid;
     40       let idx = get norm_prim (c - 0xc0) in
     41       let idx = idx lsl 6 + c1 - 0x80 in
     42       let k = get norm_second_high idx in
     43       if k = 0 then begin
     44         set s' j c;
     45         set s' (j + 1) c1;
     46         norm s (i + 2) l s' (j + 2)
     47       end else begin
     48         let k = (k - 2) lsl 8 + get norm_second_low idx in
     49         let n = get norm_repl k in
     50         String.blit norm_repl (k + 1) s' j n;
     51         norm s (i + 2) l s' (j + n)
     52       end
     53     end else if c < 0xF0 then begin
     54       (* 800 - FFFF *)
     55       if i + 2 >= l then raise Invalid;
     56       let c1 = get s (i + 1) in
     57       if c1 land 0xc0 <> 0x80 then raise Invalid;
     58       let idx = c lsl 6 + c1 - 0x3880 in
     59       if idx < 0x20 then raise Invalid;
     60       let c2 = get s (i + 2) in
     61       if c2 land 0xc0 <> 0x80 then raise Invalid;
     62       let idx = get norm_prim idx in
     63       let idx = idx lsl 6 + c2 - 0x80 in
     64       let k = get norm_second_high idx in
     65       if k = 0 then begin
     66         set s' j c;
     67         set s' (j + 1) c1;
     68         set s' (j + 2) c2;
     69         norm s (i + 3) l s' (j + 3)
     70       end else if k = 1 then begin
     71         let v = c lsl 12 + c1 lsl 6 + c2 - (0x000E2080 + hangul_sbase) in
     72         if v >= hangul_scount then begin
     73           set s' j c;
     74           set s' (j + 1) c1;
     75           set s' (j + 2) c2;
     76           norm s (i + 3) l s' (j + 3)
     77         end else begin
     78           set_char_3 s' j (v / hangul_ncount + hangul_lbase);
     79           set_char_3 s' (j + 3)
     80             ((v mod hangul_ncount) / hangul_tcount + hangul_vbase);
     81           if v mod hangul_tcount = 0 then
     82             norm s (i + 3) l s' (j + 6)
     83           else begin
     84             set_char_3 s' (j + 6) ((v mod hangul_tcount) + hangul_tbase);
     85             norm s (i + 3) l s' (j + 9)
     86           end
     87         end
     88       end else begin
     89         let k = (k - 2) lsl 8 + get norm_second_low idx in
     90         let n = get norm_repl k in
     91         String.blit norm_repl (k + 1) s' j n;
     92         norm s (i + 3) l s' (j + n)
     93       end
     94     end else begin
     95       (* 10000 - 10FFFF *)
     96       if i + 3 >= l then raise Invalid;
     97       let c1 = get s (i + 1) in
     98       let c2 = get s (i + 2) in
     99       let c3 = get s (i + 3) in
    100       if (c1 lor c2 lor c3) land 0xc0 <> 0x80 then raise Invalid;
    101       let v = c lsl 18 + c1 lsl 12 + c2 lsl 6 + c3 - 0x03c82080 in
    102       if v < 0x10000 || v > 0x10ffff then raise Invalid;
    103       set s' j c;
    104       set s' (j + 1) c1;
    105       set s' (j + 2) c2;
    106       set s' (j + 3) c3;
    107       norm s (i + 4) l s' (j + 4)
    108     end
    109   end else
    110     String.sub s' 0 j
    111 
    112 let normalize s =
    113   let l = String.length s in
    114   let s' = Bytes.create (3 * l) in
    115   try norm s 0 l s' 0 with Invalid -> s
    116 
    117 (****)
    118 
    119 let rec compare_rec s s' i l =
    120   if i = l then begin
    121     if l < String.length s then 1 else
    122     if l < String.length s' then -1 else
    123     0
    124   end else begin
    125     let c = get s i in
    126     let c' = get s' i in
    127     if c < 0x80 && c' < 0x80 then begin
    128       let v = compare (get ascii_lower c) (get ascii_lower c') in
    129       if v <> 0 then v else compare_rec s s' (i + 1) l
    130     end else
    131       compare (normalize s) (normalize s')
    132   end
    133 
    134 let compare s s' =
    135   compare_rec s s' 0 (min (String.length s) (String.length s'))
    136 
    137 (****)
    138 
    139 let rec decode_char s i l =
    140   if i = l then fail () else
    141   let c = get s i in
    142   if c < 0x80 then
    143     cont s (i + 1) l c
    144   else if c < 0xE0 then begin
    145     (* 80 - 7FF *)
    146     if c < 0xc2 || i + 1 >= l then fail () else
    147     let c1 = get s (i + 1) in
    148     if c1 land 0xc0 <> 0x80 then fail () else
    149     let v = c lsl 6 + c1 - 0x3080 in
    150     cont s (i + 2) l v
    151   end else if c < 0xF0 then begin
    152     (* 800 - FFFF *)
    153     if i + 2 >= l then fail () else
    154     let c1 = get s (i + 1) in
    155     let c2 = get s (i + 2) in
    156     if (c1 lor c2) land 0xc0 <> 0x80 then fail () else
    157     let v = c lsl 12 + c1 lsl 6 + c2 - 0xe2080 in
    158     if v < 0x800 then fail () else
    159     cont s (i + 3) l v
    160   end else begin
    161     (* 10000 - 10FFFF *)
    162     if i + 3 >= l then fail () else
    163     let c1 = get s (i + 1) in
    164     let c2 = get s (i + 2) in
    165     let c3 = get s (i + 3) in
    166     if (c1 lor c2 lor c3) land 0xc0 <> 0x80 then fail () else
    167     let v = c lsl 18 + c1 lsl 12 + c2 lsl 6 + c3 - 0x03c82080 in
    168     if v < 0x10000 || v > 0x10ffff then fail () else
    169     cont s (i + 4) l v
    170   end
    171 
    172 and cont s i l v = (v, i)
    173 
    174 let encode_char s i l c =
    175   if c < 0x80 then begin
    176     if i >= l then fail () else begin
    177       set s i c;
    178       i + 1
    179     end
    180   end else if c < 0x800 then begin
    181     if i + 1 >= l then fail () else begin
    182       set s i (c lsr 6 + 0xC0);
    183       set s (i + 1) (c land 0x3f + 0x80);
    184       i + 2
    185     end
    186   end else if c < 0x10000 then begin
    187     if i + 1 >= l then fail () else begin
    188       set s i (c lsr 12 + 0xE0);
    189       set s (i + 1) ((c lsr 6) land 0x3f + 0x80);
    190       set s (i + 2) (c land 0x3f + 0x80);
    191       i + 3
    192     end
    193   end else begin
    194     if i + 1 >= l then fail () else begin
    195       set s i (c lsr 18 + 0xF0);
    196       set s (i + 1) ((c lsr 12) land 0x3f + 0x80);
    197       set s (i + 2) ((c lsr 6) land 0x3f + 0x80);
    198       set s (i + 3) (c land 0x3f + 0x80);
    199       i + 4
    200     end
    201   end
    202 
    203 let rec prev_char s i =
    204   let i = i - 1 in
    205   if i < 0 then fail () else
    206   if (get s i) land 0xc0 <> 0x80 then i else prev_char s i
    207 
    208 (****)
    209 
        (* Flat array of (key, value) pairs, sorted by increasing key, in the
           layout expected by [find]: the key is at an even index and its
           value right after it.

           Keys are code points (the first one is 0x0300).  Each value looks
           like a packed pair: a count in the high 16 bits and a start index
           in the low 16 bits, every start index being the previous start
           plus the previous count (0x0000 + 0x54 = 0x0054,
           0x0054 + 0x75 = 0x00C9, ...).
           NOTE(review): presumably this designates a run of pairs in
           [uniCharBMPPrecompDestinationTable]; confirm against [combine]. *)
    210 let uniCharPrecompSourceTable = [|
    211         0x00000300; 0x00540000; 0x00000301; 0x00750054;
    212         0x00000302; 0x002000C9; 0x00000303; 0x001C00E9;
    213         0x00000304; 0x002C0105; 0x00000306; 0x00200131;
    214         0x00000307; 0x002E0151; 0x00000308; 0x0036017F;
    215         0x00000309; 0x001801B5; 0x0000030A; 0x000601CD;
    216         0x0000030B; 0x000601D3; 0x0000030C; 0x002501D9;
    217         0x0000030F; 0x000E01FE; 0x00000311; 0x000C020C;
    218         0x00000313; 0x000E0218; 0x00000314; 0x00100226;
    219         0x0000031B; 0x00040236; 0x00000323; 0x002A023A;
    220         0x00000324; 0x00020264; 0x00000325; 0x00020266;
    221         0x00000326; 0x00040268; 0x00000327; 0x0016026C;
    222         0x00000328; 0x000A0282; 0x0000032D; 0x000C028C;
    223         0x0000032E; 0x00020298; 0x00000330; 0x0006029A;
    224         0x00000331; 0x001102A0; 0x00000338; 0x002C02B1;
    225         0x00000342; 0x001D02DD; 0x00000345; 0x003F02FA;
    226         0x00000653; 0x00010339; 0x00000654; 0x0006033A;
    227         0x00000655; 0x00010340; 0x0000093C; 0x00030341;
    228         0x000009BE; 0x00010344; 0x000009D7; 0x00010345;
    229         0x00000B3E; 0x00010346; 0x00000B56; 0x00010347;
    230         0x00000B57; 0x00010348; 0x00000BBE; 0x00020349;
    231         0x00000BD7; 0x0002034B; 0x00000C56; 0x0001034D;
    232         0x00000CC2; 0x0001034E; 0x00000CD5; 0x0003034F;
    233         0x00000CD6; 0x00010352; 0x00000D3E; 0x00020353;
    234         0x00000D57; 0x00010355; 0x00000DCA; 0x00020356;
    235         0x00000DCF; 0x00010358; 0x00000DDF; 0x00010359;
    236         0x0000102E; 0x0001035A; 0x00003099; 0x0030035B;
    237         0x0000309A; 0x000A038B
    238 |]
    239 
        (* Flat array of pairs of 16-bit code points, e.g. (0x0041, 0x00C0).
           The pairs come in consecutive runs; inside a run the first elements
           are in increasing order (the first run is the 0x54 pairs starting
           with 0x0041 and ending with (0x1FFE, 0x1FDD), after which the keys
           start again at 0x0041).
           NOTE(review): presumably each run belongs to one combining
           character of [uniCharPrecompSourceTable] and maps a base character
           to its precomposed form, looked up with [find]; confirm against
           [combine]. *)
    240 let uniCharBMPPrecompDestinationTable = [|
    241         0x0041; 0x00C0; 0x0045; 0x00C8; 0x0049; 0x00CC; 0x004E; 0x01F8;
    242         0x004F; 0x00D2; 0x0055; 0x00D9; 0x0057; 0x1E80; 0x0059; 0x1EF2;
    243         0x0061; 0x00E0; 0x0065; 0x00E8; 0x0069; 0x00EC; 0x006E; 0x01F9;
    244         0x006F; 0x00F2; 0x0075; 0x00F9; 0x0077; 0x1E81; 0x0079; 0x1EF3;
    245         0x00A8; 0x1FED; 0x00C2; 0x1EA6; 0x00CA; 0x1EC0; 0x00D4; 0x1ED2;
    246         0x00DC; 0x01DB; 0x00E2; 0x1EA7; 0x00EA; 0x1EC1; 0x00F4; 0x1ED3;
    247         0x00FC; 0x01DC; 0x0102; 0x1EB0; 0x0103; 0x1EB1; 0x0112; 0x1E14;
    248         0x0113; 0x1E15; 0x014C; 0x1E50; 0x014D; 0x1E51; 0x01A0; 0x1EDC;
    249         0x01A1; 0x1EDD; 0x01AF; 0x1EEA; 0x01B0; 0x1EEB; 0x0391; 0x1FBA;
    250         0x0395; 0x1FC8; 0x0397; 0x1FCA; 0x0399; 0x1FDA; 0x039F; 0x1FF8;
    251         0x03A5; 0x1FEA; 0x03A9; 0x1FFA; 0x03B1; 0x1F70; 0x03B5; 0x1F72;
    252         0x03B7; 0x1F74; 0x03B9; 0x1F76; 0x03BF; 0x1F78; 0x03C5; 0x1F7A;
    253         0x03C9; 0x1F7C; 0x03CA; 0x1FD2; 0x03CB; 0x1FE2; 0x0415; 0x0400;
    254         0x0418; 0x040D; 0x0435; 0x0450; 0x0438; 0x045D; 0x1F00; 0x1F02;
    255         0x1F01; 0x1F03; 0x1F08; 0x1F0A; 0x1F09; 0x1F0B; 0x1F10; 0x1F12;
    256         0x1F11; 0x1F13; 0x1F18; 0x1F1A; 0x1F19; 0x1F1B; 0x1F20; 0x1F22;
    257         0x1F21; 0x1F23; 0x1F28; 0x1F2A; 0x1F29; 0x1F2B; 0x1F30; 0x1F32;
    258         0x1F31; 0x1F33; 0x1F38; 0x1F3A; 0x1F39; 0x1F3B; 0x1F40; 0x1F42;
    259         0x1F41; 0x1F43; 0x1F48; 0x1F4A; 0x1F49; 0x1F4B; 0x1F50; 0x1F52;
    260         0x1F51; 0x1F53; 0x1F59; 0x1F5B; 0x1F60; 0x1F62; 0x1F61; 0x1F63;
    261         0x1F68; 0x1F6A; 0x1F69; 0x1F6B; 0x1FBF; 0x1FCD; 0x1FFE; 0x1FDD;
    262         0x0041; 0x00C1; 0x0043; 0x0106; 0x0045; 0x00C9; 0x0047; 0x01F4;
    263         0x0049; 0x00CD; 0x004B; 0x1E30; 0x004C; 0x0139; 0x004D; 0x1E3E;
    264         0x004E; 0x0143; 0x004F; 0x00D3; 0x0050; 0x1E54; 0x0052; 0x0154;
    265         0x0053; 0x015A; 0x0055; 0x00DA; 0x0057; 0x1E82; 0x0059; 0x00DD;
    266         0x005A; 0x0179; 0x0061; 0x00E1; 0x0063; 0x0107; 0x0065; 0x00E9;
    267         0x0067; 0x01F5; 0x0069; 0x00ED; 0x006B; 0x1E31; 0x006C; 0x013A;
    268         0x006D; 0x1E3F; 0x006E; 0x0144; 0x006F; 0x00F3; 0x0070; 0x1E55;
    269         0x0072; 0x0155; 0x0073; 0x015B; 0x0075; 0x00FA; 0x0077; 0x1E83;
    270         0x0079; 0x00FD; 0x007A; 0x017A; 0x00A8; 0x0385; 0x00C2; 0x1EA4;
    271         0x00C5; 0x01FA; 0x00C6; 0x01FC; 0x00C7; 0x1E08; 0x00CA; 0x1EBE;
    272         0x00CF; 0x1E2E; 0x00D4; 0x1ED0; 0x00D5; 0x1E4C; 0x00D8; 0x01FE;
    273         0x00DC; 0x01D7; 0x00E2; 0x1EA5; 0x00E5; 0x01FB; 0x00E6; 0x01FD;
    274         0x00E7; 0x1E09; 0x00EA; 0x1EBF; 0x00EF; 0x1E2F; 0x00F4; 0x1ED1;
    275         0x00F5; 0x1E4D; 0x00F8; 0x01FF; 0x00FC; 0x01D8; 0x0102; 0x1EAE;
    276         0x0103; 0x1EAF; 0x0112; 0x1E16; 0x0113; 0x1E17; 0x014C; 0x1E52;
    277         0x014D; 0x1E53; 0x0168; 0x1E78; 0x0169; 0x1E79; 0x01A0; 0x1EDA;
    278         0x01A1; 0x1EDB; 0x01AF; 0x1EE8; 0x01B0; 0x1EE9; 0x0391; 0x0386;
    279         0x0395; 0x0388; 0x0397; 0x0389; 0x0399; 0x038A; 0x039F; 0x038C;
    280         0x03A5; 0x038E; 0x03A9; 0x038F; 0x03B1; 0x03AC; 0x03B5; 0x03AD;
    281         0x03B7; 0x03AE; 0x03B9; 0x03AF; 0x03BF; 0x03CC; 0x03C5; 0x03CD;
    282         0x03C9; 0x03CE; 0x03CA; 0x0390; 0x03CB; 0x03B0; 0x03D2; 0x03D3;
    283         0x0413; 0x0403; 0x041A; 0x040C; 0x0433; 0x0453; 0x043A; 0x045C;
    284         0x1F00; 0x1F04; 0x1F01; 0x1F05; 0x1F08; 0x1F0C; 0x1F09; 0x1F0D;
    285         0x1F10; 0x1F14; 0x1F11; 0x1F15; 0x1F18; 0x1F1C; 0x1F19; 0x1F1D;
    286         0x1F20; 0x1F24; 0x1F21; 0x1F25; 0x1F28; 0x1F2C; 0x1F29; 0x1F2D;
    287         0x1F30; 0x1F34; 0x1F31; 0x1F35; 0x1F38; 0x1F3C; 0x1F39; 0x1F3D;
    288         0x1F40; 0x1F44; 0x1F41; 0x1F45; 0x1F48; 0x1F4C; 0x1F49; 0x1F4D;
    289         0x1F50; 0x1F54; 0x1F51; 0x1F55; 0x1F59; 0x1F5D; 0x1F60; 0x1F64;
    290         0x1F61; 0x1F65; 0x1F68; 0x1F6C; 0x1F69; 0x1F6D; 0x1FBF; 0x1FCE;
    291         0x1FFE; 0x1FDE; 0x0041; 0x00C2; 0x0043; 0x0108; 0x0045; 0x00CA;
    292         0x0047; 0x011C; 0x0048; 0x0124; 0x0049; 0x00CE; 0x004A; 0x0134;
    293         0x004F; 0x00D4; 0x0053; 0x015C; 0x0055; 0x00DB; 0x0057; 0x0174;
    294         0x0059; 0x0176; 0x005A; 0x1E90; 0x0061; 0x00E2; 0x0063; 0x0109;
    295         0x0065; 0x00EA; 0x0067; 0x011D; 0x0068; 0x0125; 0x0069; 0x00EE;
    296         0x006A; 0x0135; 0x006F; 0x00F4; 0x0073; 0x015D; 0x0075; 0x00FB;
    297         0x0077; 0x0175; 0x0079; 0x0177; 0x007A; 0x1E91; 0x1EA0; 0x1EAC;
    298         0x1EA1; 0x1EAD; 0x1EB8; 0x1EC6; 0x1EB9; 0x1EC7; 0x1ECC; 0x1ED8;
    299         0x1ECD; 0x1ED9; 0x0041; 0x00C3; 0x0045; 0x1EBC; 0x0049; 0x0128;
    300         0x004E; 0x00D1; 0x004F; 0x00D5; 0x0055; 0x0168; 0x0056; 0x1E7C;
    301         0x0059; 0x1EF8; 0x0061; 0x00E3; 0x0065; 0x1EBD; 0x0069; 0x0129;
    302         0x006E; 0x00F1; 0x006F; 0x00F5; 0x0075; 0x0169; 0x0076; 0x1E7D;
    303         0x0079; 0x1EF9; 0x00C2; 0x1EAA; 0x00CA; 0x1EC4; 0x00D4; 0x1ED6;
    304         0x00E2; 0x1EAB; 0x00EA; 0x1EC5; 0x00F4; 0x1ED7; 0x0102; 0x1EB4;
    305         0x0103; 0x1EB5; 0x01A0; 0x1EE0; 0x01A1; 0x1EE1; 0x01AF; 0x1EEE;
    306         0x01B0; 0x1EEF; 0x0041; 0x0100; 0x0045; 0x0112; 0x0047; 0x1E20;
    307         0x0049; 0x012A; 0x004F; 0x014C; 0x0055; 0x016A; 0x0059; 0x0232;
    308         0x0061; 0x0101; 0x0065; 0x0113; 0x0067; 0x1E21; 0x0069; 0x012B;
    309         0x006F; 0x014D; 0x0075; 0x016B; 0x0079; 0x0233; 0x00C4; 0x01DE;
    310         0x00C6; 0x01E2; 0x00D5; 0x022C; 0x00D6; 0x022A; 0x00DC; 0x01D5;
    311         0x00E4; 0x01DF; 0x00E6; 0x01E3; 0x00F5; 0x022D; 0x00F6; 0x022B;
    312         0x00FC; 0x01D6; 0x01EA; 0x01EC; 0x01EB; 0x01ED; 0x0226; 0x01E0;
    313         0x0227; 0x01E1; 0x022E; 0x0230; 0x022F; 0x0231; 0x0391; 0x1FB9;
    314         0x0399; 0x1FD9; 0x03A5; 0x1FE9; 0x03B1; 0x1FB1; 0x03B9; 0x1FD1;
    315         0x03C5; 0x1FE1; 0x0418; 0x04E2; 0x0423; 0x04EE; 0x0438; 0x04E3;
    316         0x0443; 0x04EF; 0x1E36; 0x1E38; 0x1E37; 0x1E39; 0x1E5A; 0x1E5C;
    317         0x1E5B; 0x1E5D; 0x0041; 0x0102; 0x0045; 0x0114; 0x0047; 0x011E;
    318         0x0049; 0x012C; 0x004F; 0x014E; 0x0055; 0x016C; 0x0061; 0x0103;
    319         0x0065; 0x0115; 0x0067; 0x011F; 0x0069; 0x012D; 0x006F; 0x014F;
    320         0x0075; 0x016D; 0x0228; 0x1E1C; 0x0229; 0x1E1D; 0x0391; 0x1FB8;
    321         0x0399; 0x1FD8; 0x03A5; 0x1FE8; 0x03B1; 0x1FB0; 0x03B9; 0x1FD0;
    322         0x03C5; 0x1FE0; 0x0410; 0x04D0; 0x0415; 0x04D6; 0x0416; 0x04C1;
    323         0x0418; 0x0419; 0x0423; 0x040E; 0x0430; 0x04D1; 0x0435; 0x04D7;
    324         0x0436; 0x04C2; 0x0438; 0x0439; 0x0443; 0x045E; 0x1EA0; 0x1EB6;
    325         0x1EA1; 0x1EB7; 0x0041; 0x0226; 0x0042; 0x1E02; 0x0043; 0x010A;
    326         0x0044; 0x1E0A; 0x0045; 0x0116; 0x0046; 0x1E1E; 0x0047; 0x0120;
    327         0x0048; 0x1E22; 0x0049; 0x0130; 0x004D; 0x1E40; 0x004E; 0x1E44;
    328         0x004F; 0x022E; 0x0050; 0x1E56; 0x0052; 0x1E58; 0x0053; 0x1E60;
    329         0x0054; 0x1E6A; 0x0057; 0x1E86; 0x0058; 0x1E8A; 0x0059; 0x1E8E;
    330         0x005A; 0x017B; 0x0061; 0x0227; 0x0062; 0x1E03; 0x0063; 0x010B;
    331         0x0064; 0x1E0B; 0x0065; 0x0117; 0x0066; 0x1E1F; 0x0067; 0x0121;
    332         0x0068; 0x1E23; 0x006D; 0x1E41; 0x006E; 0x1E45; 0x006F; 0x022F;
    333         0x0070; 0x1E57; 0x0072; 0x1E59; 0x0073; 0x1E61; 0x0074; 0x1E6B;
    334         0x0077; 0x1E87; 0x0078; 0x1E8B; 0x0079; 0x1E8F; 0x007A; 0x017C;
    335         0x015A; 0x1E64; 0x015B; 0x1E65; 0x0160; 0x1E66; 0x0161; 0x1E67;
    336         0x017F; 0x1E9B; 0x1E62; 0x1E68; 0x1E63; 0x1E69; 0x0041; 0x00C4;
    337         0x0045; 0x00CB; 0x0048; 0x1E26; 0x0049; 0x00CF; 0x004F; 0x00D6;
    338         0x0055; 0x00DC; 0x0057; 0x1E84; 0x0058; 0x1E8C; 0x0059; 0x0178;
    339         0x0061; 0x00E4; 0x0065; 0x00EB; 0x0068; 0x1E27; 0x0069; 0x00EF;
    340         0x006F; 0x00F6; 0x0074; 0x1E97; 0x0075; 0x00FC; 0x0077; 0x1E85;
    341         0x0078; 0x1E8D; 0x0079; 0x00FF; 0x00D5; 0x1E4E; 0x00F5; 0x1E4F;
    342         0x016A; 0x1E7A; 0x016B; 0x1E7B; 0x0399; 0x03AA; 0x03A5; 0x03AB;
    343         0x03B9; 0x03CA; 0x03C5; 0x03CB; 0x03D2; 0x03D4; 0x0406; 0x0407;
    344         0x0410; 0x04D2; 0x0415; 0x0401; 0x0416; 0x04DC; 0x0417; 0x04DE;
    345         0x0418; 0x04E4; 0x041E; 0x04E6; 0x0423; 0x04F0; 0x0427; 0x04F4;
    346         0x042B; 0x04F8; 0x042D; 0x04EC; 0x0430; 0x04D3; 0x0435; 0x0451;
    347         0x0436; 0x04DD; 0x0437; 0x04DF; 0x0438; 0x04E5; 0x043E; 0x04E7;
    348         0x0443; 0x04F1; 0x0447; 0x04F5; 0x044B; 0x04F9; 0x044D; 0x04ED;
    349         0x0456; 0x0457; 0x04D8; 0x04DA; 0x04D9; 0x04DB; 0x04E8; 0x04EA;
    350         0x04E9; 0x04EB; 0x0041; 0x1EA2; 0x0045; 0x1EBA; 0x0049; 0x1EC8;
    351         0x004F; 0x1ECE; 0x0055; 0x1EE6; 0x0059; 0x1EF6; 0x0061; 0x1EA3;
    352         0x0065; 0x1EBB; 0x0069; 0x1EC9; 0x006F; 0x1ECF; 0x0075; 0x1EE7;
    353         0x0079; 0x1EF7; 0x00C2; 0x1EA8; 0x00CA; 0x1EC2; 0x00D4; 0x1ED4;
    354         0x00E2; 0x1EA9; 0x00EA; 0x1EC3; 0x00F4; 0x1ED5; 0x0102; 0x1EB2;
    355         0x0103; 0x1EB3; 0x01A0; 0x1EDE; 0x01A1; 0x1EDF; 0x01AF; 0x1EEC;
    356         0x01B0; 0x1EED; 0x0041; 0x00C5; 0x0055; 0x016E; 0x0061; 0x00E5;
    357         0x0075; 0x016F; 0x0077; 0x1E98; 0x0079; 0x1E99; 0x004F; 0x0150;
    358         0x0055; 0x0170; 0x006F; 0x0151; 0x0075; 0x0171; 0x0423; 0x04F2;
    359         0x0443; 0x04F3; 0x0041; 0x01CD; 0x0043; 0x010C; 0x0044; 0x010E;
    360         0x0045; 0x011A; 0x0047; 0x01E6; 0x0048; 0x021E; 0x0049; 0x01CF;
    361         0x004B; 0x01E8; 0x004C; 0x013D; 0x004E; 0x0147; 0x004F; 0x01D1;
    362         0x0052; 0x0158; 0x0053; 0x0160; 0x0054; 0x0164; 0x0055; 0x01D3;
    363         0x005A; 0x017D; 0x0061; 0x01CE; 0x0063; 0x010D; 0x0064; 0x010F;
    364         0x0065; 0x011B; 0x0067; 0x01E7; 0x0068; 0x021F; 0x0069; 0x01D0;
    365         0x006A; 0x01F0; 0x006B; 0x01E9; 0x006C; 0x013E; 0x006E; 0x0148;
    366         0x006F; 0x01D2; 0x0072; 0x0159; 0x0073; 0x0161; 0x0074; 0x0165;
    367         0x0075; 0x01D4; 0x007A; 0x017E; 0x00DC; 0x01D9; 0x00FC; 0x01DA;
    368         0x01B7; 0x01EE; 0x0292; 0x01EF; 0x0041; 0x0200; 0x0045; 0x0204;
    369         0x0049; 0x0208; 0x004F; 0x020C; 0x0052; 0x0210; 0x0055; 0x0214;
    370         0x0061; 0x0201; 0x0065; 0x0205; 0x0069; 0x0209; 0x006F; 0x020D;
    371         0x0072; 0x0211; 0x0075; 0x0215; 0x0474; 0x0476; 0x0475; 0x0477;
    372         0x0041; 0x0202; 0x0045; 0x0206; 0x0049; 0x020A; 0x004F; 0x020E;
    373         0x0052; 0x0212; 0x0055; 0x0216; 0x0061; 0x0203; 0x0065; 0x0207;
    374         0x0069; 0x020B; 0x006F; 0x020F; 0x0072; 0x0213; 0x0075; 0x0217;
    375         0x0391; 0x1F08; 0x0395; 0x1F18; 0x0397; 0x1F28; 0x0399; 0x1F38;
    376         0x039F; 0x1F48; 0x03A9; 0x1F68; 0x03B1; 0x1F00; 0x03B5; 0x1F10;
    377         0x03B7; 0x1F20; 0x03B9; 0x1F30; 0x03BF; 0x1F40; 0x03C1; 0x1FE4;
    378         0x03C5; 0x1F50; 0x03C9; 0x1F60; 0x0391; 0x1F09; 0x0395; 0x1F19;
    379         0x0397; 0x1F29; 0x0399; 0x1F39; 0x039F; 0x1F49; 0x03A1; 0x1FEC;
    380         0x03A5; 0x1F59; 0x03A9; 0x1F69; 0x03B1; 0x1F01; 0x03B5; 0x1F11;
    381         0x03B7; 0x1F21; 0x03B9; 0x1F31; 0x03BF; 0x1F41; 0x03C1; 0x1FE5;
    382         0x03C5; 0x1F51; 0x03C9; 0x1F61; 0x004F; 0x01A0; 0x0055; 0x01AF;
    383         0x006F; 0x01A1; 0x0075; 0x01B0; 0x0041; 0x1EA0; 0x0042; 0x1E04;
    384         0x0044; 0x1E0C; 0x0045; 0x1EB8; 0x0048; 0x1E24; 0x0049; 0x1ECA;
    385         0x004B; 0x1E32; 0x004C; 0x1E36; 0x004D; 0x1E42; 0x004E; 0x1E46;
    386         0x004F; 0x1ECC; 0x0052; 0x1E5A; 0x0053; 0x1E62; 0x0054; 0x1E6C;
    387         0x0055; 0x1EE4; 0x0056; 0x1E7E; 0x0057; 0x1E88; 0x0059; 0x1EF4;
    388         0x005A; 0x1E92; 0x0061; 0x1EA1; 0x0062; 0x1E05; 0x0064; 0x1E0D;
    389         0x0065; 0x1EB9; 0x0068; 0x1E25; 0x0069; 0x1ECB; 0x006B; 0x1E33;
    390         0x006C; 0x1E37; 0x006D; 0x1E43; 0x006E; 0x1E47; 0x006F; 0x1ECD;
    391         0x0072; 0x1E5B; 0x0073; 0x1E63; 0x0074; 0x1E6D; 0x0075; 0x1EE5;
    392         0x0076; 0x1E7F; 0x0077; 0x1E89; 0x0079; 0x1EF5; 0x007A; 0x1E93;
    393         0x01A0; 0x1EE2; 0x01A1; 0x1EE3; 0x01AF; 0x1EF0; 0x01B0; 0x1EF1;
    394         0x0055; 0x1E72; 0x0075; 0x1E73; 0x0041; 0x1E00; 0x0061; 0x1E01;
    395         0x0053; 0x0218; 0x0054; 0x021A; 0x0073; 0x0219; 0x0074; 0x021B;
    396         0x0043; 0x00C7; 0x0044; 0x1E10; 0x0045; 0x0228; 0x0047; 0x0122;
    397         0x0048; 0x1E28; 0x004B; 0x0136; 0x004C; 0x013B; 0x004E; 0x0145;
    398         0x0052; 0x0156; 0x0053; 0x015E; 0x0054; 0x0162; 0x0063; 0x00E7;
    399         0x0064; 0x1E11; 0x0065; 0x0229; 0x0067; 0x0123; 0x0068; 0x1E29;
    400         0x006B; 0x0137; 0x006C; 0x013C; 0x006E; 0x0146; 0x0072; 0x0157;
    401         0x0073; 0x015F; 0x0074; 0x0163; 0x0041; 0x0104; 0x0045; 0x0118;
    402         0x0049; 0x012E; 0x004F; 0x01EA; 0x0055; 0x0172; 0x0061; 0x0105;
    403         0x0065; 0x0119; 0x0069; 0x012F; 0x006F; 0x01EB; 0x0075; 0x0173;
    404         0x0044; 0x1E12; 0x0045; 0x1E18; 0x004C; 0x1E3C; 0x004E; 0x1E4A;
    405         0x0054; 0x1E70; 0x0055; 0x1E76; 0x0064; 0x1E13; 0x0065; 0x1E19;
    406         0x006C; 0x1E3D; 0x006E; 0x1E4B; 0x0074; 0x1E71; 0x0075; 0x1E77;
    407         0x0048; 0x1E2A; 0x0068; 0x1E2B; 0x0045; 0x1E1A; 0x0049; 0x1E2C;
    408         0x0055; 0x1E74; 0x0065; 0x1E1B; 0x0069; 0x1E2D; 0x0075; 0x1E75;
    409         0x0042; 0x1E06; 0x0044; 0x1E0E; 0x004B; 0x1E34; 0x004C; 0x1E3A;
    410         0x004E; 0x1E48; 0x0052; 0x1E5E; 0x0054; 0x1E6E; 0x005A; 0x1E94;
    411         0x0062; 0x1E07; 0x0064; 0x1E0F; 0x0068; 0x1E96; 0x006B; 0x1E35;
    412         0x006C; 0x1E3B; 0x006E; 0x1E49; 0x0072; 0x1E5F; 0x0074; 0x1E6F;
    413         0x007A; 0x1E95; 0x003C; 0x226E; 0x003D; 0x2260; 0x003E; 0x226F;
    414         0x2190; 0x219A; 0x2192; 0x219B; 0x2194; 0x21AE; 0x21D0; 0x21CD;
    415         0x21D2; 0x21CF; 0x21D4; 0x21CE; 0x2203; 0x2204; 0x2208; 0x2209;
    416         0x220B; 0x220C; 0x2223; 0x2224; 0x2225; 0x2226; 0x223C; 0x2241;
    417         0x2243; 0x2244; 0x2245; 0x2247; 0x2248; 0x2249; 0x224D; 0x226D;
    418         0x2261; 0x2262; 0x2264; 0x2270; 0x2265; 0x2271; 0x2272; 0x2274;
    419         0x2273; 0x2275; 0x2276; 0x2278; 0x2277; 0x2279; 0x227A; 0x2280;
    420         0x227B; 0x2281; 0x227C; 0x22E0; 0x227D; 0x22E1; 0x2282; 0x2284;
    421         0x2283; 0x2285; 0x2286; 0x2288; 0x2287; 0x2289; 0x2291; 0x22E2;
    422         0x2292; 0x22E3; 0x22A2; 0x22AC; 0x22A8; 0x22AD; 0x22A9; 0x22AE;
    423         0x22AB; 0x22AF; 0x22B2; 0x22EA; 0x22B3; 0x22EB; 0x22B4; 0x22EC;
    424         0x22B5; 0x22ED; 0x00A8; 0x1FC1; 0x03B1; 0x1FB6; 0x03B7; 0x1FC6;
    425         0x03B9; 0x1FD6; 0x03C5; 0x1FE6; 0x03C9; 0x1FF6; 0x03CA; 0x1FD7;
    426         0x03CB; 0x1FE7; 0x1F00; 0x1F06; 0x1F01; 0x1F07; 0x1F08; 0x1F0E;
    427         0x1F09; 0x1F0F; 0x1F20; 0x1F26; 0x1F21; 0x1F27; 0x1F28; 0x1F2E;
    428         0x1F29; 0x1F2F; 0x1F30; 0x1F36; 0x1F31; 0x1F37; 0x1F38; 0x1F3E;
    429         0x1F39; 0x1F3F; 0x1F50; 0x1F56; 0x1F51; 0x1F57; 0x1F59; 0x1F5F;
    430         0x1F60; 0x1F66; 0x1F61; 0x1F67; 0x1F68; 0x1F6E; 0x1F69; 0x1F6F;
    431         0x1FBF; 0x1FCF; 0x1FFE; 0x1FDF; 0x0391; 0x1FBC; 0x0397; 0x1FCC;
    432         0x03A9; 0x1FFC; 0x03AC; 0x1FB4; 0x03AE; 0x1FC4; 0x03B1; 0x1FB3;
    433         0x03B7; 0x1FC3; 0x03C9; 0x1FF3; 0x03CE; 0x1FF4; 0x1F00; 0x1F80;
    434         0x1F01; 0x1F81; 0x1F02; 0x1F82; 0x1F03; 0x1F83; 0x1F04; 0x1F84;
    435         0x1F05; 0x1F85; 0x1F06; 0x1F86; 0x1F07; 0x1F87; 0x1F08; 0x1F88;
    436         0x1F09; 0x1F89; 0x1F0A; 0x1F8A; 0x1F0B; 0x1F8B; 0x1F0C; 0x1F8C;
    437         0x1F0D; 0x1F8D; 0x1F0E; 0x1F8E; 0x1F0F; 0x1F8F; 0x1F20; 0x1F90;
    438         0x1F21; 0x1F91; 0x1F22; 0x1F92; 0x1F23; 0x1F93; 0x1F24; 0x1F94;
    439         0x1F25; 0x1F95; 0x1F26; 0x1F96; 0x1F27; 0x1F97; 0x1F28; 0x1F98;
    440         0x1F29; 0x1F99; 0x1F2A; 0x1F9A; 0x1F2B; 0x1F9B; 0x1F2C; 0x1F9C;
    441         0x1F2D; 0x1F9D; 0x1F2E; 0x1F9E; 0x1F2F; 0x1F9F; 0x1F60; 0x1FA0;
    442         0x1F61; 0x1FA1; 0x1F62; 0x1FA2; 0x1F63; 0x1FA3; 0x1F64; 0x1FA4;
    443         0x1F65; 0x1FA5; 0x1F66; 0x1FA6; 0x1F67; 0x1FA7; 0x1F68; 0x1FA8;
    444         0x1F69; 0x1FA9; 0x1F6A; 0x1FAA; 0x1F6B; 0x1FAB; 0x1F6C; 0x1FAC;
    445         0x1F6D; 0x1FAD; 0x1F6E; 0x1FAE; 0x1F6F; 0x1FAF; 0x1F70; 0x1FB2;
    446         0x1F74; 0x1FC2; 0x1F7C; 0x1FF2; 0x1FB6; 0x1FB7; 0x1FC6; 0x1FC7;
    447         0x1FF6; 0x1FF7; 0x0627; 0x0622; 0x0627; 0x0623; 0x0648; 0x0624;
    448         0x064A; 0x0626; 0x06C1; 0x06C2; 0x06D2; 0x06D3; 0x06D5; 0x06C0;
    449         0x0627; 0x0625; 0x0928; 0x0929; 0x0930; 0x0931; 0x0933; 0x0934;
    450         0x09C7; 0x09CB; 0x09C7; 0x09CC; 0x0B47; 0x0B4B; 0x0B47; 0x0B48;
    451         0x0B47; 0x0B4C; 0x0BC6; 0x0BCA; 0x0BC7; 0x0BCB; 0x0B92; 0x0B94;
    452         0x0BC6; 0x0BCC; 0x0C46; 0x0C48; 0x0CC6; 0x0CCA; 0x0CBF; 0x0CC0;
    453         0x0CC6; 0x0CC7; 0x0CCA; 0x0CCB; 0x0CC6; 0x0CC8; 0x0D46; 0x0D4A;
    454         0x0D47; 0x0D4B; 0x0D46; 0x0D4C; 0x0DD9; 0x0DDA; 0x0DDC; 0x0DDD;
    455         0x0DD9; 0x0DDC; 0x0DD9; 0x0DDE; 0x1025; 0x1026; 0x3046; 0x3094;
    456         0x304B; 0x304C; 0x304D; 0x304E; 0x304F; 0x3050; 0x3051; 0x3052;
    457         0x3053; 0x3054; 0x3055; 0x3056; 0x3057; 0x3058; 0x3059; 0x305A;
    458         0x305B; 0x305C; 0x305D; 0x305E; 0x305F; 0x3060; 0x3061; 0x3062;
    459         0x3064; 0x3065; 0x3066; 0x3067; 0x3068; 0x3069; 0x306F; 0x3070;
    460         0x3072; 0x3073; 0x3075; 0x3076; 0x3078; 0x3079; 0x307B; 0x307C;
    461         0x309D; 0x309E; 0x30A6; 0x30F4; 0x30AB; 0x30AC; 0x30AD; 0x30AE;
    462         0x30AF; 0x30B0; 0x30B1; 0x30B2; 0x30B3; 0x30B4; 0x30B5; 0x30B6;
    463         0x30B7; 0x30B8; 0x30B9; 0x30BA; 0x30BB; 0x30BC; 0x30BD; 0x30BE;
    464         0x30BF; 0x30C0; 0x30C1; 0x30C2; 0x30C4; 0x30C5; 0x30C6; 0x30C7;
    465         0x30C8; 0x30C9; 0x30CF; 0x30D0; 0x30D2; 0x30D3; 0x30D5; 0x30D6;
    466         0x30D8; 0x30D9; 0x30DB; 0x30DC; 0x30EF; 0x30F7; 0x30F0; 0x30F8;
    467         0x30F1; 0x30F9; 0x30F2; 0x30FA; 0x30FD; 0x30FE; 0x306F; 0x3071;
    468         0x3072; 0x3074; 0x3075; 0x3077; 0x3078; 0x307A; 0x307B; 0x307D;
    469         0x30CF; 0x30D1; 0x30D2; 0x30D4; 0x30D5; 0x30D7; 0x30D8; 0x30DA;
    470         0x30DB; 0x30DD
    471 |]
    472 
        (* Two-level bitmap over the code points below 0x10000, in the layout
           read by [bitmap_test] (it is the table behind
           [unicode_combinable]).

           The first 256 bytes are indexed by the high byte of the code
           point: 0x00 means that no code point of that 256-wide page is
           set, 0xFF that all of them are, and any other value k selects the
           k-th 32-byte page bitmap stored after the index (at byte offset
           256 + (k - 1) * 32).  In a page bitmap, bit (c land 7) of byte
           (c land 0xFF) / 8 stands for code point c. *)
    473 let uniCharCombiningBitmap = "\
    474 \x00\x00\x00\x01\x02\x03\x04\x05\
    475 \x00\x06\x07\x08\x09\x0A\x0B\x0C\
    476 \x0D\x14\x00\x00\x00\x00\x00\x0E\
    477 \x0F\x00\x00\x00\x00\x00\x00\x00\
    478 \x10\x00\x00\x00\x00\x00\x00\x00\
    479 \x00\x00\x00\x00\x00\x00\x00\x00\
    480 \x11\x00\x00\x00\x00\x00\x00\x00\
    481 \x00\x00\x00\x00\x00\x00\x00\x00\
    482 \x00\x00\x00\x00\x00\x00\x00\x00\
    483 \x00\x00\x00\x00\x00\x00\x00\x00\
    484 \x00\x00\x00\x00\x00\x00\x00\x00\
    485 \x00\x00\x00\x00\x00\x00\x00\x00\
    486 \x00\x00\x00\x00\x00\x00\x00\x00\
    487 \x00\x00\x00\x00\x00\x00\x00\x00\
    488 \x00\x00\x00\x00\x00\x00\x00\x00\
    489 \x00\x00\x00\x00\x00\x00\x00\x00\
    490 \x00\x00\x00\x00\x00\x00\x00\x00\
    491 \x00\x00\x00\x00\x00\x00\x00\x00\
    492 \x00\x00\x00\x00\x00\x00\x00\x00\
    493 \x00\x00\x00\x00\x00\x00\x00\x00\
    494 \x00\x00\x00\x00\x00\x00\x00\x00\
    495 \x00\x00\x00\x00\x00\x00\x00\x00\
    496 \x00\x00\x00\x00\x00\x00\x00\x00\
    497 \x00\x00\x00\x00\x00\x00\x00\x00\
    498 \x00\x00\x00\x00\x00\x00\x00\x00\
    499 \x00\x00\x00\x00\x00\x00\x00\x00\
    500 \x00\x00\x00\x00\x00\x00\x00\x00\
    501 \x00\x00\x00\x00\x00\x00\x00\x00\
    502 \x00\x00\x00\x00\x00\x00\x00\x00\
    503 \x00\x00\x00\x00\x00\x00\x00\x00\
    504 \x00\x00\x00\x00\x00\x00\x00\x00\
    505 \x00\x00\x00\x12\x00\x00\x13\x00\
    506 \xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
    507 \xFF\xFF\x00\x00\xFF\xFF\x00\x00\
    508 \x00\x00\x00\x00\x00\x00\x00\x00\
    509 \x00\x00\x00\x00\x00\x00\x00\x00\
    510 \x00\x00\x00\x00\x00\x00\x00\x00\
    511 \x00\x00\x00\x00\x00\x00\x00\x00\
    512 \x78\x03\x00\x00\x00\x00\x00\x00\
    513 \x00\x00\x00\x00\x00\x00\x00\x00\
    514 \x00\x00\x00\x00\x00\x00\x00\x00\
    515 \x00\x00\x00\x00\x00\x00\x00\x00\
    516 \x00\x00\xFE\xFF\xFB\xFF\xFF\xBB\
    517 \x16\x00\x00\x00\x00\x00\x00\x00\
    518 \x00\x00\x00\x00\x00\x00\x00\x00\
    519 \x00\xF8\x3F\x00\x00\x00\x01\x00\
    520 \x00\x00\x00\x00\x00\x00\x00\x00\
    521 \x00\x00\xC0\xFF\x9F\x3D\x00\x00\
    522 \x00\x00\x02\x00\x00\x00\xFF\xFF\
    523 \xFF\x07\x00\x00\x00\x00\x00\x00\
    524 \x00\x00\x00\x00\xC0\xFF\x01\x00\
    525 \x00\x00\x00\x00\x00\x00\x00\x00\
    526 \x0E\x00\x00\x00\x00\x00\x00\xD0\
    527 \xFF\x3F\x1E\x00\x0C\x00\x00\x00\
    528 \x0E\x00\x00\x00\x00\x00\x00\xD0\
    529 \x9F\x39\x80\x00\x0C\x00\x00\x00\
    530 \x04\x00\x00\x00\x00\x00\x00\xD0\
    531 \x87\x39\x00\x00\x00\x00\x03\x00\
    532 \x0E\x00\x00\x00\x00\x00\x00\xD0\
    533 \xBF\x3B\x00\x00\x00\x00\x00\x00\
    534 \x0E\x00\x00\x00\x00\x00\x00\xD0\
    535 \x8F\x39\xC0\x00\x00\x00\x00\x00\
    536 \x04\x00\x00\x00\x00\x00\x00\xC0\
    537 \xC7\x3D\x80\x00\x00\x00\x00\x00\
    538 \x0E\x00\x00\x00\x00\x00\x00\xC0\
    539 \xDF\x3D\x60\x00\x00\x00\x00\x00\
    540 \x0C\x00\x00\x00\x00\x00\x00\xC0\
    541 \xDF\x3D\x60\x00\x00\x00\x00\x00\
    542 \x0C\x00\x00\x00\x00\x00\x00\xC0\
    543 \xCF\x3D\x80\x00\x00\x00\x00\x00\
    544 \x0C\x00\x00\x00\x00\x00\x00\x00\
    545 \x00\x84\x5F\xFF\x00\x00\x0C\x00\
    546 \x00\x00\x00\x00\x00\x00\xF2\x07\
    547 \x80\x7F\x00\x00\x00\x00\x00\x00\
    548 \x00\x00\x00\x00\x00\x00\xF2\x1B\
    549 \x00\x3F\x00\x00\x00\x00\x00\x00\
    550 \x00\x00\x00\x03\x00\x00\xA0\xC2\
    551 \x00\x00\x00\x00\x00\x00\xFE\xFF\
    552 \xDF\x00\xFF\xFE\xFF\xFF\xFF\x1F\
    553 \x40\x00\x00\x00\x00\x00\x00\x00\
    554 \x00\x00\x00\x00\x00\xF0\xC7\x03\
    555 \x00\x00\xC0\x03\x00\x00\x00\x00\
    556 \x00\x00\x00\x00\x00\x00\x00\x00\
    557 \x00\x00\x00\x00\x00\x00\x00\x00\
    558 \x00\x00\x1C\x00\x00\x00\x1C\x00\
    559 \x00\x00\x0C\x00\x00\x00\x0C\x00\
    560 \x00\x00\x00\x00\x00\x00\xF0\xFF\
    561 \xFF\xFF\x0F\x00\x00\x00\x00\x00\
    562 \x00\x38\x00\x00\x00\x00\x00\x00\
    563 \x00\x00\x00\x00\x00\x00\x00\x00\
    564 \x00\x00\x00\x00\x00\x02\x00\x00\
    565 \x00\x00\x00\x00\x00\x00\x00\x00\
    566 \x00\x00\x00\x00\x00\x00\x00\x00\
    567 \x00\x00\x00\x00\x00\x00\x00\x00\
    568 \x00\x00\x00\x00\x00\x00\x00\x00\
    569 \x00\x00\xFF\xFF\xFF\x07\x00\x00\
    570 \x00\x00\x00\x00\x00\xFC\x00\x00\
    571 \x00\x00\x00\x00\x00\x00\x00\x00\
    572 \x00\x00\x00\x06\x00\x00\x00\x00\
    573 \x00\x00\x00\x00\x00\x00\x00\x00\
    574 \x00\x00\x00\x40\x00\x00\x00\x00\
    575 \x00\x00\x00\x00\x00\x00\x00\x00\
    576 \x00\x00\x00\x00\x00\x00\x00\x00\
    577 \x00\x00\x00\x00\x00\x00\x00\x00\
    578 \xFF\xFF\x00\x00\x0F\x00\x00\x00\
    579 \x00\x00\x00\x00\x00\x00\x00\x00\
    580 \x00\x00\x00\x00\x00\x00\x00\x00\
    581 \x00\x00\x00\x00\x00\x00\x00\x00\
    582 \x00\x00\x00\x00\x00\x00\x00\x00\
    583 \x00\x00\x00\x00\xFE\xFF\x3F\x00\
    584 \x00\x00\x00\x00\x00\xFF\xFF\xFF\
    585 \x07\x00\x00\x00\x00\x00\x00\x00"
    586 
    587 (****)
    588 
    589 let bitmap_test base bitmap character =
    590   character >= base && character < 0x10000
    591     &&
    592   (let value = get bitmap ((character lsr 8) land 0xFF) in
    593    value = 0xFF
    594       ||
    595    (value <> 0
    596        &&
    597     get bitmap ((value - 1) * 32 + 256 + (character land 0xFF) / 8)
    598       land (1 lsl (character land 7)) <> 0))
    599 
    600 let unicode_combinable character =
    601   bitmap_test 0x0300 uniCharCombiningBitmap character
    602 
    603 let rec find_rec t i j v =
    604   if i + 1 = j then begin
    605     if t.(i * 2) = v then t.(i * 2 + 1) else 0
    606   end else begin
    607     let k = (i + j) / 2 in
    608     if v < t.(k * 2) then
    609       find_rec t i k v
    610     else
    611       find_rec t k j v
    612   end
    613 
    614 let find t i n v =
    615   let j = i + n in
    616   if v < t.(2 * i) || v > t.(2 * (j - 1)) then 0 else
    617   find_rec t i j v
    618 
    619 let uniCharPrecompSourceTableLen = Array.length uniCharPrecompSourceTable / 2
    620 
    621 let combine v v' =
    622   if v' >= hangul_vbase && v' < hangul_tbase + hangul_tcount then begin
    623     if
    624       v' < hangul_vbase + hangul_vcount &&
    625       v >= hangul_lbase && v < hangul_lbase + hangul_lcount
    626     then
    627       hangul_sbase + ((v - hangul_lbase) * (hangul_vcount * hangul_tcount)) +
    628                      ((v' - hangul_vbase) * hangul_tcount)
    629     else if
    630       v' > hangul_tbase &&
    631       v >= hangul_sbase && v < hangul_sbase + hangul_scount
    632     then
    633       if (v - hangul_sbase) mod hangul_tcount <> 0 then 0 else
    634       v + v' - hangul_tbase
    635     else
    636       0
    637   end else begin
    638     let k =
    639       find uniCharPrecompSourceTable 0
    640         uniCharPrecompSourceTableLen v'
    641     in
    642     if k = 0 then 0 else
    643     find uniCharBMPPrecompDestinationTable (k land 0xFFFF) (k lsr 16) v
    644   end
    645 
    646 (****)
    647 
    648 let rec scan d s i l =
    649   if i < l then begin
    650     let c = get s i in
    651     if c < 0x80 then
    652       cont d s i l (i + 1) c
    653     else if c < 0xE0 then begin
    654       (* 80 - 7FF *)
    655       if c < 0xc2 || i + 1 >= l then fail () else
    656       let c1 = get s (i + 1) in
    657       if c1 land 0xc0 <> 0x80 then fail () else
    658       let v = c lsl 6 + c1 - 0x3080 in
    659       cont d s i l (i + 2) v
    660     end else if c < 0xF0 then begin
    661       (* 800 - FFFF *)
    662       if i + 2 >= l then fail () else
    663       let c1 = get s (i + 1) in
    664       let c2 = get s (i + 2) in
    665       if (c1 lor c2) land 0xc0 <> 0x80 then fail () else
    666       let v = c lsl 12 + c1 lsl 6 + c2 - 0xe2080 in
    667       if v < 0x800 then fail () else
    668       cont d s i l (i + 3) v
    669     end else begin
    670       (* 10000 - 10FFFF *)
    671       if i + 3 >= l then fail () else
    672       let c1 = get s (i + 1) in
    673       let c2 = get s (i + 2) in
    674       let c3 = get s (i + 3) in
    675       if (c1 lor c2 lor c3) land 0xc0 <> 0x80 then fail () else
    676       let v = c lsl 18 + c1 lsl 12 + c2 lsl 6 + c3 - 0x03c82080 in
    677       if v < 0x10000 || v > 0x10ffff then fail () else
    678       cont d s i l (i + 4) v
    679     end
    680   end else begin
    681     let (i1, i2) = d in
    682     String.blit s i2 s i1 (l - i2);
    683     String.sub s 0 (i1 + l - i2)
    684   end
    685 
    686 and cont d s i l j v' =
    687   if unicode_combinable v' then begin
    688     let i = prev_char s i in
    689     let (v, _) = decode_char s i l in
    690     let v'' = combine v v' in
    691     if v'' = 0 then
    692       scan d s j l
    693     else begin
    694       let (i1, i2) = d in
    695       String.blit s i2 s i1 (i - i2);
    696       let i1 = i1 + i - i2 in
    697       let (v'', i) = compose_rec s j l v'' in
    698       let i1 = encode_char s i1 l v'' in
    699       scan (i1, i) s i l
    700     end
    701   end else
    702     scan d s j l
    703 
    704 and compose_rec s i l v =
    705   try
    706     let (v', j) = decode_char s i l in
    707     if unicode_combinable v' then begin
    708       let v'' = combine v v' in
    709       if v'' = 0 then
    710         (v, i)
    711       else
    712         compose_rec s j l v''
    713     end else
    714       (v, i)
    715   with Invalid ->
    716     (v, i)
    717 
    718 let compose s =
    719   try scan (0, 0) (String.copy s) 0 (String.length s) with Invalid -> s
    720 
    721 (***)
    722 
    723 let set_2 s i v =
    724   set s i (v land 0xff);
    725   set s (i + 1) (v lsr 8)
    726 
    727 let get_2 s i = (get s (i + 1)) lsl 8 + get s i
    728 
    729 let rec scan s' j s i l =
    730   if i < l then begin
    731     let c = get s i in
    732     if c < 0x80 then
    733       cont s' j s (i + 1) l c
    734     else if c < 0xE0 then begin
    735       (* 80 - 7FF *)
    736       if c < 0xc2 || i + 1 >= l then fail () else
    737       let c1 = get s (i + 1) in
    738       if c1 land 0xc0 <> 0x80 then fail () else
    739       let v = c lsl 6 + c1 - 0x3080 in
    740       cont s' j s (i + 2) l v
    741     end else if c < 0xF0 then begin
    742       (* 800 - FFFF *)
    743       if i + 2 >= l then fail () else
    744       let c1 = get s (i + 1) in
    745       let c2 = get s (i + 2) in
    746       if (c1 lor c2) land 0xc0 <> 0x80 then fail () else
    747       let v = c lsl 12 + c1 lsl 6 + c2 - 0xe2080 in
    748       if v < 0x800 then fail () else
    749       cont s' j s (i + 3) l v
    750     end else begin
    751       (* 10000 - 10FFFF *)
    752       if i + 3 >= l then fail () else
    753       let c1 = get s (i + 1) in
    754       let c2 = get s (i + 2) in
    755       let c3 = get s (i + 3) in
    756       if (c1 lor c2 lor c3) land 0xc0 <> 0x80 then fail () else
    757       let v = c lsl 18 + c1 lsl 12 + c2 lsl 6 + c3 - 0x03c82080 in
    758       if v < 0x10000 || v > 0x10ffff then fail () else
    759       cont s' j s (i + 4) l v
    760     end
    761   end else
    762     String.sub s' 0 j
    763 
    764 and cont s' j s i l v =
    765   if v < 0x10000 then begin
    766     set_2 s' j v;
    767     scan s' (j + 2) s i l
    768   end else begin
    769    let v = v - 0x10000 in
    770    set_2 s' j (v lsr 10 + 0xD800);
    771    set_2 s' (j + 2) (v land 0x3FF + 0xDC00);
    772    scan s' (j + 4) s i l
    773   end
    774 
    775 let to_utf_16 s =
    776   let l = String.length s in
    777   let s' = Bytes.create (2 * l) in
    778   scan s' 0 s 0 l
    779 
    780 (****)
    781 
    782 let rec scan s' i' l' s i l =
    783   if i + 2 <= l then begin
    784     let v = get_2 s i in
    785     if v < 0xD800 || v > 0xDFFF then
    786       let i' = encode_char s' i' l' v in
    787       scan s' i' l' s (i + 2) l
    788     else if v >= 0xdc00 || i + 4 > l then
    789       fail ()
    790     else begin
    791       let v' = get_2 s (i + 2) in
    792       if v' < 0xDC00 || v' > 0XDFFF then fail () else
    793       let i' =
    794         encode_char s' i' l' ((v - 0xD800) lsl 10 + (v' - 0xDC00) + 0x10000)
    795       in
    796       scan s' i' l' s (i + 4) l
    797     end
    798   end else if i < l then
    799     fail ()
    800   else
    801     String.sub s' 0 i'
    802 
    803 
    804 let from_utf_16 s =
    805   let l = String.length s in
    806   let l' = 3 * l / 2 in
    807   let s' = Bytes.create l' in
    808   scan s' 0 l' s 0 l
    809 
(*
Output size of from_utf_16:

a 2-byte UTF-16 unit    -> at most 3 bytes of UTF-8
a 4-byte surrogate pair -> 4 bytes of UTF-8
*)