unicode.ml (34037B)
open Unicode_tables

(* Raised internally when the input is not well-formed UTF-8 / UTF-16, or
   when an output buffer is too small for the character being encoded. *)
exception Invalid

let fail () = raise Invalid

(* [get s i] is the code of byte [i] of the string [s] (no bounds check). *)
let get s i = Char.code (String.unsafe_get s i)

(* Same as [get], but reading from a mutable byte buffer. *)
let bget b i = Char.code (Bytes.unsafe_get b i)

(* [set b i v] stores byte value [v] at index [i] of the buffer [b]
   (no bounds check).  Output buffers are [Bytes.t] so that the module
   type-checks with immutable strings. *)
let set s i v = Bytes.unsafe_set s i (Char.unsafe_chr v)

(* [is_cont c] holds iff [c] has the form 10xxxxxx, i.e. is a UTF-8
   continuation byte. *)
let is_cont c = c land 0xc0 = 0x80

(****)

(* Constants of the arithmetic Hangul syllable (de)composition. *)
let hangul_sbase = 0xAC00
let hangul_lbase = 0x1100
let hangul_vbase = 0x1161
let hangul_tbase = 0x11A7

let hangul_scount = 11172
let hangul_lcount = 19
let hangul_vcount = 21
let hangul_tcount = 28
let hangul_ncount = hangul_vcount * hangul_tcount

(* Write the three-byte UTF-8 encoding of [c] at [s.[i .. i+2]].  The
   caller guarantees that [c] needs exactly three bytes and that the
   buffer is large enough. *)
let set_char_3 s i c =
  set s i (c lsr 12 + 0xE0);
  set s (i + 1) ((c lsr 6) land 0x3f + 0x80);
  set s (i + 2) (c land 0x3f + 0x80)

(* [norm s i l s' j] normalizes the UTF-8 bytes [s.[i .. l-1]] into the
   buffer [s'] starting at offset [j] and returns the filled prefix of
   [s'] as a string.  ASCII bytes are mapped through [ascii_lower]; other
   characters are looked up in the [norm_*] tables of [Unicode_tables]:
   a [norm_second_high] entry of 0 means "copy unchanged", 1 means
   "Hangul syllable, decompose arithmetically", anything else selects a
   length-prefixed replacement in [norm_repl].
   Raises [Invalid] on malformed input. *)
let rec norm s i l s' j =
  if i < l then begin
    let c = get s i in
    if c < 0x80 then begin
      set s' j (get ascii_lower c);
      norm s (i + 1) l s' (j + 1)
    end else if c < 0xE0 then begin
      (* 80 - 7FF *)
      if c < 0xc2 || i + 1 >= l then raise Invalid;
      let c1 = get s (i + 1) in
      if not (is_cont c1) then raise Invalid;
      let idx = get norm_prim (c - 0xc0) in
      let idx = idx lsl 6 + c1 - 0x80 in
      let k = get norm_second_high idx in
      if k = 0 then begin
        set s' j c;
        set s' (j + 1) c1;
        norm s (i + 2) l s' (j + 2)
      end else begin
        let k = (k - 2) lsl 8 + get norm_second_low idx in
        let n = get norm_repl k in
        String.blit norm_repl (k + 1) s' j n;
        norm s (i + 2) l s' (j + n)
      end
    end else if c < 0xF0 then begin
      (* 800 - FFFF *)
      if i + 2 >= l then raise Invalid;
      let c1 = get s (i + 1) in
      if not (is_cont c1) then raise Invalid;
      let idx = c lsl 6 + c1 - 0x3880 in
      (* idx < 0x20 means the code point is below 0x800 (overlong form) *)
      if idx < 0x20 then raise Invalid;
      let c2 = get s (i + 2) in
      if not (is_cont c2) then raise Invalid;
      let idx = get norm_prim idx in
      let idx = idx lsl 6 + c2 - 0x80 in
      let k = get norm_second_high idx in
      if k = 0 then begin
        set s' j c;
        set s' (j + 1) c1;
        set s' (j + 2) c2;
        norm s (i + 3) l s' (j + 3)
      end else if k = 1 then begin
        (* [v] is the offset of the character from [hangul_sbase] *)
        let v = c lsl 12 + c1 lsl 6 + c2 - (0x000E2080 + hangul_sbase) in
        if v >= hangul_scount then begin
          set s' j c;
          set s' (j + 1) c1;
          set s' (j + 2) c2;
          norm s (i + 3) l s' (j + 3)
        end else begin
          set_char_3 s' j (v / hangul_ncount + hangul_lbase);
          set_char_3 s' (j + 3)
            ((v mod hangul_ncount) / hangul_tcount + hangul_vbase);
          if v mod hangul_tcount = 0 then
            norm s (i + 3) l s' (j + 6)
          else begin
            set_char_3 s' (j + 6) ((v mod hangul_tcount) + hangul_tbase);
            norm s (i + 3) l s' (j + 9)
          end
        end
      end else begin
        let k = (k - 2) lsl 8 + get norm_second_low idx in
        let n = get norm_repl k in
        String.blit norm_repl (k + 1) s' j n;
        norm s (i + 3) l s' (j + n)
      end
    end else begin
      (* 10000 - 10FFFF *)
      if i + 3 >= l then raise Invalid;
      let c1 = get s (i + 1) in
      let c2 = get s (i + 2) in
      let c3 = get s (i + 3) in
      (* Each trailing byte is checked on its own: OR-ing them together
         would let a byte with top bits 00 slip through. *)
      if not (is_cont c1 && is_cont c2 && is_cont c3) then raise Invalid;
      let v = c lsl 18 + c1 lsl 12 + c2 lsl 6 + c3 - 0x03c82080 in
      if v < 0x10000 || v > 0x10ffff then raise Invalid;
      set s' j c;
      set s' (j + 1) c1;
      set s' (j + 2) c2;
      set s' (j + 3) c3;
      norm s (i + 4) l s' (j + 4)
    end
  end else
    Bytes.sub_string s' 0 j

(* [normalize s] is the normalized form of [s] computed by [norm], or [s]
   itself when [s] is not well-formed UTF-8.  The work buffer is three
   times the input length; the Hangul branch of [norm] turns 3 input
   bytes into at most 9 output bytes. *)
let normalize s =
  let l = String.length s in
  let s' = Bytes.create (3 * l) in
  try norm s 0 l s' 0 with Invalid -> s

(****)

(* [compare_rec s s' i l] compares [s] and [s'] from index [i], where [l]
   is the length of the shorter string.  ASCII bytes are compared through
   [ascii_lower]; at the first non-ASCII byte the comparison falls back
   to comparing the normalized forms of both whole strings.  [compare]
   in this function is still the standard polymorphic comparison: the
   string version below is defined afterwards. *)
let rec compare_rec s s' i l =
  if i = l then begin
    if l < String.length s then 1 else
    if l < String.length s' then -1 else
    0
  end else begin
    let c = get s i in
    let c' = get s' i in
    if c < 0x80 && c' < 0x80 then begin
      let v = compare (get ascii_lower c) (get ascii_lower c') in
      if v <> 0 then v else compare_rec s s' (i + 1) l
    end else
      compare (normalize s) (normalize s')
  end

(* Comparison of two strings up to normalization.  Shadows the standard
   [compare] for the rest of this module. *)
let compare s s' =
  compare_rec s s' 0 (min (String.length s) (String.length s'))

(****)

(* [decode_with byte i l] decodes the UTF-8 sequence starting at index
   [i], where [byte k] is the value of input byte [k] and [l] is the
   exclusive end of the input.  Returns [(v, i')]: the code point and the
   index just after the sequence.  Raises [Invalid] when [i = l], on a
   truncated sequence, a bad lead or continuation byte, an overlong form
   or a value above 0x10FFFF.  The input is abstracted as a reader so the
   same decoder serves immutable strings and the mutable buffer used by
   [compose]. *)
let decode_with byte i l =
  if i = l then fail () else
  let c = byte i in
  if c < 0x80 then
    (c, i + 1)
  else if c < 0xE0 then begin
    (* 80 - 7FF *)
    if c < 0xc2 || i + 1 >= l then fail () else
    let c1 = byte (i + 1) in
    if not (is_cont c1) then fail () else
    (c lsl 6 + c1 - 0x3080, i + 2)
  end else if c < 0xF0 then begin
    (* 800 - FFFF *)
    if i + 2 >= l then fail () else
    let c1 = byte (i + 1) in
    let c2 = byte (i + 2) in
    if not (is_cont c1 && is_cont c2) then fail () else
    let v = c lsl 12 + c1 lsl 6 + c2 - 0xe2080 in
    if v < 0x800 then fail () else
    (v, i + 3)
  end else begin
    (* 10000 - 10FFFF *)
    if i + 3 >= l then fail () else
    let c1 = byte (i + 1) in
    let c2 = byte (i + 2) in
    let c3 = byte (i + 3) in
    if not (is_cont c1 && is_cont c2 && is_cont c3) then fail () else
    let v = c lsl 18 + c1 lsl 12 + c2 lsl 6 + c3 - 0x03c82080 in
    if v < 0x10000 || v > 0x10ffff then fail () else
    (v, i + 4)
  end

(* [prev_with byte i] is the largest index below [i] whose byte is not a
   continuation byte.  Raises [Invalid] if there is none. *)
let prev_with byte i =
  let rec back i =
    let i = i - 1 in
    if i < 0 then fail () else
    if byte i land 0xc0 <> 0x80 then i else back i
  in
  back i

(* [decode_char s i l] decodes the character of the string [s] starting
   at index [i] ([l] is the exclusive end of the text) and returns it
   together with the index of the next character.  Raises [Invalid] on
   malformed input or when [i = l]. *)
let decode_char s i l = decode_with (fun k -> get s k) i l

(* [encode_char s i l c] writes the UTF-8 encoding of [c] into the buffer
   [s] at index [i] and returns the index just after it.  [l] is the
   exclusive limit of the writable area; [Invalid] is raised when the
   encoding does not fit entirely below [l]. *)
let encode_char s i l c =
  if c < 0x80 then begin
    if i >= l then fail () else begin
      set s i c;
      i + 1
    end
  end else if c < 0x800 then begin
    if i + 1 >= l then fail () else begin
      set s i (c lsr 6 + 0xC0);
      set s (i + 1) (c land 0x3f + 0x80);
      i + 2
    end
  end else if c < 0x10000 then begin
    (* three bytes are written: the last one, at i + 2, must be below l *)
    if i + 2 >= l then fail () else begin
      set s i (c lsr 12 + 0xE0);
      set s (i + 1) ((c lsr 6) land 0x3f + 0x80);
      set s (i + 2) (c land 0x3f + 0x80);
      i + 3
    end
  end else begin
    (* four bytes are written: the last one, at i + 3, must be below l *)
    if i + 3 >= l then fail () else begin
      set s i (c lsr 18 + 0xF0);
      set s (i + 1) ((c lsr 12) land 0x3f + 0x80);
      set s (i + 2) ((c lsr 6) land 0x3f + 0x80);
      set s (i + 3) (c land 0x3f + 0x80);
      i + 4
    end
  end

(* [prev_char s i] is the index of the first byte of the character of [s]
   that ends just before index [i].  Raises [Invalid] if there is no
   such character. *)
let prev_char s i = prev_with (fun k -> get s k) i

(****)

(* Pairs (combining character, descriptor), sorted by combining
   character.  As used by [combine], the low 16 bits of a descriptor are
   the index of the first pair of the matching section of
   [uniCharBMPPrecompDestinationTable] and the high bits are the number
   of pairs in that section. *)
let uniCharPrecompSourceTable = [|
  0x00000300; 0x00540000; 0x00000301; 0x00750054;
  0x00000302; 0x002000C9; 0x00000303; 0x001C00E9;
  0x00000304; 0x002C0105; 0x00000306; 0x00200131;
  0x00000307; 0x002E0151; 0x00000308; 0x0036017F;
  0x00000309; 0x001801B5; 0x0000030A; 0x000601CD;
  0x0000030B; 0x000601D3; 0x0000030C; 0x002501D9;
  0x0000030F; 0x000E01FE; 0x00000311; 0x000C020C;
  0x00000313; 0x000E0218; 0x00000314; 0x00100226;
  0x0000031B; 0x00040236; 0x00000323; 0x002A023A;
  0x00000324; 0x00020264; 0x00000325; 0x00020266;
  0x00000326; 0x00040268; 0x00000327; 0x0016026C;
  0x00000328; 0x000A0282; 0x0000032D; 0x000C028C;
  0x0000032E; 0x00020298; 0x00000330; 0x0006029A;
  0x00000331; 0x001102A0; 0x00000338; 0x002C02B1;
  0x00000342; 0x001D02DD; 0x00000345; 0x003F02FA;
  0x00000653; 0x00010339; 0x00000654; 0x0006033A;
  0x00000655; 0x00010340; 0x0000093C; 0x00030341;
  0x000009BE; 0x00010344; 0x000009D7; 0x00010345;
  0x00000B3E; 0x00010346; 0x00000B56; 0x00010347;
  0x00000B57; 0x00010348; 0x00000BBE; 0x00020349;
  0x00000BD7; 0x0002034B; 0x00000C56; 0x0001034D;
  0x00000CC2; 0x0001034E; 0x00000CD5; 0x0003034F;
  0x00000CD6; 0x00010352; 0x00000D3E; 0x00020353;
  0x00000D57; 0x00010355; 0x00000DCA; 0x00020356;
  0x00000DCF; 0x00010358; 0x00000DDF; 0x00010359;
  0x0000102E; 0x0001035A; 0x00003099; 0x0030035B;
  0x0000309A; 0x000A038B
|]

(* Pairs (base character, precomposed character), grouped in one section
   per combining character of [uniCharPrecompSourceTable]; each section
   is sorted by base character so that [find] can search it by
   bisection. *)
let uniCharBMPPrecompDestinationTable = [|
  0x0041; 0x00C0; 0x0045; 0x00C8; 0x0049; 0x00CC; 0x004E; 0x01F8;
  0x004F; 0x00D2; 0x0055; 0x00D9; 0x0057; 0x1E80; 0x0059; 0x1EF2;
  0x0061; 0x00E0; 0x0065; 0x00E8; 0x0069; 0x00EC; 0x006E; 0x01F9;
  0x006F; 0x00F2; 0x0075; 0x00F9; 0x0077; 0x1E81; 0x0079; 0x1EF3;
  0x00A8; 0x1FED; 0x00C2; 0x1EA6; 0x00CA; 0x1EC0; 0x00D4; 0x1ED2;
  0x00DC; 0x01DB; 0x00E2; 0x1EA7; 0x00EA; 0x1EC1; 0x00F4; 0x1ED3;
  0x00FC; 0x01DC; 0x0102; 0x1EB0; 0x0103; 0x1EB1; 0x0112; 0x1E14;
  0x0113; 0x1E15; 0x014C; 0x1E50; 0x014D; 0x1E51; 0x01A0; 0x1EDC;
  0x01A1; 0x1EDD; 0x01AF; 0x1EEA; 0x01B0; 0x1EEB; 0x0391; 0x1FBA;
  0x0395; 0x1FC8; 0x0397; 0x1FCA; 0x0399; 0x1FDA; 0x039F; 0x1FF8;
  0x03A5; 0x1FEA; 0x03A9; 0x1FFA; 0x03B1; 0x1F70; 0x03B5; 0x1F72;
  0x03B7; 0x1F74; 0x03B9; 0x1F76; 0x03BF; 0x1F78; 0x03C5; 0x1F7A;
  0x03C9; 0x1F7C; 0x03CA; 0x1FD2; 0x03CB; 0x1FE2; 0x0415; 0x0400;
  0x0418; 0x040D; 0x0435; 0x0450; 0x0438; 0x045D; 0x1F00; 0x1F02;
  0x1F01; 0x1F03; 0x1F08; 0x1F0A; 0x1F09; 0x1F0B; 0x1F10; 0x1F12;
  0x1F11; 0x1F13; 0x1F18; 0x1F1A; 0x1F19; 0x1F1B; 0x1F20; 0x1F22;
  0x1F21; 0x1F23; 0x1F28; 0x1F2A; 0x1F29; 0x1F2B; 0x1F30; 0x1F32;
  0x1F31; 0x1F33; 0x1F38; 0x1F3A; 0x1F39; 0x1F3B; 0x1F40; 0x1F42;
  0x1F41; 0x1F43; 0x1F48; 0x1F4A; 0x1F49; 0x1F4B; 0x1F50; 0x1F52;
  0x1F51; 0x1F53; 0x1F59; 0x1F5B; 0x1F60; 0x1F62; 0x1F61; 0x1F63;
  0x1F68; 0x1F6A; 0x1F69; 0x1F6B; 0x1FBF; 0x1FCD; 0x1FFE; 0x1FDD;
  0x0041; 0x00C1; 0x0043; 0x0106; 0x0045; 0x00C9; 0x0047; 0x01F4;
  0x0049; 0x00CD; 0x004B; 0x1E30; 0x004C; 0x0139; 0x004D; 0x1E3E;
  0x004E; 0x0143; 0x004F; 0x00D3; 0x0050; 0x1E54; 0x0052; 0x0154;
  0x0053; 0x015A; 0x0055; 0x00DA; 0x0057; 0x1E82; 0x0059; 0x00DD;
  0x005A; 0x0179; 0x0061; 0x00E1; 0x0063; 0x0107; 0x0065; 0x00E9;
  0x0067; 0x01F5; 0x0069; 0x00ED; 0x006B; 0x1E31; 0x006C; 0x013A;
  0x006D; 0x1E3F; 0x006E; 0x0144; 0x006F; 0x00F3; 0x0070; 0x1E55;
  0x0072; 0x0155; 0x0073; 0x015B; 0x0075; 0x00FA; 0x0077; 0x1E83;
  0x0079; 0x00FD; 0x007A; 0x017A; 0x00A8; 0x0385; 0x00C2; 0x1EA4;
  0x00C5; 0x01FA; 0x00C6; 0x01FC; 0x00C7; 0x1E08; 0x00CA; 0x1EBE;
  0x00CF; 0x1E2E; 0x00D4; 0x1ED0; 0x00D5; 0x1E4C; 0x00D8; 0x01FE;
  0x00DC; 0x01D7; 0x00E2; 0x1EA5; 0x00E5; 0x01FB; 0x00E6; 0x01FD;
  0x00E7; 0x1E09; 0x00EA; 0x1EBF; 0x00EF; 0x1E2F; 0x00F4; 0x1ED1;
  0x00F5; 0x1E4D; 0x00F8; 0x01FF; 0x00FC; 0x01D8; 0x0102; 0x1EAE;
  0x0103; 0x1EAF; 0x0112; 0x1E16; 0x0113; 0x1E17; 0x014C; 0x1E52;
  0x014D; 0x1E53; 0x0168; 0x1E78; 0x0169; 0x1E79; 0x01A0; 0x1EDA;
  0x01A1; 0x1EDB; 0x01AF; 0x1EE8; 0x01B0; 0x1EE9; 0x0391; 0x0386;
  0x0395; 0x0388; 0x0397; 0x0389; 0x0399; 0x038A; 0x039F; 0x038C;
  0x03A5; 0x038E; 0x03A9; 0x038F; 0x03B1; 0x03AC; 0x03B5; 0x03AD;
  0x03B7; 0x03AE; 0x03B9; 0x03AF; 0x03BF; 0x03CC; 0x03C5; 0x03CD;
  0x03C9; 0x03CE; 0x03CA; 0x0390; 0x03CB; 0x03B0; 0x03D2; 0x03D3;
  0x0413; 0x0403; 0x041A; 0x040C; 0x0433; 0x0453; 0x043A; 0x045C;
  0x1F00; 0x1F04; 0x1F01; 0x1F05; 0x1F08; 0x1F0C; 0x1F09; 0x1F0D;
  0x1F10; 0x1F14; 0x1F11; 0x1F15; 0x1F18; 0x1F1C; 0x1F19; 0x1F1D;
  0x1F20; 0x1F24; 0x1F21; 0x1F25; 0x1F28; 0x1F2C; 0x1F29; 0x1F2D;
  0x1F30; 0x1F34; 0x1F31; 0x1F35; 0x1F38; 0x1F3C; 0x1F39; 0x1F3D;
  0x1F40; 0x1F44; 0x1F41; 0x1F45; 0x1F48; 0x1F4C; 0x1F49; 0x1F4D;
  0x1F50; 0x1F54; 0x1F51; 0x1F55; 0x1F59; 0x1F5D; 0x1F60; 0x1F64;
  0x1F61; 0x1F65; 0x1F68; 0x1F6C; 0x1F69; 0x1F6D; 0x1FBF; 0x1FCE;
  0x1FFE; 0x1FDE; 0x0041; 0x00C2; 0x0043; 0x0108; 0x0045; 0x00CA;
  0x0047; 0x011C; 0x0048; 0x0124; 0x0049; 0x00CE; 0x004A; 0x0134;
  0x004F; 0x00D4; 0x0053; 0x015C; 0x0055; 0x00DB; 0x0057; 0x0174;
  0x0059; 0x0176; 0x005A; 0x1E90; 0x0061; 0x00E2; 0x0063; 0x0109;
  0x0065; 0x00EA; 0x0067; 0x011D; 0x0068; 0x0125; 0x0069; 0x00EE;
  0x006A; 0x0135; 0x006F; 0x00F4; 0x0073; 0x015D; 0x0075; 0x00FB;
  0x0077; 0x0175; 0x0079; 0x0177; 0x007A; 0x1E91; 0x1EA0; 0x1EAC;
  0x1EA1; 0x1EAD; 0x1EB8; 0x1EC6; 0x1EB9; 0x1EC7; 0x1ECC; 0x1ED8;
  0x1ECD; 0x1ED9; 0x0041; 0x00C3; 0x0045; 0x1EBC; 0x0049; 0x0128;
  0x004E; 0x00D1; 0x004F; 0x00D5; 0x0055; 0x0168; 0x0056; 0x1E7C;
  0x0059; 0x1EF8; 0x0061; 0x00E3; 0x0065; 0x1EBD; 0x0069; 0x0129;
  0x006E; 0x00F1; 0x006F; 0x00F5; 0x0075; 0x0169; 0x0076; 0x1E7D;
  0x0079; 0x1EF9; 0x00C2; 0x1EAA; 0x00CA; 0x1EC4; 0x00D4; 0x1ED6;
  0x00E2; 0x1EAB; 0x00EA; 0x1EC5; 0x00F4; 0x1ED7; 0x0102; 0x1EB4;
  0x0103; 0x1EB5; 0x01A0; 0x1EE0; 0x01A1; 0x1EE1; 0x01AF; 0x1EEE;
  0x01B0; 0x1EEF; 0x0041; 0x0100; 0x0045; 0x0112; 0x0047; 0x1E20;
  0x0049; 0x012A; 0x004F; 0x014C; 0x0055; 0x016A; 0x0059; 0x0232;
  0x0061; 0x0101; 0x0065; 0x0113; 0x0067; 0x1E21; 0x0069; 0x012B;
  0x006F; 0x014D; 0x0075; 0x016B; 0x0079; 0x0233; 0x00C4; 0x01DE;
  0x00C6; 0x01E2; 0x00D5; 0x022C; 0x00D6; 0x022A; 0x00DC; 0x01D5;
  0x00E4; 0x01DF; 0x00E6; 0x01E3; 0x00F5; 0x022D; 0x00F6; 0x022B;
  0x00FC; 0x01D6; 0x01EA; 0x01EC; 0x01EB; 0x01ED; 0x0226; 0x01E0;
  0x0227; 0x01E1; 0x022E; 0x0230; 0x022F; 0x0231; 0x0391; 0x1FB9;
  0x0399; 0x1FD9; 0x03A5; 0x1FE9; 0x03B1; 0x1FB1; 0x03B9; 0x1FD1;
  0x03C5; 0x1FE1; 0x0418; 0x04E2; 0x0423; 0x04EE; 0x0438; 0x04E3;
  0x0443; 0x04EF; 0x1E36; 0x1E38; 0x1E37; 0x1E39; 0x1E5A; 0x1E5C;
  0x1E5B; 0x1E5D; 0x0041; 0x0102; 0x0045; 0x0114; 0x0047; 0x011E;
  0x0049; 0x012C; 0x004F; 0x014E; 0x0055; 0x016C; 0x0061; 0x0103;
  0x0065; 0x0115; 0x0067; 0x011F; 0x0069; 0x012D; 0x006F; 0x014F;
  0x0075; 0x016D; 0x0228; 0x1E1C; 0x0229; 0x1E1D; 0x0391; 0x1FB8;
  0x0399; 0x1FD8; 0x03A5; 0x1FE8; 0x03B1; 0x1FB0; 0x03B9; 0x1FD0;
  0x03C5; 0x1FE0; 0x0410; 0x04D0; 0x0415; 0x04D6; 0x0416; 0x04C1;
  0x0418; 0x0419; 0x0423; 0x040E; 0x0430; 0x04D1; 0x0435; 0x04D7;
  0x0436; 0x04C2; 0x0438; 0x0439; 0x0443; 0x045E; 0x1EA0; 0x1EB6;
  0x1EA1; 0x1EB7; 0x0041; 0x0226; 0x0042; 0x1E02; 0x0043; 0x010A;
  0x0044; 0x1E0A; 0x0045; 0x0116; 0x0046; 0x1E1E; 0x0047; 0x0120;
  0x0048; 0x1E22; 0x0049; 0x0130; 0x004D; 0x1E40; 0x004E; 0x1E44;
  0x004F; 0x022E; 0x0050; 0x1E56; 0x0052; 0x1E58; 0x0053; 0x1E60;
  0x0054; 0x1E6A; 0x0057; 0x1E86; 0x0058; 0x1E8A; 0x0059; 0x1E8E;
  0x005A; 0x017B; 0x0061; 0x0227; 0x0062; 0x1E03; 0x0063; 0x010B;
  0x0064; 0x1E0B; 0x0065; 0x0117; 0x0066; 0x1E1F; 0x0067; 0x0121;
  0x0068; 0x1E23; 0x006D; 0x1E41; 0x006E; 0x1E45; 0x006F; 0x022F;
  0x0070; 0x1E57; 0x0072; 0x1E59; 0x0073; 0x1E61; 0x0074; 0x1E6B;
  0x0077; 0x1E87; 0x0078; 0x1E8B; 0x0079; 0x1E8F; 0x007A; 0x017C;
  0x015A; 0x1E64; 0x015B; 0x1E65; 0x0160; 0x1E66; 0x0161; 0x1E67;
  0x017F; 0x1E9B; 0x1E62; 0x1E68; 0x1E63; 0x1E69; 0x0041; 0x00C4;
  0x0045; 0x00CB; 0x0048; 0x1E26; 0x0049; 0x00CF; 0x004F; 0x00D6;
  0x0055; 0x00DC; 0x0057; 0x1E84; 0x0058; 0x1E8C; 0x0059; 0x0178;
  0x0061; 0x00E4; 0x0065; 0x00EB; 0x0068; 0x1E27; 0x0069; 0x00EF;
  0x006F; 0x00F6; 0x0074; 0x1E97; 0x0075; 0x00FC; 0x0077; 0x1E85;
  0x0078; 0x1E8D; 0x0079; 0x00FF; 0x00D5; 0x1E4E; 0x00F5; 0x1E4F;
  0x016A; 0x1E7A; 0x016B; 0x1E7B; 0x0399; 0x03AA; 0x03A5; 0x03AB;
  0x03B9; 0x03CA; 0x03C5; 0x03CB; 0x03D2; 0x03D4; 0x0406; 0x0407;
  0x0410; 0x04D2; 0x0415; 0x0401; 0x0416; 0x04DC; 0x0417; 0x04DE;
  0x0418; 0x04E4; 0x041E; 0x04E6; 0x0423; 0x04F0; 0x0427; 0x04F4;
  0x042B; 0x04F8; 0x042D; 0x04EC; 0x0430; 0x04D3; 0x0435; 0x0451;
  0x0436; 0x04DD; 0x0437; 0x04DF; 0x0438; 0x04E5; 0x043E; 0x04E7;
  0x0443; 0x04F1; 0x0447; 0x04F5; 0x044B; 0x04F9; 0x044D; 0x04ED;
  0x0456; 0x0457; 0x04D8; 0x04DA; 0x04D9; 0x04DB; 0x04E8; 0x04EA;
  0x04E9; 0x04EB; 0x0041; 0x1EA2; 0x0045; 0x1EBA; 0x0049; 0x1EC8;
  0x004F; 0x1ECE; 0x0055; 0x1EE6; 0x0059; 0x1EF6; 0x0061; 0x1EA3;
  0x0065; 0x1EBB; 0x0069; 0x1EC9; 0x006F; 0x1ECF; 0x0075; 0x1EE7;
  0x0079; 0x1EF7; 0x00C2; 0x1EA8; 0x00CA; 0x1EC2; 0x00D4; 0x1ED4;
  0x00E2; 0x1EA9; 0x00EA; 0x1EC3; 0x00F4; 0x1ED5; 0x0102; 0x1EB2;
  0x0103; 0x1EB3; 0x01A0; 0x1EDE; 0x01A1; 0x1EDF; 0x01AF; 0x1EEC;
  0x01B0; 0x1EED; 0x0041; 0x00C5; 0x0055; 0x016E; 0x0061; 0x00E5;
  0x0075; 0x016F; 0x0077; 0x1E98; 0x0079; 0x1E99; 0x004F; 0x0150;
  0x0055; 0x0170; 0x006F; 0x0151; 0x0075; 0x0171; 0x0423; 0x04F2;
  0x0443; 0x04F3; 0x0041; 0x01CD; 0x0043; 0x010C; 0x0044; 0x010E;
  0x0045; 0x011A; 0x0047; 0x01E6; 0x0048; 0x021E; 0x0049; 0x01CF;
  0x004B; 0x01E8; 0x004C; 0x013D; 0x004E; 0x0147; 0x004F; 0x01D1;
  0x0052; 0x0158; 0x0053; 0x0160; 0x0054; 0x0164; 0x0055; 0x01D3;
  0x005A; 0x017D; 0x0061; 0x01CE; 0x0063; 0x010D; 0x0064; 0x010F;
  0x0065; 0x011B; 0x0067; 0x01E7; 0x0068; 0x021F; 0x0069; 0x01D0;
  0x006A; 0x01F0; 0x006B; 0x01E9; 0x006C; 0x013E; 0x006E; 0x0148;
  0x006F; 0x01D2; 0x0072; 0x0159; 0x0073; 0x0161; 0x0074; 0x0165;
  0x0075; 0x01D4; 0x007A; 0x017E; 0x00DC; 0x01D9; 0x00FC; 0x01DA;
  0x01B7; 0x01EE; 0x0292; 0x01EF; 0x0041; 0x0200; 0x0045; 0x0204;
  0x0049; 0x0208; 0x004F; 0x020C; 0x0052; 0x0210; 0x0055; 0x0214;
  0x0061; 0x0201; 0x0065; 0x0205; 0x0069; 0x0209; 0x006F; 0x020D;
  0x0072; 0x0211; 0x0075; 0x0215; 0x0474; 0x0476; 0x0475; 0x0477;
  0x0041; 0x0202; 0x0045; 0x0206; 0x0049; 0x020A; 0x004F; 0x020E;
  0x0052; 0x0212; 0x0055; 0x0216; 0x0061; 0x0203; 0x0065; 0x0207;
  0x0069; 0x020B; 0x006F; 0x020F; 0x0072; 0x0213; 0x0075; 0x0217;
  0x0391; 0x1F08; 0x0395; 0x1F18; 0x0397; 0x1F28; 0x0399; 0x1F38;
  0x039F; 0x1F48; 0x03A9; 0x1F68; 0x03B1; 0x1F00; 0x03B5; 0x1F10;
  0x03B7; 0x1F20; 0x03B9; 0x1F30; 0x03BF; 0x1F40; 0x03C1; 0x1FE4;
  0x03C5; 0x1F50; 0x03C9; 0x1F60; 0x0391; 0x1F09; 0x0395; 0x1F19;
  0x0397; 0x1F29; 0x0399; 0x1F39; 0x039F; 0x1F49; 0x03A1; 0x1FEC;
  0x03A5; 0x1F59; 0x03A9; 0x1F69; 0x03B1; 0x1F01; 0x03B5; 0x1F11;
  0x03B7; 0x1F21; 0x03B9; 0x1F31; 0x03BF; 0x1F41; 0x03C1; 0x1FE5;
  0x03C5; 0x1F51; 0x03C9; 0x1F61; 0x004F; 0x01A0; 0x0055; 0x01AF;
  0x006F; 0x01A1; 0x0075; 0x01B0; 0x0041; 0x1EA0; 0x0042; 0x1E04;
  0x0044; 0x1E0C; 0x0045; 0x1EB8; 0x0048; 0x1E24; 0x0049; 0x1ECA;
  0x004B; 0x1E32; 0x004C; 0x1E36; 0x004D; 0x1E42; 0x004E; 0x1E46;
  0x004F; 0x1ECC; 0x0052; 0x1E5A; 0x0053; 0x1E62; 0x0054; 0x1E6C;
  0x0055; 0x1EE4; 0x0056; 0x1E7E; 0x0057; 0x1E88; 0x0059; 0x1EF4;
  0x005A; 0x1E92; 0x0061; 0x1EA1; 0x0062; 0x1E05; 0x0064; 0x1E0D;
  0x0065; 0x1EB9; 0x0068; 0x1E25; 0x0069; 0x1ECB; 0x006B; 0x1E33;
  0x006C; 0x1E37; 0x006D; 0x1E43; 0x006E; 0x1E47; 0x006F; 0x1ECD;
  0x0072; 0x1E5B; 0x0073; 0x1E63; 0x0074; 0x1E6D; 0x0075; 0x1EE5;
  0x0076; 0x1E7F; 0x0077; 0x1E89; 0x0079; 0x1EF5; 0x007A; 0x1E93;
  0x01A0; 0x1EE2; 0x01A1; 0x1EE3; 0x01AF; 0x1EF0; 0x01B0; 0x1EF1;
  0x0055; 0x1E72; 0x0075; 0x1E73; 0x0041; 0x1E00; 0x0061; 0x1E01;
  0x0053; 0x0218; 0x0054; 0x021A; 0x0073; 0x0219; 0x0074; 0x021B;
  0x0043; 0x00C7; 0x0044; 0x1E10; 0x0045; 0x0228; 0x0047; 0x0122;
  0x0048; 0x1E28; 0x004B; 0x0136; 0x004C; 0x013B; 0x004E; 0x0145;
  0x0052; 0x0156; 0x0053; 0x015E; 0x0054; 0x0162; 0x0063; 0x00E7;
  0x0064; 0x1E11; 0x0065; 0x0229; 0x0067; 0x0123; 0x0068; 0x1E29;
  0x006B; 0x0137; 0x006C; 0x013C; 0x006E; 0x0146; 0x0072; 0x0157;
  0x0073; 0x015F; 0x0074; 0x0163; 0x0041; 0x0104; 0x0045; 0x0118;
  0x0049; 0x012E; 0x004F; 0x01EA; 0x0055; 0x0172; 0x0061; 0x0105;
  0x0065; 0x0119; 0x0069; 0x012F; 0x006F; 0x01EB; 0x0075; 0x0173;
  0x0044; 0x1E12; 0x0045; 0x1E18; 0x004C; 0x1E3C; 0x004E; 0x1E4A;
  0x0054; 0x1E70; 0x0055; 0x1E76; 0x0064; 0x1E13; 0x0065; 0x1E19;
  0x006C; 0x1E3D; 0x006E; 0x1E4B; 0x0074; 0x1E71; 0x0075; 0x1E77;
  0x0048; 0x1E2A; 0x0068; 0x1E2B; 0x0045; 0x1E1A; 0x0049; 0x1E2C;
  0x0055; 0x1E74; 0x0065; 0x1E1B; 0x0069; 0x1E2D; 0x0075; 0x1E75;
  0x0042; 0x1E06; 0x0044; 0x1E0E; 0x004B; 0x1E34; 0x004C; 0x1E3A;
  0x004E; 0x1E48; 0x0052; 0x1E5E; 0x0054; 0x1E6E; 0x005A; 0x1E94;
  0x0062; 0x1E07; 0x0064; 0x1E0F; 0x0068; 0x1E96; 0x006B; 0x1E35;
  0x006C; 0x1E3B; 0x006E; 0x1E49; 0x0072; 0x1E5F; 0x0074; 0x1E6F;
  0x007A; 0x1E95; 0x003C; 0x226E; 0x003D; 0x2260; 0x003E; 0x226F;
  0x2190; 0x219A; 0x2192; 0x219B; 0x2194; 0x21AE; 0x21D0; 0x21CD;
  0x21D2; 0x21CF; 0x21D4; 0x21CE; 0x2203; 0x2204; 0x2208; 0x2209;
  0x220B; 0x220C; 0x2223; 0x2224; 0x2225; 0x2226; 0x223C; 0x2241;
  0x2243; 0x2244; 0x2245; 0x2247; 0x2248; 0x2249; 0x224D; 0x226D;
  0x2261; 0x2262; 0x2264; 0x2270; 0x2265; 0x2271; 0x2272; 0x2274;
  0x2273; 0x2275; 0x2276; 0x2278; 0x2277; 0x2279; 0x227A; 0x2280;
  0x227B; 0x2281; 0x227C; 0x22E0; 0x227D; 0x22E1; 0x2282; 0x2284;
  0x2283; 0x2285; 0x2286; 0x2288; 0x2287; 0x2289; 0x2291; 0x22E2;
  0x2292; 0x22E3; 0x22A2; 0x22AC; 0x22A8; 0x22AD; 0x22A9; 0x22AE;
  0x22AB; 0x22AF; 0x22B2; 0x22EA; 0x22B3; 0x22EB; 0x22B4; 0x22EC;
  0x22B5; 0x22ED; 0x00A8; 0x1FC1; 0x03B1; 0x1FB6; 0x03B7; 0x1FC6;
  0x03B9; 0x1FD6; 0x03C5; 0x1FE6; 0x03C9; 0x1FF6; 0x03CA; 0x1FD7;
  0x03CB; 0x1FE7; 0x1F00; 0x1F06; 0x1F01; 0x1F07; 0x1F08; 0x1F0E;
  0x1F09; 0x1F0F; 0x1F20; 0x1F26; 0x1F21; 0x1F27; 0x1F28; 0x1F2E;
  0x1F29; 0x1F2F; 0x1F30; 0x1F36; 0x1F31; 0x1F37; 0x1F38; 0x1F3E;
  0x1F39; 0x1F3F; 0x1F50; 0x1F56; 0x1F51; 0x1F57; 0x1F59; 0x1F5F;
  0x1F60; 0x1F66; 0x1F61; 0x1F67; 0x1F68; 0x1F6E; 0x1F69; 0x1F6F;
  0x1FBF; 0x1FCF; 0x1FFE; 0x1FDF; 0x0391; 0x1FBC; 0x0397; 0x1FCC;
  0x03A9; 0x1FFC; 0x03AC; 0x1FB4; 0x03AE; 0x1FC4; 0x03B1; 0x1FB3;
  0x03B7; 0x1FC3; 0x03C9; 0x1FF3; 0x03CE; 0x1FF4; 0x1F00; 0x1F80;
  0x1F01; 0x1F81; 0x1F02; 0x1F82; 0x1F03; 0x1F83; 0x1F04; 0x1F84;
  0x1F05; 0x1F85; 0x1F06; 0x1F86; 0x1F07; 0x1F87; 0x1F08; 0x1F88;
  0x1F09; 0x1F89; 0x1F0A; 0x1F8A; 0x1F0B; 0x1F8B; 0x1F0C; 0x1F8C;
  0x1F0D; 0x1F8D; 0x1F0E; 0x1F8E; 0x1F0F; 0x1F8F; 0x1F20; 0x1F90;
  0x1F21; 0x1F91; 0x1F22; 0x1F92; 0x1F23; 0x1F93; 0x1F24; 0x1F94;
  0x1F25; 0x1F95; 0x1F26; 0x1F96; 0x1F27; 0x1F97; 0x1F28; 0x1F98;
  0x1F29; 0x1F99; 0x1F2A; 0x1F9A; 0x1F2B; 0x1F9B; 0x1F2C; 0x1F9C;
  0x1F2D; 0x1F9D; 0x1F2E; 0x1F9E; 0x1F2F; 0x1F9F; 0x1F60; 0x1FA0;
  0x1F61; 0x1FA1; 0x1F62; 0x1FA2; 0x1F63; 0x1FA3; 0x1F64; 0x1FA4;
  0x1F65; 0x1FA5; 0x1F66; 0x1FA6; 0x1F67; 0x1FA7; 0x1F68; 0x1FA8;
  0x1F69; 0x1FA9; 0x1F6A; 0x1FAA; 0x1F6B; 0x1FAB; 0x1F6C; 0x1FAC;
  0x1F6D; 0x1FAD; 0x1F6E; 0x1FAE; 0x1F6F; 0x1FAF; 0x1F70; 0x1FB2;
  0x1F74; 0x1FC2; 0x1F7C; 0x1FF2; 0x1FB6; 0x1FB7; 0x1FC6; 0x1FC7;
  0x1FF6; 0x1FF7; 0x0627; 0x0622; 0x0627; 0x0623; 0x0648; 0x0624;
  0x064A; 0x0626; 0x06C1; 0x06C2; 0x06D2; 0x06D3; 0x06D5; 0x06C0;
  0x0627; 0x0625; 0x0928; 0x0929; 0x0930; 0x0931; 0x0933; 0x0934;
  0x09C7; 0x09CB; 0x09C7; 0x09CC; 0x0B47; 0x0B4B; 0x0B47; 0x0B48;
  0x0B47; 0x0B4C; 0x0BC6; 0x0BCA; 0x0BC7; 0x0BCB; 0x0B92; 0x0B94;
  0x0BC6; 0x0BCC; 0x0C46; 0x0C48; 0x0CC6; 0x0CCA; 0x0CBF; 0x0CC0;
  0x0CC6; 0x0CC7; 0x0CCA; 0x0CCB; 0x0CC6; 0x0CC8; 0x0D46; 0x0D4A;
  0x0D47; 0x0D4B; 0x0D46; 0x0D4C; 0x0DD9; 0x0DDA; 0x0DDC; 0x0DDD;
  0x0DD9; 0x0DDC; 0x0DD9; 0x0DDE; 0x1025; 0x1026; 0x3046; 0x3094;
  0x304B; 0x304C; 0x304D; 0x304E; 0x304F; 0x3050; 0x3051; 0x3052;
  0x3053; 0x3054; 0x3055; 0x3056; 0x3057; 0x3058; 0x3059; 0x305A;
  0x305B; 0x305C; 0x305D; 0x305E; 0x305F; 0x3060; 0x3061; 0x3062;
  0x3064; 0x3065; 0x3066; 0x3067; 0x3068; 0x3069; 0x306F; 0x3070;
  0x3072; 0x3073; 0x3075; 0x3076; 0x3078; 0x3079; 0x307B; 0x307C;
  0x309D; 0x309E; 0x30A6; 0x30F4; 0x30AB; 0x30AC; 0x30AD; 0x30AE;
  0x30AF; 0x30B0; 0x30B1; 0x30B2; 0x30B3; 0x30B4; 0x30B5; 0x30B6;
  0x30B7; 0x30B8; 0x30B9; 0x30BA; 0x30BB; 0x30BC; 0x30BD; 0x30BE;
  0x30BF; 0x30C0; 0x30C1; 0x30C2; 0x30C4; 0x30C5; 0x30C6; 0x30C7;
  0x30C8; 0x30C9; 0x30CF; 0x30D0; 0x30D2; 0x30D3; 0x30D5; 0x30D6;
  0x30D8; 0x30D9; 0x30DB; 0x30DC; 0x30EF; 0x30F7; 0x30F0; 0x30F8;
  0x30F1; 0x30F9; 0x30F2; 0x30FA; 0x30FD; 0x30FE; 0x306F; 0x3071;
  0x3072; 0x3074; 0x3075; 0x3077; 0x3078; 0x307A; 0x307B; 0x307D;
  0x30CF; 0x30D1; 0x30D2; 0x30D4; 0x30D5; 0x30D7; 0x30D8; 0x30DA;
  0x30DB; 0x30DD
|]

(* Two-level bitmap read by [bitmap_test].  The first 256 bytes are
   indexed by the high byte of a BMP code point: 0 means no character of
   that page is in the set, 0xFF means all of them are, and any other
   value [n] selects the [n]-th (1-based) 32-byte page bitmap stored
   after the index. *)
let uniCharCombiningBitmap = "\
  \x00\x00\x00\x01\x02\x03\x04\x05\
  \x00\x06\x07\x08\x09\x0A\x0B\x0C\
  \x0D\x14\x00\x00\x00\x00\x00\x0E\
  \x0F\x00\x00\x00\x00\x00\x00\x00\
  \x10\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x11\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x12\x00\x00\x13\x00\
  \xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
  \xFF\xFF\x00\x00\xFF\xFF\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x78\x03\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\xFE\xFF\xFB\xFF\xFF\xBB\
  \x16\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\xF8\x3F\x00\x00\x00\x01\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\xC0\xFF\x9F\x3D\x00\x00\
  \x00\x00\x02\x00\x00\x00\xFF\xFF\
  \xFF\x07\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\xC0\xFF\x01\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x0E\x00\x00\x00\x00\x00\x00\xD0\
  \xFF\x3F\x1E\x00\x0C\x00\x00\x00\
  \x0E\x00\x00\x00\x00\x00\x00\xD0\
  \x9F\x39\x80\x00\x0C\x00\x00\x00\
  \x04\x00\x00\x00\x00\x00\x00\xD0\
  \x87\x39\x00\x00\x00\x00\x03\x00\
  \x0E\x00\x00\x00\x00\x00\x00\xD0\
  \xBF\x3B\x00\x00\x00\x00\x00\x00\
  \x0E\x00\x00\x00\x00\x00\x00\xD0\
  \x8F\x39\xC0\x00\x00\x00\x00\x00\
  \x04\x00\x00\x00\x00\x00\x00\xC0\
  \xC7\x3D\x80\x00\x00\x00\x00\x00\
  \x0E\x00\x00\x00\x00\x00\x00\xC0\
  \xDF\x3D\x60\x00\x00\x00\x00\x00\
  \x0C\x00\x00\x00\x00\x00\x00\xC0\
  \xDF\x3D\x60\x00\x00\x00\x00\x00\
  \x0C\x00\x00\x00\x00\x00\x00\xC0\
  \xCF\x3D\x80\x00\x00\x00\x00\x00\
  \x0C\x00\x00\x00\x00\x00\x00\x00\
  \x00\x84\x5F\xFF\x00\x00\x0C\x00\
  \x00\x00\x00\x00\x00\x00\xF2\x07\
  \x80\x7F\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\xF2\x1B\
  \x00\x3F\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x03\x00\x00\xA0\xC2\
  \x00\x00\x00\x00\x00\x00\xFE\xFF\
  \xDF\x00\xFF\xFE\xFF\xFF\xFF\x1F\
  \x40\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\xF0\xC7\x03\
  \x00\x00\xC0\x03\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x1C\x00\x00\x00\x1C\x00\
  \x00\x00\x0C\x00\x00\x00\x0C\x00\
  \x00\x00\x00\x00\x00\x00\xF0\xFF\
  \xFF\xFF\x0F\x00\x00\x00\x00\x00\
  \x00\x38\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x02\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\xFF\xFF\xFF\x07\x00\x00\
  \x00\x00\x00\x00\x00\xFC\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x06\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x40\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \xFF\xFF\x00\x00\x0F\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\x00\x00\x00\x00\
  \x00\x00\x00\x00\xFE\xFF\x3F\x00\
  \x00\x00\x00\x00\x00\xFF\xFF\xFF\
  \x07\x00\x00\x00\x00\x00\x00\x00"

(****)

(* [bitmap_test base bitmap character] tests membership of [character] in
   the set described by the two-level [bitmap].  Characters below [base]
   or outside the BMP are never members. *)
let bitmap_test base bitmap character =
  character >= base && character < 0x10000
    &&
  (let value = get bitmap ((character lsr 8) land 0xFF) in
   value = 0xFF
      ||
   (value <> 0
       &&
    get bitmap ((value - 1) * 32 + 256 + (character land 0xFF) / 8)
      land (1 lsl (character land 7)) <> 0))

(* Whether [character] is in [uniCharCombiningBitmap], i.e. may combine
   with the character that precedes it. *)
let unicode_combinable character =
  bitmap_test 0x0300 uniCharCombiningBitmap character

(* Bisection over the pairs [i .. j-1] of the table [t], whose even slots
   hold sorted keys and odd slots the associated values.  Returns the
   value stored with key [v], or 0 when [v] is not a key. *)
let rec find_rec t i j v =
  if i + 1 = j then begin
    if t.(i * 2) = v then t.(i * 2 + 1) else 0
  end else begin
    let k = (i + j) / 2 in
    if v < t.(k * 2) then
      find_rec t i k v
    else
      find_rec t k j v
  end

(* [find t i n v] looks [v] up among the [n] pairs of [t] starting at
   pair [i]; 0 means "not found".  [n] must be positive. *)
let find t i n v =
  let j = i + n in
  if v < t.(2 * i) || v > t.(2 * (j - 1)) then 0 else
  find_rec t i j v

let uniCharPrecompSourceTableLen = Array.length uniCharPrecompSourceTable / 2

(* [combine v v'] is the precomposed character for base [v] followed by
   combining character [v'], or 0 when the pair does not compose.
   Hangul jamo are composed arithmetically (leading consonant + vowel,
   then syllable without trailing consonant + trailing consonant);
   everything else goes through the two tables above. *)
let combine v v' =
  if v' >= hangul_vbase && v' < hangul_tbase + hangul_tcount then begin
    if
      v' < hangul_vbase + hangul_vcount &&
      v >= hangul_lbase && v < hangul_lbase + hangul_lcount
    then
      hangul_sbase + ((v - hangul_lbase) * (hangul_vcount * hangul_tcount)) +
      ((v' - hangul_vbase) * hangul_tcount)
    else if
      v' > hangul_tbase &&
      v >= hangul_sbase && v < hangul_sbase + hangul_scount
    then
      (* only a syllable without trailing consonant can take one *)
      if (v - hangul_sbase) mod hangul_tcount <> 0 then 0 else
      v + v' - hangul_tbase
    else
      0
  end else begin
    let k =
      find uniCharPrecompSourceTable 0
        uniCharPrecompSourceTableLen v'
    in
    if k = 0 then 0 else
    find uniCharBMPPrecompDestinationTable (k land 0xFFFF) (k lsr 16) v
  end

(****)

(* In-place composition.  [s] is a private mutable copy of the input and
   [l] its length.  [d = (i1, i2)] describes the pending gap: the bytes
   [0 .. i1-1] are final output and the bytes [i2 .. i-1] are input
   already scanned that has not yet been moved down to [i1].
   NOTE(review): after a composition, the backward look-up in [cont]
   reads buffer positions below the read cursor, which may hold
   rewritten output rather than original input -- behaviour inherited
   unchanged from the original implementation.
   (These [scan]/[cont] are private to [compose]; the names are rebound
   further down for the UTF-16 conversions.) *)
let rec scan d s i l =
  if i < l then begin
    let (v, j) = decode_with (fun k -> bget s k) i l in
    cont d s i l j v
  end else begin
    (* flush what is left after the gap and return the used prefix *)
    let (i1, i2) = d in
    Bytes.blit s i2 s i1 (l - i2);
    Bytes.sub_string s 0 (i1 + l - i2)
  end

(* [v'] is the character found at [i .. j-1].  When it is a combining
   character that composes with the character just before it, the pair
   (and any further composable characters, see [compose_rec]) is replaced
   by the precomposed character. *)
and cont d s i l j v' =
  if unicode_combinable v' then begin
    let i = prev_with (fun k -> bget s k) i in
    let (v, _) = decode_with (fun k -> bget s k) i l in
    let v'' = combine v v' in
    if v'' = 0 then
      scan d s j l
    else begin
      let (i1, i2) = d in
      (* close the gap up to the base character ... *)
      Bytes.blit s i2 s i1 (i - i2);
      let i1 = i1 + i - i2 in
      (* ... absorb the following combining characters ... *)
      let (v'', i) = compose_rec s j l v'' in
      (* ... and emit the composed character in their place *)
      let i1 = encode_char s i1 l v'' in
      scan (i1, i) s i l
    end
  end else
    scan d s j l

(* [compose_rec s i l v] keeps composing [v] with the characters that
   start at [i] for as long as they are combining and [combine] succeeds.
   Returns the final character and the index of the first character that
   was not absorbed.  A decoding error simply ends the run. *)
and compose_rec s i l v =
  try
    let (v', j) = decode_with (fun k -> bget s k) i l in
    if unicode_combinable v' then begin
      let v'' = combine v v' in
      if v'' = 0 then
        (v, i)
      else
        compose_rec s j l v''
    end else
      (v, i)
  with Invalid ->
    (v, i)

(* [compose s] replaces base + combining character sequences of [s] by
   precomposed characters.  [s] is returned unchanged when [Invalid] is
   raised while processing it. *)
let compose s =
  try scan (0, 0) (Bytes.of_string s) 0 (String.length s) with Invalid -> s

(***)

(* Store the 16-bit value [v] at [s.[i .. i+1]], low byte first. *)
let set_2 s i v =
  set s i (v land 0xff);
  set s (i + 1) (v lsr 8)

(* Read the 16-bit value stored low byte first at [s.[i .. i+1]]. *)
let get_2 s i = (get s (i + 1)) lsl 8 + get s i

(* UTF-8 -> UTF-16: decode [s.[i .. l-1]] and append the code units to
   [s'] from offset [j]; returns the filled prefix of [s'].
   Raises [Invalid] on malformed UTF-8. *)
let rec scan s' j s i l =
  if i < l then begin
    let (v, i) = decode_char s i l in
    cont s' j s i l v
  end else
    Bytes.sub_string s' 0 j

(* Append the code point [v]: one code unit below 0x10000, a surrogate
   pair otherwise. *)
and cont s' j s i l v =
  if v < 0x10000 then begin
    set_2 s' j v;
    scan s' (j + 2) s i l
  end else begin
    let v = v - 0x10000 in
    set_2 s' j (v lsr 10 + 0xD800);
    set_2 s' (j + 2) (v land 0x3FF + 0xDC00);
    scan s' (j + 4) s i l
  end

(* [to_utf_16 s] converts the UTF-8 string [s] to UTF-16 with the low
   byte of each code unit first.  Raises [Invalid] if [s] is not
   well-formed.  Two output bytes per input byte are always enough. *)
let to_utf_16 s =
  let l = String.length s in
  let s' = Bytes.create (2 * l) in
  scan s' 0 s 0 l

(****)

(* UTF-16 -> UTF-8: decode the code units of [s.[i .. l-1]] and encode
   them into [s'] from offset [i'] ([l'] is the capacity of [s']);
   returns the filled prefix of [s'].  Raises [Invalid] on an unpaired
   surrogate or an odd number of bytes. *)
let rec scan s' i' l' s i l =
  if i + 2 <= l then begin
    let v = get_2 s i in
    if v < 0xD800 || v > 0xDFFF then begin
      let i' = encode_char s' i' l' v in
      scan s' i' l' s (i + 2) l
    end else if v >= 0xdc00 || i + 4 > l then
      (* low surrogate first, or high surrogate at the very end *)
      fail ()
    else begin
      let v' = get_2 s (i + 2) in
      if v' < 0xDC00 || v' > 0xDFFF then fail () else
      let i' =
        encode_char s' i' l' ((v - 0xD800) lsl 10 + (v' - 0xDC00) + 0x10000)
      in
      scan s' i' l' s (i + 4) l
    end
  end else if i < l then
    fail ()
  else
    Bytes.sub_string s' 0 i'

(* [from_utf_16 s] converts UTF-16 text (low byte of each code unit
   first) to UTF-8.  Raises [Invalid] if [s] is not well-formed. *)
let from_utf_16 s =
  let l = String.length s in
  let l' = 3 * l / 2 in
  let s' = Bytes.create l' in
  scan s' 0 l' s 0 l

(*
   Buffer size used by from_utf_16: a single code unit (2 bytes) becomes
   at most 3 bytes of UTF-8 and a surrogate pair (4 bytes) becomes
   4 bytes, so 3 * l / 2 bytes are always enough.

   2 bytes -> 3 bytes
   4 bytes -> 4 bytes
*)