unicode_test.ml (2614B)
(*-*-coding: utf-8;-*-*)

(* Toplevel test script for the Unicode helpers.  It is meant to be fed to
   the OCaml toplevel: the directives below pull in the project sources that
   provide every non-stdlib name used by the checks (encode_utf8, conv,
   normalize, compose, compare, order, ...).  Each check is a bare assert,
   so the script passes when it runs to the end without an exception. *)

#use "unicode_build.ml"
#load "unix.cma"

(* Compile the tables module only when unicode_tables.cmo is not already
   present.  The exit status of the shell command is discarded. *)
let _ =
  Unix.system "test -f unicode_tables.cmo || ocamlc -c unicode_tables.ml";;

#load "unicode_tables.cmo"
#use "unicode.ml"
#use "reorder.ml"

(* Walks the integers from first to last (inclusive).  For every value
   accepted by keep, appends encode_utf8 of it to one buffer and conv of it
   to a second one.  Returns both accumulated strings, encode_utf8 output
   first. *)
let encoded_and_converted first last keep =
  let plain = Buffer.create 1024 in
  let converted = Buffer.create 1024 in
  for cp = first to last do
    if keep cp then begin
      Buffer.add_string plain (encode_utf8 cp);
      Buffer.add_string converted (conv cp)
    end
  done;
  (Buffer.contents plain, Buffer.contents converted)

(* Normalization checks over 0 .. 0x1ffff, skipping the surrogate range
   0xd800 .. 0xdfff.  The concatenated conv output is the reference that
   normalize is expected to produce and to leave unchanged; the last assert
   is a UTF-16 round trip of the plain UTF-8 text. *)
let _ =
  let outside_surrogates cp = not (cp >= 0xd800 && cp <= 0xdfff) in
  let utf8, reference = encoded_and_converted 0 0x1ffff outside_surrogates in
  prerr_endline "===";
  Format.printf "%d %d@." (String.length utf8) (String.length reference);
  Format.printf "%d %d@."
    (String.length (normalize utf8))
    (String.length reference);
  assert (normalize utf8 = reference);
  assert (normalize reference = reference);
  assert (normalize (compose reference) = reference);
  assert (from_utf_16 (to_utf_16 utf8) = utf8)

(* Composition check over the range that starts 128 values before
   hangul_sbase and ends 128 values after the last of the hangul_scount
   values: composing the conv output must give back the plain UTF-8 text. *)
let _ =
  let margin = 128 in
  let first = hangul_sbase - margin in
  let last = hangul_sbase + hangul_scount - 1 + margin in
  let utf8, converted = encoded_and_converted first last (fun _ -> true) in
  assert (compose converted = utf8)

(* Expected results of compare (as in scope after the directives above) on
   strings that differ in letter case, in length, or in a single letter. *)
let _ =
  (* Same letters, different case. *)
  assert (compare "abcdéfgh" "ABCDÉFGH" = 0);
  (* Left operand has one extra trailing letter. *)
  assert (compare "abcdéfghi" "ABCDÉFGH" = 1);
  assert (compare "abcdefghi" "ABCDeFGH" = 1);
  (* Right operand has one extra trailing letter. *)
  assert (compare "abcdéfgh" "ABCDÉFGHi" = -1);
  assert (compare "abcdefgh" "ABCDeFGHi" = -1);
  (* A single differing letter: b against C, then g against F. *)
  assert (compare "abcdéfgh" "ACCDÉFGH" = -1);
  assert (compare "abcdéfgh" "ABCDÉFFH" = 1)

(* For every value in 0 .. 0xffff outside the surrogate range, converts the
   conv output to UTF-16 and inspects each pair of adjacent 16-bit units:
   when both units have a non-zero combining class, the classes must be in
   non-decreasing order. *)
let _ =
  for cp = 0 to 0xffff do
    if not (cp >= 0xd800 && cp <= 0xdfff) then begin
      let utf16 = to_utf_16 (conv cp) in
      (* Unit number k, assembled low byte first from the two values that
         get returns at offsets 2k and 2k+1. *)
      let unit_at k = get utf16 (2 * k) + get utf16 (2 * k + 1) * 256 in
      let unit_count = String.length utf16 / 2 in
      for k = 0 to unit_count - 2 do
        let first = unit_at k in
        let second = unit_at (k + 1) in
        let cls_first = combining_class first in
        let cls_second = combining_class second in
        (* Debugging aid, disabled:
           Format.printf "%04x %04x => %02x %02x@."
             first second cls_first cls_second; *)
        assert (cls_first = 0 || cls_second = 0 || cls_first <= cls_second)
      done
    end
  done

(* Checks order on two small inputs given as UTF-16 byte strings.  order is
   called for its effect on its argument; the same value is compared with
   the expected text afterwards. *)
let _ =
  (* 0061 0301 0063 0301 0327 must become 0061 0301 0063 0327 0301. *)
  let text = from_utf_16 "\x61\x00\x01\x03\x63\x00\x01\x03\x27\x03" in
  order text;
  assert (text = from_utf_16 "\x61\x00\x01\x03\x63\x00\x27\x03\x01\x03");
  (* 0301 0327 must become 0327 0301. *)
  let marks = from_utf_16 "\x01\x03\x27\x03" in
  order marks;
  assert (marks = from_utf_16 "\x27\x03\x01\x03")

(* UnicodeData entries relevant to the inputs above (fields elided):
0061;LATIN SMALL LETTER A;...0;...
0063;LATIN SMALL LETTER C;...0;...
00E1;LATIN SMALL LETTER A WITH ACUTE;...0;...0061 0301;...
0107;LATIN SMALL LETTER C WITH ACUTE;...0;...0063 0301;...
0301;COMBINING ACUTE ACCENT;...230;...
0327;COMBINING CEDILLA;...202;...
*)