unison

Fork of Unison, a bi-directional file synchronization tool
git clone git://git.laack.co/unison.git
Log | Files | Refs | README | LICENSE

unicode_test.ml (2614B)


      1 (*-*-coding: utf-8;-*-*)
      2 
      3 #use "unicode_build.ml"
      4 #load "unix.cma"
      5 
      6 let _ =
        (* Make sure the compiled table module exists before it is loaded by
           the #load directive that follows: the shell command runs ocamlc
           only when unicode_tables.cmo is not already present.  The status
           returned by Unix.system is discarded. *)
      7 Unix.system "test -f unicode_tables.cmo || ocamlc -c unicode_tables.ml";;
      8 
      9 #load "unicode_tables.cmo"
     10 #use "unicode.ml"
     11 #use "reorder.ml"
     12 
     13 let _ =
          (* Normalization and UTF-16 round-trip checks over every code point
             in 0 .. 0x1ffff, skipping the surrogate range 0xd800 .. 0xdfff.
             b1 collects encode_utf8 i and b2 collects conv i.
             NOTE(review): conv is defined outside this listing; it appears to
             produce the expected normalized form of code point i - confirm. *)
     14   let b1 = Buffer.create 1024 in
     15   let b2 = Buffer.create 1024 in
     16   for i = 0 to 0x1ffff do
     17     if i < 0xd800 || i > 0xdfff then begin
     18       Buffer.add_string b1 (encode_utf8 i);
     19       Buffer.add_string b2 (conv i)
     20     end
     21   done;
     22   let s1 = Buffer.contents b1 in
     23   let s2 = Buffer.contents b2 in
          (* Diagnostics: a separator on stderr, then on stdout the lengths of
             s1 and s2, followed by the lengths of normalize s1 and s2. *)
     24 prerr_endline "===";
     25 Format.printf "%d %d@." (String.length s1) (String.length s2);
     26 Format.printf "%d %d@." (String.length (normalize s1)) (String.length s2);
          (* normalize must turn the plain encoding into the conv output, *)
     27   assert (normalize s1 = s2);
          (* must leave the conv output unchanged, *)
     28   assert (normalize s2 = s2);
          (* and must give back the conv output after it has been composed. *)
     29   assert (normalize (compose s2) = s2);
          (* Converting s1 to UTF-16 and back must be lossless. *)
     30   assert (from_utf_16 (to_utf_16 s1) = s1)
     31 
     32 let _ =
          (* Composition check over the code points from hangul_sbase - 128 to
             hangul_sbase + hangul_scount - 1 + 128: composing the conv output
             must give back the plain UTF-8 encoding.
             NOTE(review): presumably this is the precomposed Hangul syllable
             range plus a margin of 128 code points on each side - confirm
             against the definitions of hangul_sbase and hangul_scount. *)
     33   let b1 = Buffer.create 1024 in
     34   let b2 = Buffer.create 1024 in
     35   for i = hangul_sbase -128 to hangul_sbase + hangul_scount - 1 + 128 do
     36     Buffer.add_string b1 (encode_utf8 i);
     37     Buffer.add_string b2 (conv i)
     38   done;
     39   let s1 = Buffer.contents b1 in
     40   let s2 = Buffer.contents b2 in
     41   assert (compose s2 = s1)
     42 
     43 let _ =
          (* Expected results of compare on strings that differ in letter case.
             NOTE(review): the first assertion can only hold for a comparison
             that ignores case, so compare is presumably the one brought into
             scope by the #use directives rather than the standard polymorphic
             compare - confirm. *)
          (* Same text in different case: equal. *)
     44   assert (compare "abcdéfgh" "ABCDÉFGH" = 0);
          (* Left operand has one extra trailing character: 1. *)
     45   assert (compare "abcdéfghi" "ABCDÉFGH" = 1);
     46   assert (compare "abcdefghi" "ABCDeFGH" = 1);
          (* Right operand has one extra trailing character: -1. *)
     47   assert (compare "abcdéfgh" "ABCDÉFGHi" = -1);
     48   assert (compare "abcdefgh" "ABCDeFGHi" = -1);
          (* A differing letter decides the result: b against C gives -1,
             g against F gives 1. *)
     49   assert (compare "abcdéfgh" "ACCDÉFGH" = -1);
     50   assert (compare "abcdéfgh" "ABCDÉFFH" = 1)
     51 
     52 let _ =
          (* For every code point in 0 .. 0xffff outside the surrogate range
             0xd800 .. 0xdfff, convert conv i to UTF-16 and check each pair of
             adjacent 16-bit units: when both units have a nonzero combining
             class, the classes must not decrease from the first unit to the
             second.
             NOTE(review): get is defined outside this listing; it presumably
             returns the byte at the given index as an int - confirm. *)
     53   for i = 0 to 0xffff do
     54     if i < 0xd800 || i > 0xdfff then begin
     55       let s = to_utf_16 (conv i) in
     56 (*Format.printf "%04x@." (String.length s);*)
              (* j ranges over the first unit of each adjacent pair, so the
                 upper bound is the unit count minus two. *)
     57       for j = 0 to String.length s / 2 - 2 do
                (* Assemble each 16-bit unit from two bytes, low byte first. *)
     58         let c1 = get s (j * 2) + get s (j * 2 + 1) * 256 in
     59         let c2 = get s (j * 2 + 2) + get s (j * 2 + 3) * 256 in
     60         let v1 = combining_class c1 in
     61         let v2 = combining_class c2 in
     62 (*Format.printf "%04x %04x => %02x %02x@." c1 c2 v1 v2;*)
     63 (*        if v1 > 0 && v2 > 0 then Format.printf "%d %d@." v1 v2;*)
     64         assert (v1 = 0 || v2 = 0 || v1 <= v2)
     65       done
     66     end
     67   done
     68 
     69 let _ =
        (* Checks on order, which is called for its effect on s: s is compared
           against the expected text only after the call.  The literals are
           UTF-16 with the low byte first. *)
        (* Input units: 0061 0301 0063 0301 0327.  Expected: the trailing pair
           0301 0327 is swapped to 0327 0301, while the 0301 that follows 0061
           stays where it is. *)
     70 let s = from_utf_16 "\x61\x00\x01\x03\x63\x00\x01\x03\x27\x03" in
     71 order s;
     72 assert (s = from_utf_16 "\x61\x00\x01\x03\x63\x00\x27\x03\x01\x03");
        (* Same swap with no base character in front: 0301 0327 becomes
           0327 0301. *)
     73 let s = from_utf_16 "\x01\x03\x27\x03" in
     74 order s;
     75 assert (s = from_utf_16 "\x27\x03\x01\x03")
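     76 (* UnicodeData.txt records for the code points used in the test above,
           with unrelated fields elided as ...; the fields kept are code point,
           name, canonical combining class and, where present, decomposition: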
     77 0061;LATIN SMALL LETTER A;...0;...
     78 0063;LATIN SMALL LETTER C;...0;...
     79 00E1;LATIN SMALL LETTER A WITH ACUTE;...0;...0061 0301;...
     80 0107;LATIN SMALL LETTER C WITH ACUTE;...0;...0063 0301;...
     81 0301;COMBINING ACUTE ACCENT;...230;...
     82 0327;COMBINING CEDILLA;...202;...
     83 *)