1: %%
    2: %% %CopyrightBegin%
    3: %%
    4: %% Copyright Ericsson AB 2008-2011. All Rights Reserved.
    5: %%
    6: %% The contents of this file are subject to the Erlang Public License,
    7: %% Version 1.1, (the "License"); you may not use this file except in
    8: %% compliance with the License. You should have received a copy of the
    9: %% Erlang Public License along with this software. If not, it can be
   10: %% retrieved online at http://www.erlang.org/.
   11: %%
   12: %% Software distributed under the License is distributed on an "AS IS"
   13: %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
   14: %% the License for the specific language governing rights and limitations
   15: %% under the License.
   16: %%
   17: %% %CopyrightEnd%
   18: %%
   19: 
   20: %%
   21: 
   22: -module(bs_utf_SUITE).
   23: 
   24: -export([all/0, suite/0,groups/0,init_per_group/2,end_per_group/2,
   25: 	 init_per_suite/1,end_per_suite/1,
   26: 	 init_per_testcase/2,end_per_testcase/2,
   27: 	 utf8_roundtrip/1,unused_utf_char/1,utf16_roundtrip/1,
   28: 	 utf32_roundtrip/1,guard/1,extreme_tripping/1]).
   29: 
   30: -include_lib("test_server/include/test_server.hrl").
   31: -compile([no_jopt,time]).
   32: 
   33: suite() -> [{ct_hooks,[ts_install_cth]}].
   34: 
   35: all() -> 
   36:     cases().
   37: 
   38: groups() -> 
   39:     [].
   40: 
   41: init_per_group(_GroupName, Config) ->
   42:     Config.
   43: 
   44: end_per_group(_GroupName, Config) ->
   45:     Config.
   46: 
   47: 
   48: cases() -> 
   49:     [utf8_roundtrip, unused_utf_char, utf16_roundtrip,
   50:      utf32_roundtrip, guard, extreme_tripping].
   51: 
   52: init_per_testcase(_Case, Config) ->
   53:     test_lib:interpret(?MODULE),
   54:     Dog = test_server:timetrap(?t:minutes(1)),
   55:     [{watchdog,Dog}|Config].
   56: 
   57: end_per_testcase(_Case, Config) ->
   58:     Dog = ?config(watchdog, Config),
   59:     ?t:timetrap_cancel(Dog),
   60:     ok.
   61: 
   62: init_per_suite(Config) when is_list(Config) ->
   63:     ?line test_lib:interpret(?MODULE),
   64:     ?line true = lists:member(?MODULE, int:interpreted()),
   65:     Config.
   66: 
   67: end_per_suite(Config) when is_list(Config) ->
   68:     ok.
   69: 
   70: utf8_roundtrip(Config) when is_list(Config) ->
   71:     ?line [utf8_roundtrip_1(P) || P <- utf_data()],
   72:     ok.
   73: 
   74: utf8_roundtrip_1({Str,Bin,Bin}) ->
   75:     ?line Str = utf8_to_list(Bin),
   76:     ?line Bin = list_to_utf8(Str),
   77:     ?line [ok = utf8_guard(C, <<42,C/utf8>>) || C <- Str],
   78:     ?line [error = utf8_guard(C, <<C/utf8>>) || C <- Str],
   79:     ok.
   80: 
   81: utf8_guard(C, Bin) when <<42,C/utf8>> =:= Bin -> ok;
   82: utf8_guard(_, _) -> error.
   83: 
   84: utf8_to_list(<<C/utf8,T/binary>>) ->
   85:     [C|utf8_to_list(T)];
   86: utf8_to_list(<<>>) -> [].
   87: 
   88: list_to_utf8(L) ->
   89:     list_to_utf8(L, <<>>).
   90: 
   91: list_to_utf8([H|T], Bin) ->
   92:     list_to_utf8(T, <<Bin/binary,H/utf8>>);
   93: list_to_utf8([], Bin) -> Bin.
   94: 
   95: unused_utf_char(Config) when is_list(Config) ->
   96:     [true = utf8_len(Utf8) =:= length(Str) ||
   97: 	{Str,Utf8} <- utf_data()],
   98:     ok.
   99: 
  100: utf8_len(B) ->
  101:     utf8_len(B, 0).
  102: 
  103: utf8_len(<<_/utf8,T/binary>>, N) ->
  104:     utf8_len(T, N+1);
  105: utf8_len(<<>>, N) -> N.
  106: 
  107: utf16_roundtrip(Config) when is_list(Config) ->
  108:     ?line {Str,Big,Big,Little,Little} = utf16_data(),
  109:     ?line 4 = utf16_big_len(Big),
  110:     ?line 4 = utf16_little_len(Little),
  111:     ?line Str = big_utf16_to_list(Big),
  112:     ?line Str = little_utf16_to_list(Little),
  113: 
  114:     ?line Big = list_to_big_utf16(Str),
  115:     ?line Little = list_to_little_utf16(Str),
  116: 
  117:     ok.
  118: 
  119: utf16_big_len(B) ->
  120:     utf16_big_len(B, 0).
  121: 
  122: utf16_big_len(<<_/utf16,T/binary>>, N) ->
  123:     utf16_big_len(T, N+1);
  124: utf16_big_len(<<>>, N) -> N.
  125: 
  126: utf16_little_len(B) ->
  127:     utf16_little_len(B, 0).
  128: 
  129: utf16_little_len(<<_/little-utf16,T/binary>>, N) ->
  130:     utf16_little_len(T, N+1);
  131: utf16_little_len(<<>>, N) -> N.
  132: 
  133: list_to_big_utf16(List) ->
  134:     list_to_big_utf16(List, <<>>).
  135: 
  136: list_to_big_utf16([H|T], Bin) ->
  137:     list_to_big_utf16(T, <<Bin/binary,H/utf16>>);
  138: list_to_big_utf16([], Bin) -> Bin.
  139: 
  140: list_to_little_utf16(List) ->
  141:     list_to_little_utf16(List, <<>>).
  142: 
  143: list_to_little_utf16([H|T], Bin) ->
  144:     list_to_little_utf16(T, <<Bin/binary,H/little-utf16>>);
  145: list_to_little_utf16([], Bin) -> Bin.
  146: 
  147: big_utf16_to_list(<<H/utf16,T/binary>>) ->
  148:     [H|big_utf16_to_list(T)];
  149: big_utf16_to_list(<<>>) -> [].
  150: 
  151: little_utf16_to_list(<<H/little-utf16,T/binary>>) ->
  152:     [H|little_utf16_to_list(T)];
  153: little_utf16_to_list(<<>>) -> [].
  154: 
  155: utf32_roundtrip(Config) when is_list(Config) ->
  156:     ?line {Str,Big,Big,Little,Little} = utf32_data(),
  157:     ?line 4 = utf32_big_len(Big),
  158:     ?line 4 = utf32_little_len(Little),
  159:     ?line Str = big_utf32_to_list(Big),
  160:     ?line Str = little_utf32_to_list(Little),
  161: 
  162:     ?line Big = list_to_big_utf32(Str),
  163:     ?line Little = list_to_little_utf32(Str),
  164: 
  165:     ok.
  166: 
  167: utf32_big_len(B) ->
  168:     utf32_big_len(B, 0).
  169: 
  170: utf32_big_len(<<_/utf32,T/binary>>, N) ->
  171:     utf32_big_len(T, N+1);
  172: utf32_big_len(<<>>, N) -> N.
  173: 
  174: utf32_little_len(B) ->
  175:     utf32_little_len(B, 0).
  176: 
  177: utf32_little_len(<<_/little-utf32,T/binary>>, N) ->
  178:     utf32_little_len(T, N+1);
  179: utf32_little_len(<<>>, N) -> N.
  180: 
  181: list_to_big_utf32(List) ->
  182:     list_to_big_utf32(List, <<>>).
  183: 
  184: list_to_big_utf32([H|T], Bin) ->
  185:     list_to_big_utf32(T, <<Bin/binary,H/utf32>>);
  186: list_to_big_utf32([], Bin) -> Bin.
  187: 
  188: list_to_little_utf32(List) ->
  189:     list_to_little_utf32(List, <<>>).
  190: 
  191: list_to_little_utf32([H|T], Bin) ->
  192:     list_to_little_utf32(T, <<Bin/binary,H/little-utf32>>);
  193: list_to_little_utf32([], Bin) -> Bin.
  194: 
  195: big_utf32_to_list(<<H/utf32,T/binary>>) ->
  196:     [H|big_utf32_to_list(T)];
  197: big_utf32_to_list(<<>>) -> [].
  198: 
  199: little_utf32_to_list(<<H/little-utf32,T/binary>>) ->
  200:     [H|little_utf32_to_list(T)];
  201: little_utf32_to_list(<<>>) -> [].
  202: 
  203: 
  204: guard(Config) when is_list(Config) ->
  205:     ?line error = do_guard(16#D800),
  206:     ok.
  207: 
  208: do_guard(C) when byte_size(<<C/utf8>>) =/= 42 -> ok;
  209: do_guard(C) when byte_size(<<C/utf16>>) =/= 42 -> ok;
  210: do_guard(C) when byte_size(<<C/utf32>>) =/= 42 -> ok;
  211: do_guard(_) -> error.
  212: 
  213: %% The purpose of this test is to make sure that
  214: %% the delayed creation of sub-binaries works.
  215: 
  216: extreme_tripping(Config) when is_list(Config) ->
  217:     ?line Unicode = lists:seq(0, 1024),
  218:     ?line Utf8 = unicode_to_utf8(Unicode, <<>>),
  219:     ?line Utf16 = utf8_to_utf16(Utf8, <<>>),
  220:     ?line Utf32 = utf8_to_utf32(Utf8, <<>>),
  221:     ?line Utf32 = utf16_to_utf32(Utf16, <<>>),
  222:     ?line Utf8 = utf32_to_utf8(Utf32, <<>>),
  223:     ?line Unicode = utf32_to_unicode(Utf32),
  224:     ok.
  225: 
  226: unicode_to_utf8([C|T], Bin) ->
  227:     unicode_to_utf8(T, <<Bin/bytes,C/utf8>>);
  228: unicode_to_utf8([], Bin) -> Bin.
  229: 
  230: utf8_to_utf16(<<C/utf8,T/binary>>, Bin) ->
  231:     utf8_to_utf16(T, <<Bin/bytes,C/utf16>>);
  232: utf8_to_utf16(<<>>, Bin) -> Bin.
  233: 
  234: utf16_to_utf32(<<C/utf16,T/binary>>, Bin) ->
  235:     utf16_to_utf32(T, <<Bin/bytes,C/utf32>>);
  236: utf16_to_utf32(<<>>, Bin) -> Bin.
  237: 
  238: utf8_to_utf32(<<C/utf8,T/binary>>, Bin) ->
  239:     utf8_to_utf32(T, <<Bin/bytes,C/utf32>>);
  240: utf8_to_utf32(<<>>, Bin) -> Bin.
  241: 
  242: utf32_to_utf8(<<C/utf32,T/binary>>, Bin) ->
  243:     utf32_to_utf8(T, <<Bin/bytes,C/utf8>>);
  244: utf32_to_utf8(<<>>, Bin) -> Bin.
  245: 
  246: utf32_to_unicode(<<C/utf32,T/binary>>) ->
  247:     [C|utf32_to_unicode(T)];
  248: utf32_to_unicode(<<>>) -> [].
  249: 
  250: utf_data() ->
  251: %% From RFC-3629.
  252: 
  253:     %% Give the compiler a change to do some constant propagation.
  254:     NotIdentical = 16#2262,
  255: 
  256:     [
  257:      %% "A<NOT IDENTICAL TO><ALPHA>."
  258:      {[16#0041,NotIdentical,16#0391,16#002E],
  259:       <<16#0041/utf8,NotIdentical/utf8,16#0391/utf8,16#002E/utf8>>,
  260:       <<16#41,16#E2,16#89,16#A2,16#CE,16#91,16#2E>>},
  261: 
  262:      %% Korean "hangugeo" (meaning "the Korean language")
  263:      {[16#D55C,16#AD6D,16#C5B4],
  264:       <<16#D55C/utf8,16#AD6D/utf8,16#C5B4/utf8>>,
  265:       <<16#ED,16#95,16#9C,16#EA,16#B5,16#AD,16#EC,16#96,16#B4>>},
  266: 
  267:      %% Japanese "nihongo" (meaning "the Japanese language").
  268:      {[16#65E5,16#672C,16#8A9E],
  269:       <<16#65E5/utf8,16#672C/utf8,16#8A9E/utf8>>,
  270:       <<16#E6,16#97,16#A5,16#E6,16#9C,16#AC,16#E8,16#AA,16#9E>>}
  271:     ].
  272: 
  273: utf16_data() ->
  274:     %% Example from RFC-2781. "*=Ra", where "*" represents a
  275:     %% hypothetical Ra hieroglyph (code point 16#12345).
  276: 
  277:     %% Give the compiler a change to do some constant propagation.
  278:     RaHieroglyph = 16#12345,
  279: 
  280:     %% First as a list of Unicode characters.
  281:     {[RaHieroglyph,16#3D,16#52,16#61],
  282: 
  283:      %% Big endian (the two binaries should be equal).
  284:      <<RaHieroglyph/big-utf16,16#3D/big-utf16,16#52/big-utf16,16#61/big-utf16>>,
  285:      <<16#D8,16#08,16#DF,16#45,16#00,16#3D,16#00,16#52,16#00,16#61>>,
  286: 
  287:      %% Little endian (the two binaries should be equal).
  288:      <<RaHieroglyph/little-utf16,16#3D/little-utf16,
  289:       16#52/little-utf16,16#61/little-utf16>>,
  290:      <<16#08,16#D8,16#45,16#DF,16#3D,16#00,16#52,16#00,16#61,16#00>>}.
  291: 
  292: utf32_data() ->
  293:     %% "A<NOT IDENTICAL TO><ALPHA>."
  294:     NotIdentical = 16#2262,
  295:     {[16#0041,NotIdentical,16#0391,16#002E],
  296: 
  297:      %% Big endian.
  298:      <<16#0041/utf32,NotIdentical/utf32,16#0391/utf32,16#002E/utf32>>,
  299:      <<16#41:32,NotIdentical:32,16#0391:32,16#2E:32>>,
  300: 
  301:      %% Little endian.
  302:      <<16#0041/little-utf32,NotIdentical/little-utf32,
  303:       16#0391/little-utf32,16#002E/little-utf32>>,
  304:      <<16#41:32/little,NotIdentical:32/little,
  305:       16#0391:32/little,16#2E:32/little>>}.