1: %%
    2: %% %CopyrightBegin%
    3: %% 
    4: %% Copyright Ericsson AB 2008-2011. All Rights Reserved.
    5: %% 
    6: %% The contents of this file are subject to the Erlang Public License,
    7: %% Version 1.1, (the "License"); you may not use this file except in
    8: %% compliance with the License. You should have received a copy of the
    9: %% Erlang Public License along with this software. If not, it can be
   10: %% retrieved online at http://www.erlang.org/.
   11: %% 
   12: %% Software distributed under the License is distributed on an "AS IS"
   13: %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
   14: %% the License for the specific language governing rights and limitations
   15: %% under the License.
   16: %% 
   17: %% %CopyrightEnd%
   18: %%
   19: 
   20: -module(bs_utf_no_opt_SUITE).
   21: 
   22: -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, 
   23: 	 init_per_group/2,end_per_group/2,
   24: 	 init_per_testcase/2,end_per_testcase/2,
   25: 	 utf8_roundtrip/1,utf16_roundtrip/1,utf32_roundtrip/1,
   26: 	 utf8_illegal_sequences/1,utf16_illegal_sequences/1,
   27: 	 utf32_illegal_sequences/1,
   28: 	 bad_construction/1]).
   29: 
   30: -include_lib("test_server/include/test_server.hrl").
   31: 
   32: -define(FAIL(Expr), ?line fail_check(catch Expr, ??Expr, [])).
   33: 
   34: init_per_testcase(Func, Config) when is_atom(Func), is_list(Config) ->
   35:     Dog = ?t:timetrap(?t:minutes(6)),
   36:     [{watchdog,Dog}|Config].
   37: 
   38: end_per_testcase(_Func, Config) ->
   39:     Dog = ?config(watchdog, Config),
   40:     ?t:timetrap_cancel(Dog).
   41: 
   42: suite() -> [{ct_hooks,[ts_install_cth]}].
   43: 
   44: all() -> 
   45:     [utf8_roundtrip, utf16_roundtrip, utf32_roundtrip,
   46:      utf8_illegal_sequences, utf16_illegal_sequences,
   47:      utf32_illegal_sequences, bad_construction].
   48: 
   49: groups() -> 
   50:     [].
   51: 
   52: init_per_suite(Config) ->
   53:     Config.
   54: 
   55: end_per_suite(_Config) ->
   56:     ok.
   57: 
   58: init_per_group(_GroupName, Config) ->
   59:     Config.
   60: 
   61: end_per_group(_GroupName, Config) ->
   62:     Config.
   63: 
   64: 
   65: utf8_roundtrip(Config) when is_list(Config) ->
   66:     ?line utf8_roundtrip(0, 16#D7FF),
   67:     ?line utf8_roundtrip(16#E000, 16#10FFFF),
   68:     ok.
   69: 
   70: utf8_roundtrip(First, Last) when First =< Last ->
   71:     Bin = int_to_utf8(First),
   72:     Bin = id(<<First/utf8>>),
   73:     Bin = id(<<(id(<<>>))/binary,First/utf8>>),
   74:     Unaligned = id(<<3:2,First/utf8>>),
   75:     <<_:2,Bin/binary>> = Unaligned,
   76:     <<First/utf8>> = Bin,
   77:     <<First/utf8>> = make_unaligned(Bin),
   78:     utf8_roundtrip(First+1, Last);
   79: utf8_roundtrip(_, _) -> ok.
   80: 
   81: utf16_roundtrip(Config) when is_list(Config) ->
   82:     Big = fun utf16_big_roundtrip/1,
   83:     Little = fun utf16_little_roundtrip/1,
   84:     PidRefs = [spawn_monitor(fun() ->
   85: 				     do_utf16_roundtrip(Fun)
   86: 			     end) || Fun <- [Big,Little]],
   87:     [receive {'DOWN',Ref,process,Pid,Reason} -> normal=Reason end ||
   88: 	{Pid,Ref} <- PidRefs],
   89:     ok.
   90: 
   91: do_utf16_roundtrip(Fun) ->
   92:     do_utf16_roundtrip(0, 16#D7FF, Fun),
   93:     do_utf16_roundtrip(16#E000, 16#10FFFF, Fun).
   94: 
   95: do_utf16_roundtrip(First, Last, Fun) when First =< Last ->
   96:     Fun(First),
   97:     do_utf16_roundtrip(First+1, Last, Fun);
   98: do_utf16_roundtrip(_, _, _) -> ok.
   99: 
  100: utf16_big_roundtrip(Char) ->
  101:     Bin = id(<<Char/utf16>>),
  102:     Bin = id(<<(id(<<>>))/binary,Char/utf16>>),
  103:     Unaligned = id(<<3:2,Char/utf16>>),
  104:     <<_:2,Bin/binary>> = Unaligned,
  105:     <<Char/utf16>> = Bin,
  106:     <<Char/utf16>> = make_unaligned(Bin),
  107:     ok.
  108: 
  109: utf16_little_roundtrip(Char) ->
  110:     Bin = id(<<Char/little-utf16>>),
  111:     Bin = id(<<(id(<<>>))/binary,Char/little-utf16>>),
  112:     Unaligned = id(<<3:2,Char/little-utf16>>),
  113:     <<_:2,Bin/binary>> = Unaligned,
  114:     <<Char/little-utf16>> = Bin,
  115:     <<Char/little-utf16>> = make_unaligned(Bin),
  116:     ok.
  117: 
  118: utf32_roundtrip(Config) when is_list(Config) ->
  119:     Big = fun utf32_big_roundtrip/1,
  120:     Little = fun utf32_little_roundtrip/1,
  121:     PidRefs = [spawn_monitor(fun() ->
  122: 				     do_utf32_roundtrip(Fun)
  123: 			     end) || Fun <- [Big,Little]],
  124:     [receive {'DOWN',Ref,process,Pid,Reason} -> normal=Reason end ||
  125: 	{Pid,Ref} <- PidRefs],
  126:     ok.
  127: 
  128: do_utf32_roundtrip(Fun) ->
  129:     do_utf32_roundtrip(0, 16#D7FF, Fun),
  130:     do_utf32_roundtrip(16#E000, 16#10FFFF, Fun).
  131: 
  132: do_utf32_roundtrip(First, Last, Fun) when First =< Last ->
  133:     Fun(First),
  134:     do_utf32_roundtrip(First+1, Last, Fun);
  135: do_utf32_roundtrip(_, _, _) -> ok.
  136: 
  137: utf32_big_roundtrip(Char) ->
  138:     Bin = id(<<Char/utf32>>),
  139:     Bin = id(<<(id(<<>>))/binary,Char/utf32>>),
  140:     Unaligned = id(<<3:2,Char/utf32>>),
  141:     <<_:2,Bin/binary>> = Unaligned,
  142:     <<Char/utf32>> = Bin,
  143:     <<Char/utf32>> = make_unaligned(Bin),
  144:     ok.
  145: 
  146: utf32_little_roundtrip(Char) ->
  147:     Bin = id(<<Char/little-utf32>>),
  148:     Bin = id(<<(id(<<>>))/binary,Char/little-utf32>>),
  149:     Unaligned = id(<<3:2,Char/little-utf32>>),
  150:     <<_:2,Bin/binary>> = Unaligned,
  151:     <<Char/little-utf32>> = Bin,
  152:     <<Char/little-utf32>> = make_unaligned(Bin),
  153:     ok.
  154: 
  155: utf8_illegal_sequences(Config) when is_list(Config) ->
  156:     ?line fail_range(16#10FFFF+1, 16#10FFFF+512), %Too large.
  157:     ?line fail_range(16#D800, 16#DFFF),		%Reserved for UTF-16.
  158: 
  159:     %% Illegal first character.
  160:     ?line [fail(<<I,16#8F,16#8F,16#8F>>) || I <- lists:seq(16#80, 16#BF)],
  161: 
  162:     %% Short sequences.
  163:     ?line short_sequences(16#80, 16#10FFFF),
  164: 
  165:     %% Overlong sequences. (Using more bytes than necessary
  166:     %% is not allowed.)
  167:     ?line overlong(0, 127, 2),
  168:     ?line overlong(128, 16#7FF, 3),
  169:     ?line overlong(16#800, 16#FFFF, 4),
  170:     ok.
  171: 
  172: fail_range(Char, End) when Char =< End ->
  173:     {'EXIT',_} = (catch <<Char/utf8>>),
  174:     Bin = int_to_utf8(Char),
  175:     fail(Bin),
  176:     fail_range(Char+1, End);
  177: fail_range(_, _) -> ok.
  178: 
  179: short_sequences(Char, End) ->
  180:     Step = (End - Char) div erlang:system_info(schedulers) + 1,
  181:     PidRefs = short_sequences_1(Char, Step, End),
  182:     [receive {'DOWN',Ref,process,Pid,Reason} -> normal=Reason end ||
  183: 	{Pid,Ref} <- PidRefs],
  184:     ok.
  185: 
  186: short_sequences_1(Char, Step, End) when Char =< End ->
  187:     CharEnd = lists:min([Char+Step-1,End]),
  188:     [spawn_monitor(fun() ->
  189: 			   io:format("~p - ~p\n", [Char,CharEnd]),
  190: 			   do_short_sequences(Char, CharEnd)
  191: 		   end)|short_sequences_1(Char+Step, Step, End)];
  192: short_sequences_1(_, _, _) -> [].
  193: 
  194: do_short_sequences(Char, End) when Char =< End ->
  195:     short_sequence(Char),
  196:     do_short_sequences(Char+1, End);
  197: do_short_sequences(_, _) -> ok.
  198: 
  199: short_sequence(I) ->
  200:     case int_to_utf8(I) of
  201: 	<<S0:3/binary,_:8>> ->
  202: 	    <<S1:2/binary,R1:8>> = S0,
  203: 	    <<S2:1/binary,_:8>> = S1,
  204: 	    fail(S0),
  205: 	    fail(S1),
  206: 	    fail(S2),
  207: 	    fail(<<S2/binary,16#7F,R1,R1>>),
  208: 	    fail(<<S1/binary,16#7F,R1>>),
  209: 	    fail(<<S0/binary,16#7F>>);
  210: 	<<S0:2/binary,_:8>> ->
  211: 	    <<S1:1/binary,R1:8>> = S0,
  212: 	    fail(S0),
  213: 	    fail(S1),
  214: 	    fail(<<S0/binary,16#7F>>),
  215: 	    fail(<<S1/binary,16#7F>>),
  216: 	    fail(<<S1/binary,16#7F,R1>>);
  217: 	<<S:1/binary,_:8>> ->
  218: 	    fail(S),
  219: 	    fail(<<S/binary,16#7F>>)
  220:     end.
  221: 
  222: overlong(Char, Last, NumBytes) when Char =< Last ->
  223:     overlong(Char, NumBytes),
  224:     overlong(Char+1, Last, NumBytes);
  225: overlong(_, _, _) -> ok.
  226: 
  227: overlong(Char, NumBytes) when NumBytes < 5 ->
  228:     case int_to_utf8(Char, NumBytes) of
  229: 	<<Char/utf8>>=Bin ->
  230: 	    ?t:fail({illegal_encoding_accepted,Bin,Char});
  231: 	<<OtherChar/utf8>>=Bin ->
  232: 	    ?t:fail({illegal_encoding_accepted,Bin,Char,OtherChar});
  233: 	_ -> ok
  234:     end,
  235:     overlong(Char, NumBytes+1);
  236: overlong(_, _) -> ok.
  237: 
  238: fail(Bin) ->
  239:     fail_1(Bin),
  240:     fail_1(make_unaligned(Bin)).
  241: 
  242: fail_1(<<Char/utf8>>=Bin) ->
  243:     ?t:fail({illegal_encoding_accepted,Bin,Char});
  244: fail_1(_) -> ok.
  245: 
  246: 
  247: utf16_illegal_sequences(Config) when is_list(Config) ->
  248:     ?line utf16_fail_range(16#10FFFF+1, 16#10FFFF+512), %Too large.
  249:     ?line utf16_fail_range(16#D800, 16#DFFF),		%Reserved for UTF-16.
  250: 
  251:     ?line lonely_hi_surrogate(16#D800, 16#DFFF),
  252:     ?line leading_lo_surrogate(16#DC00, 16#DFFF),
  253:     
  254:     ok.
  255: 
  256: utf16_fail_range(Char, End) when Char =< End ->
  257:     {'EXIT',_} = (catch <<Char/big-utf16>>),
  258:     {'EXIT',_} = (catch <<Char/little-utf16>>),
  259:     utf16_fail_range(Char+1, End);
  260: utf16_fail_range(_, _) -> ok.
  261: 
  262: lonely_hi_surrogate(Char, End) when Char =< End ->
  263:     BinBig = <<Char:16/big>>,
  264:     BinLittle = <<Char:16/little>>,
  265:     case {BinBig,BinLittle} of
  266: 	{<<Bad/big-utf16>>,_} ->
  267: 	    ?t:fail({lonely_hi_surrogate_accepted,Bad});
  268: 	{_,<<Bad/little-utf16>>} ->
  269: 	    ?t:fail({lonely_hi_surrogate_accepted,Bad});
  270: 	{_,_} ->
  271: 	    ok
  272:     end,
  273:     lonely_hi_surrogate(Char+1, End);
  274: lonely_hi_surrogate(_, _) -> ok.
  275: 
  276: leading_lo_surrogate(Char, End) when Char =< End ->
  277:     leading_lo_surrogate(Char, 16#D800, 16#DFFF),
  278:     leading_lo_surrogate(Char+1, End);
  279: leading_lo_surrogate(_, _) -> ok.
  280: 
  281: leading_lo_surrogate(HiSurr, LoSurr, End) when LoSurr =< End ->
  282:     BinBig = <<HiSurr:16/big,LoSurr:16/big>>,
  283:     BinLittle = <<HiSurr:16/little,LoSurr:16/little>>,
  284:     case {BinBig,BinLittle} of
  285: 	{<<Bad/big-utf16,_/bits>>,_} ->
  286: 	    ?t:fail({leading_lo_surrogate_accepted,Bad});
  287: 	{_,<<Bad/little-utf16,_/bits>>} ->
  288: 	    ?t:fail({leading_lo_surrogate_accepted,Bad});
  289: 	{_,_} ->
  290: 	    ok
  291:     end,
  292:     leading_lo_surrogate(HiSurr, LoSurr+1, End);
  293: leading_lo_surrogate(_, _, _) -> ok.
  294: 
  295: utf32_illegal_sequences(Config) when is_list(Config) ->
  296:     ?line utf32_fail_range(16#10FFFF+1, 16#10FFFF+512), %Too large.
  297:     ?line utf32_fail_range(16#D800, 16#DFFF),		%Reserved for UTF-16.
  298:     ?line utf32_fail_range(-100, -1),
  299:     ok.
  300: 
  301: utf32_fail_range(Char, End) when Char =< End ->
  302:     {'EXIT',_} = (catch <<Char/big-utf32>>),
  303:     {'EXIT',_} = (catch <<Char/little-utf32>>),
  304:     case {<<Char:32>>,<<Char:32/little>>} of
  305: 	{<<Unexpected/utf32>>,_} ->
  306: 	    ?line ?t:fail(Unexpected);
  307: 	{_,<<Unexpected/little-utf32>>} ->
  308: 	    ?line ?t:fail(Unexpected);
  309: 	{_,_} -> ok
  310:     end,
  311:     utf32_fail_range(Char+1, End);
  312: utf32_fail_range(_, _) -> ok.
  313: 
  314: bad_construction(Config) when is_list(Config) ->
  315:     ?FAIL(<<3.14/utf8>>),
  316:     ?FAIL(<<3.1415/utf16>>),
  317:     ?FAIL(<<3.1415/utf32>>),
  318: 
  319:     ?FAIL(<<(-1)/utf8>>),
  320:     ?FAIL(<<(-1)/utf16>>),
  321:     {'EXIT',_} = (catch <<(id(-1))/utf8>>),
  322:     {'EXIT',_} = (catch <<(id(-1))/utf16>>),
  323:     {'EXIT',_} = (catch <<(id(-1))/utf32>>),
  324: 
  325:     ?FAIL(<<16#D800/utf8>>),
  326:     ?FAIL(<<16#D800/utf16>>),
  327:     ?FAIL(<<16#D800/utf32>>),
  328: 
  329:     ok.
  330: 
  331: %% This function intentionally allows construction of
  332: %% UTF-8 sequence in illegal ranges.
  333: int_to_utf8(I) when I =< 16#7F ->
  334:     <<I>>;
  335: int_to_utf8(I) when I =< 16#7FF ->
  336:     B2 = I,
  337:     B1 = (I bsr 6),
  338:     <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>;
  339: int_to_utf8(I) when I =< 16#FFFF ->
  340:     B3 = I,
  341:     B2 = (I bsr 6),
  342:     B1 = (I bsr 12),
  343:     <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>;
  344: int_to_utf8(I) when I =< 16#3FFFFF ->
  345:     B4 = I,
  346:     B3 = (I bsr 6),
  347:     B2 = (I bsr 12),
  348:     B1 = (I bsr 18),
  349:     <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>;
  350: int_to_utf8(I) when I =< 16#3FFFFFF ->
  351:     B5 = I,
  352:     B4 = (I bsr 6),
  353:     B3 = (I bsr 12),
  354:     B2 = (I bsr 18),
  355:     B1 = (I bsr 24),
  356:     <<1:1,1:1,1:1,1:1,1:1,0:1,B1:2,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6,
  357:      1:1,0:1,B5:6>>.
  358: 
  359: %% int_to_utf8(I, NumberOfBytes) -> Binary.
  360: %%  This function can be used to construct overlong sequences.
  361: int_to_utf8(I, 1) ->
  362:     <<I>>;
  363: int_to_utf8(I, 2) ->
  364:     B2 = I,
  365:     B1 = (I bsr 6),
  366:     <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>;
  367: int_to_utf8(I, 3) ->
  368:     B3 = I,
  369:     B2 = (I bsr 6),
  370:     B1 = (I bsr 12),
  371:     <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>;
  372: int_to_utf8(I, 4) ->
  373:     B4 = I,
  374:     B3 = (I bsr 6),
  375:     B2 = (I bsr 12),
  376:     B1 = (I bsr 18),
  377:     <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>.
  378: 
  379: make_unaligned(Bin0) when is_binary(Bin0) ->
  380:     Bin1 = <<0:3,Bin0/binary,31:5>>,
  381:     Sz = byte_size(Bin0),
  382:     <<0:3,Bin:Sz/binary,31:5>> = id(Bin1),
  383:     Bin.
  384: 
  385: fail_check({'EXIT',{badarg,_}}, Str, Vars) ->
  386:     try	evaluate(Str, Vars) of
  387: 	Res ->
  388: 	    io:format("Interpreted result: ~p", [Res]),
  389: 	    ?t:fail(did_not_fail_in_intepreted_code)
  390:     catch
  391: 	error:badarg ->
  392: 	    ok
  393:     end;
  394: fail_check(Res, _, _) ->
  395:     io:format("Compiled result: ~p", [Res]),
  396:     ?t:fail(did_not_fail_in_compiled_code).
  397: 
  398: evaluate(Str, Vars) ->
  399:     {ok,Tokens,_} =
  400: 	erl_scan:string(Str ++ " . "),
  401:     {ok, [Expr]} = erl_parse:parse_exprs(Tokens),
  402:     case erl_eval:expr(Expr, Vars) of
  403: 	{value, Result, _} ->
  404: 	    Result
  405:     end.
  406: 
  407: id(I) -> I.
  408: