1: %% 2: %% %CopyrightBegin% 3: %% 4: %% Copyright Ericsson AB 2008-2011. All Rights Reserved. 5: %% 6: %% The contents of this file are subject to the Erlang Public License, 7: %% Version 1.1, (the "License"); you may not use this file except in 8: %% compliance with the License. You should have received a copy of the 9: %% Erlang Public License along with this software. If not, it can be 10: %% retrieved online at http://www.erlang.org/. 11: %% 12: %% Software distributed under the License is distributed on an "AS IS" 13: %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 14: %% the License for the specific language governing rights and limitations 15: %% under the License. 16: %% 17: %% %CopyrightEnd% 18: %% 19: 20: -module(bs_utf_no_opt_SUITE). 21: 22: -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, 23: init_per_group/2,end_per_group/2, 24: init_per_testcase/2,end_per_testcase/2, 25: utf8_roundtrip/1,utf16_roundtrip/1,utf32_roundtrip/1, 26: utf8_illegal_sequences/1,utf16_illegal_sequences/1, 27: utf32_illegal_sequences/1, 28: bad_construction/1]). 29: 30: -include_lib("test_server/include/test_server.hrl"). 31: 32: -define(FAIL(Expr), ?line fail_check(catch Expr, ??Expr, [])). 33: 34: init_per_testcase(Func, Config) when is_atom(Func), is_list(Config) -> 35: Dog = ?t:timetrap(?t:minutes(6)), 36: [{watchdog,Dog}|Config]. 37: 38: end_per_testcase(_Func, Config) -> 39: Dog = ?config(watchdog, Config), 40: ?t:timetrap_cancel(Dog). 41: 42: suite() -> [{ct_hooks,[ts_install_cth]}]. 43: 44: all() -> 45: [utf8_roundtrip, utf16_roundtrip, utf32_roundtrip, 46: utf8_illegal_sequences, utf16_illegal_sequences, 47: utf32_illegal_sequences, bad_construction]. 48: 49: groups() -> 50: []. 51: 52: init_per_suite(Config) -> 53: Config. 54: 55: end_per_suite(_Config) -> 56: ok. 57: 58: init_per_group(_GroupName, Config) -> 59: Config. 60: 61: end_per_group(_GroupName, Config) -> 62: Config. 63: 64: 65: utf8_roundtrip(Config) when is_list(Config) -> 66: ?line utf8_roundtrip(0, 16#D7FF), 67: ?line utf8_roundtrip(16#E000, 16#10FFFF), 68: ok. 69: 70: utf8_roundtrip(First, Last) when First =< Last -> 71: Bin = int_to_utf8(First), 72: Bin = id(<<First/utf8>>), 73: Bin = id(<<(id(<<>>))/binary,First/utf8>>), 74: Unaligned = id(<<3:2,First/utf8>>), 75: <<_:2,Bin/binary>> = Unaligned, 76: <<First/utf8>> = Bin, 77: <<First/utf8>> = make_unaligned(Bin), 78: utf8_roundtrip(First+1, Last); 79: utf8_roundtrip(_, _) -> ok. 80: 81: utf16_roundtrip(Config) when is_list(Config) -> 82: Big = fun utf16_big_roundtrip/1, 83: Little = fun utf16_little_roundtrip/1, 84: PidRefs = [spawn_monitor(fun() -> 85: do_utf16_roundtrip(Fun) 86: end) || Fun <- [Big,Little]], 87: [receive {'DOWN',Ref,process,Pid,Reason} -> normal=Reason end || 88: {Pid,Ref} <- PidRefs], 89: ok. 90: 91: do_utf16_roundtrip(Fun) -> 92: do_utf16_roundtrip(0, 16#D7FF, Fun), 93: do_utf16_roundtrip(16#E000, 16#10FFFF, Fun). 94: 95: do_utf16_roundtrip(First, Last, Fun) when First =< Last -> 96: Fun(First), 97: do_utf16_roundtrip(First+1, Last, Fun); 98: do_utf16_roundtrip(_, _, _) -> ok. 99: 100: utf16_big_roundtrip(Char) -> 101: Bin = id(<<Char/utf16>>), 102: Bin = id(<<(id(<<>>))/binary,Char/utf16>>), 103: Unaligned = id(<<3:2,Char/utf16>>), 104: <<_:2,Bin/binary>> = Unaligned, 105: <<Char/utf16>> = Bin, 106: <<Char/utf16>> = make_unaligned(Bin), 107: ok. 108: 109: utf16_little_roundtrip(Char) -> 110: Bin = id(<<Char/little-utf16>>), 111: Bin = id(<<(id(<<>>))/binary,Char/little-utf16>>), 112: Unaligned = id(<<3:2,Char/little-utf16>>), 113: <<_:2,Bin/binary>> = Unaligned, 114: <<Char/little-utf16>> = Bin, 115: <<Char/little-utf16>> = make_unaligned(Bin), 116: ok. 117: 118: utf32_roundtrip(Config) when is_list(Config) -> 119: Big = fun utf32_big_roundtrip/1, 120: Little = fun utf32_little_roundtrip/1, 121: PidRefs = [spawn_monitor(fun() -> 122: do_utf32_roundtrip(Fun) 123: end) || Fun <- [Big,Little]], 124: [receive {'DOWN',Ref,process,Pid,Reason} -> normal=Reason end || 125: {Pid,Ref} <- PidRefs], 126: ok. 127: 128: do_utf32_roundtrip(Fun) -> 129: do_utf32_roundtrip(0, 16#D7FF, Fun), 130: do_utf32_roundtrip(16#E000, 16#10FFFF, Fun). 131: 132: do_utf32_roundtrip(First, Last, Fun) when First =< Last -> 133: Fun(First), 134: do_utf32_roundtrip(First+1, Last, Fun); 135: do_utf32_roundtrip(_, _, _) -> ok. 136: 137: utf32_big_roundtrip(Char) -> 138: Bin = id(<<Char/utf32>>), 139: Bin = id(<<(id(<<>>))/binary,Char/utf32>>), 140: Unaligned = id(<<3:2,Char/utf32>>), 141: <<_:2,Bin/binary>> = Unaligned, 142: <<Char/utf32>> = Bin, 143: <<Char/utf32>> = make_unaligned(Bin), 144: ok. 145: 146: utf32_little_roundtrip(Char) -> 147: Bin = id(<<Char/little-utf32>>), 148: Bin = id(<<(id(<<>>))/binary,Char/little-utf32>>), 149: Unaligned = id(<<3:2,Char/little-utf32>>), 150: <<_:2,Bin/binary>> = Unaligned, 151: <<Char/little-utf32>> = Bin, 152: <<Char/little-utf32>> = make_unaligned(Bin), 153: ok. 154: 155: utf8_illegal_sequences(Config) when is_list(Config) -> 156: ?line fail_range(16#10FFFF+1, 16#10FFFF+512), %Too large. 157: ?line fail_range(16#D800, 16#DFFF), %Reserved for UTF-16. 158: 159: %% Illegal first character. 160: ?line [fail(<<I,16#8F,16#8F,16#8F>>) || I <- lists:seq(16#80, 16#BF)], 161: 162: %% Short sequences. 163: ?line short_sequences(16#80, 16#10FFFF), 164: 165: %% Overlong sequences. (Using more bytes than necessary 166: %% is not allowed.) 167: ?line overlong(0, 127, 2), 168: ?line overlong(128, 16#7FF, 3), 169: ?line overlong(16#800, 16#FFFF, 4), 170: ok. 171: 172: fail_range(Char, End) when Char =< End -> 173: {'EXIT',_} = (catch <<Char/utf8>>), 174: Bin = int_to_utf8(Char), 175: fail(Bin), 176: fail_range(Char+1, End); 177: fail_range(_, _) -> ok. 178: 179: short_sequences(Char, End) -> 180: Step = (End - Char) div erlang:system_info(schedulers) + 1, 181: PidRefs = short_sequences_1(Char, Step, End), 182: [receive {'DOWN',Ref,process,Pid,Reason} -> normal=Reason end || 183: {Pid,Ref} <- PidRefs], 184: ok. 185: 186: short_sequences_1(Char, Step, End) when Char =< End -> 187: CharEnd = lists:min([Char+Step-1,End]), 188: [spawn_monitor(fun() -> 189: io:format("~p - ~p\n", [Char,CharEnd]), 190: do_short_sequences(Char, CharEnd) 191: end)|short_sequences_1(Char+Step, Step, End)]; 192: short_sequences_1(_, _, _) -> []. 193: 194: do_short_sequences(Char, End) when Char =< End -> 195: short_sequence(Char), 196: do_short_sequences(Char+1, End); 197: do_short_sequences(_, _) -> ok. 198: 199: short_sequence(I) -> 200: case int_to_utf8(I) of 201: <<S0:3/binary,_:8>> -> 202: <<S1:2/binary,R1:8>> = S0, 203: <<S2:1/binary,_:8>> = S1, 204: fail(S0), 205: fail(S1), 206: fail(S2), 207: fail(<<S2/binary,16#7F,R1,R1>>), 208: fail(<<S1/binary,16#7F,R1>>), 209: fail(<<S0/binary,16#7F>>); 210: <<S0:2/binary,_:8>> -> 211: <<S1:1/binary,R1:8>> = S0, 212: fail(S0), 213: fail(S1), 214: fail(<<S0/binary,16#7F>>), 215: fail(<<S1/binary,16#7F>>), 216: fail(<<S1/binary,16#7F,R1>>); 217: <<S:1/binary,_:8>> -> 218: fail(S), 219: fail(<<S/binary,16#7F>>) 220: end. 221: 222: overlong(Char, Last, NumBytes) when Char =< Last -> 223: overlong(Char, NumBytes), 224: overlong(Char+1, Last, NumBytes); 225: overlong(_, _, _) -> ok. 226: 227: overlong(Char, NumBytes) when NumBytes < 5 -> 228: case int_to_utf8(Char, NumBytes) of 229: <<Char/utf8>>=Bin -> 230: ?t:fail({illegal_encoding_accepted,Bin,Char}); 231: <<OtherChar/utf8>>=Bin -> 232: ?t:fail({illegal_encoding_accepted,Bin,Char,OtherChar}); 233: _ -> ok 234: end, 235: overlong(Char, NumBytes+1); 236: overlong(_, _) -> ok. 237: 238: fail(Bin) -> 239: fail_1(Bin), 240: fail_1(make_unaligned(Bin)). 241: 242: fail_1(<<Char/utf8>>=Bin) -> 243: ?t:fail({illegal_encoding_accepted,Bin,Char}); 244: fail_1(_) -> ok. 245: 246: 247: utf16_illegal_sequences(Config) when is_list(Config) -> 248: ?line utf16_fail_range(16#10FFFF+1, 16#10FFFF+512), %Too large. 249: ?line utf16_fail_range(16#D800, 16#DFFF), %Reserved for UTF-16. 250: 251: ?line lonely_hi_surrogate(16#D800, 16#DFFF), 252: ?line leading_lo_surrogate(16#DC00, 16#DFFF), 253: 254: ok. 255: 256: utf16_fail_range(Char, End) when Char =< End -> 257: {'EXIT',_} = (catch <<Char/big-utf16>>), 258: {'EXIT',_} = (catch <<Char/little-utf16>>), 259: utf16_fail_range(Char+1, End); 260: utf16_fail_range(_, _) -> ok. 261: 262: lonely_hi_surrogate(Char, End) when Char =< End -> 263: BinBig = <<Char:16/big>>, 264: BinLittle = <<Char:16/little>>, 265: case {BinBig,BinLittle} of 266: {<<Bad/big-utf16>>,_} -> 267: ?t:fail({lonely_hi_surrogate_accepted,Bad}); 268: {_,<<Bad/little-utf16>>} -> 269: ?t:fail({lonely_hi_surrogate_accepted,Bad}); 270: {_,_} -> 271: ok 272: end, 273: lonely_hi_surrogate(Char+1, End); 274: lonely_hi_surrogate(_, _) -> ok. 275: 276: leading_lo_surrogate(Char, End) when Char =< End -> 277: leading_lo_surrogate(Char, 16#D800, 16#DFFF), 278: leading_lo_surrogate(Char+1, End); 279: leading_lo_surrogate(_, _) -> ok. 280: 281: leading_lo_surrogate(HiSurr, LoSurr, End) when LoSurr =< End -> 282: BinBig = <<HiSurr:16/big,LoSurr:16/big>>, 283: BinLittle = <<HiSurr:16/little,LoSurr:16/little>>, 284: case {BinBig,BinLittle} of 285: {<<Bad/big-utf16,_/bits>>,_} -> 286: ?t:fail({leading_lo_surrogate_accepted,Bad}); 287: {_,<<Bad/little-utf16,_/bits>>} -> 288: ?t:fail({leading_lo_surrogate_accepted,Bad}); 289: {_,_} -> 290: ok 291: end, 292: leading_lo_surrogate(HiSurr, LoSurr+1, End); 293: leading_lo_surrogate(_, _, _) -> ok. 294: 295: utf32_illegal_sequences(Config) when is_list(Config) -> 296: ?line utf32_fail_range(16#10FFFF+1, 16#10FFFF+512), %Too large. 297: ?line utf32_fail_range(16#D800, 16#DFFF), %Reserved for UTF-16. 298: ?line utf32_fail_range(-100, -1), 299: ok. 300: 301: utf32_fail_range(Char, End) when Char =< End -> 302: {'EXIT',_} = (catch <<Char/big-utf32>>), 303: {'EXIT',_} = (catch <<Char/little-utf32>>), 304: case {<<Char:32>>,<<Char:32/little>>} of 305: {<<Unexpected/utf32>>,_} -> 306: ?line ?t:fail(Unexpected); 307: {_,<<Unexpected/little-utf32>>} -> 308: ?line ?t:fail(Unexpected); 309: {_,_} -> ok 310: end, 311: utf32_fail_range(Char+1, End); 312: utf32_fail_range(_, _) -> ok. 313: 314: bad_construction(Config) when is_list(Config) -> 315: ?FAIL(<<3.14/utf8>>), 316: ?FAIL(<<3.1415/utf16>>), 317: ?FAIL(<<3.1415/utf32>>), 318: 319: ?FAIL(<<(-1)/utf8>>), 320: ?FAIL(<<(-1)/utf16>>), 321: {'EXIT',_} = (catch <<(id(-1))/utf8>>), 322: {'EXIT',_} = (catch <<(id(-1))/utf16>>), 323: {'EXIT',_} = (catch <<(id(-1))/utf32>>), 324: 325: ?FAIL(<<16#D800/utf8>>), 326: ?FAIL(<<16#D800/utf16>>), 327: ?FAIL(<<16#D800/utf32>>), 328: 329: ok. 330: 331: %% This function intentionally allows construction of 332: %% UTF-8 sequence in illegal ranges. 333: int_to_utf8(I) when I =< 16#7F -> 334: <<I>>; 335: int_to_utf8(I) when I =< 16#7FF -> 336: B2 = I, 337: B1 = (I bsr 6), 338: <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>; 339: int_to_utf8(I) when I =< 16#FFFF -> 340: B3 = I, 341: B2 = (I bsr 6), 342: B1 = (I bsr 12), 343: <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>; 344: int_to_utf8(I) when I =< 16#3FFFFF -> 345: B4 = I, 346: B3 = (I bsr 6), 347: B2 = (I bsr 12), 348: B1 = (I bsr 18), 349: <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>; 350: int_to_utf8(I) when I =< 16#3FFFFFF -> 351: B5 = I, 352: B4 = (I bsr 6), 353: B3 = (I bsr 12), 354: B2 = (I bsr 18), 355: B1 = (I bsr 24), 356: <<1:1,1:1,1:1,1:1,1:1,0:1,B1:2,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6, 357: 1:1,0:1,B5:6>>. 358: 359: %% int_to_utf8(I, NumberOfBytes) -> Binary. 360: %% This function can be used to construct overlong sequences. 361: int_to_utf8(I, 1) -> 362: <<I>>; 363: int_to_utf8(I, 2) -> 364: B2 = I, 365: B1 = (I bsr 6), 366: <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>; 367: int_to_utf8(I, 3) -> 368: B3 = I, 369: B2 = (I bsr 6), 370: B1 = (I bsr 12), 371: <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>; 372: int_to_utf8(I, 4) -> 373: B4 = I, 374: B3 = (I bsr 6), 375: B2 = (I bsr 12), 376: B1 = (I bsr 18), 377: <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>. 378: 379: make_unaligned(Bin0) when is_binary(Bin0) -> 380: Bin1 = <<0:3,Bin0/binary,31:5>>, 381: Sz = byte_size(Bin0), 382: <<0:3,Bin:Sz/binary,31:5>> = id(Bin1), 383: Bin. 384: 385: fail_check({'EXIT',{badarg,_}}, Str, Vars) -> 386: try evaluate(Str, Vars) of 387: Res -> 388: io:format("Interpreted result: ~p", [Res]), 389: ?t:fail(did_not_fail_in_intepreted_code) 390: catch 391: error:badarg -> 392: ok 393: end; 394: fail_check(Res, _, _) -> 395: io:format("Compiled result: ~p", [Res]), 396: ?t:fail(did_not_fail_in_compiled_code). 397: 398: evaluate(Str, Vars) -> 399: {ok,Tokens,_} = 400: erl_scan:string(Str ++ " . "), 401: {ok, [Expr]} = erl_parse:parse_exprs(Tokens), 402: case erl_eval:expr(Expr, Vars) of 403: {value, Result, _} -> 404: Result 405: end. 406: 407: id(I) -> I. 408: