1: %%
    2: %% %CopyrightBegin%
    3: %%
    4: %% Copyright Ericsson AB 2008-2013. All Rights Reserved.
    5: %%
    6: %% The contents of this file are subject to the Erlang Public License,
    7: %% Version 1.1, (the "License"); you may not use this file except in
    8: %% compliance with the License. You should have received a copy of the
    9: %% Erlang Public License along with this software. If not, it can be
   10: %% retrieved online at http://www.erlang.org/.
   11: %%
   12: %% Software distributed under the License is distributed on an "AS IS"
   13: %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
   14: %% the License for the specific language governing rights and limitations
   15: %% under the License.
   16: %%
   17: %% %CopyrightEnd%
   18: %%
   19: -module(unicode_SUITE).
   20: 
   21: -include_lib("test_server/include/test_server.hrl").
   22: 
   23: -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, 
   24: 	 init_per_group/2,end_per_group/2,	 
   25: 	 init_per_testcase/2,
   26: 	 end_per_testcase/2,
   27: 	 utf8_illegal_sequences_bif/1,
   28: 	 utf16_illegal_sequences_bif/1,
   29: 	 random_lists/1,
   30: 	 roundtrips/1,
   31: 	 latin1/1,
   32: 	 exceptions/1, binaries_errors/1]).
   33: 	 
   34: init_per_testcase(Case, Config) when is_atom(Case), is_list(Config) ->
   35:     Dog=?t:timetrap(?t:minutes(20)),
   36:     [{watchdog, Dog}|Config].
   37: 
   38: end_per_testcase(_Case, Config) ->
   39:     Dog = ?config(watchdog, Config),
   40:     ?t:timetrap_cancel(Dog).
   41: 
   42: suite() -> [{ct_hooks,[ts_install_cth]}].
   43: 
   44: all() -> 
   45:     [utf8_illegal_sequences_bif,
   46:      utf16_illegal_sequences_bif, random_lists, roundtrips,
   47:      latin1, exceptions, binaries_errors].
   48: 
   49: groups() -> 
   50:     [].
   51: 
   52: init_per_suite(Config) ->
   53:     Config.
   54: 
   55: end_per_suite(_Config) ->
   56:     ok.
   57: 
   58: init_per_group(_GroupName, Config) ->
   59:     Config.
   60: 
   61: end_per_group(_GroupName, Config) ->
   62:     Config.
   63: 
   64: binaries_errors(Config) when is_list(Config) ->
   65:     setlimit(10),
   66:     ex_binaries_errors_utf8(Config),
   67:     setlimit(default),
   68:     ex_binaries_errors_utf8(Config),
   69:     ex_binaries_errors_utf16_little(Config),
   70:     ex_binaries_errors_utf16_big(Config),
   71:     ex_binaries_errors_utf32_little(Config),
   72:     ex_binaries_errors_utf32_big(Config).
   73:     
   74: ex_binaries_errors_utf8(Config) when is_list(Config) ->
   75:     %% Original smoke test, we should not forget the original offset...
   76:     <<_:8,_:8,RR2/binary>> = <<$a,$b,164,165,$c>>,
   77:     {error,[],<<164,165,$c>>} = unicode:characters_to_list(RR2),
   78:     %% Now, try with longer binary (trapping)
   79:     BrokenPart = list_to_binary(lists:seq(128,255)),
   80:     BrokenSz = byte_size(BrokenPart),
   81:     [ begin
   82: 	  OKList = lists:flatten(lists:duplicate(N,lists:seq(1,255))),
   83: 	  OKBin = unicode:characters_to_binary(OKList),
   84: 	  OKLen = length(OKList),
   85: 	  %% Copy to avoid that the binary get's writable
   86: 	  PartlyBroken = binary:copy(<<OKBin/binary, BrokenPart/binary>>),
   87: 	  PBSz = byte_size(PartlyBroken),
   88: 	  {error,OKList,DeepBrokenPart} = 
   89: 	      unicode:characters_to_list(PartlyBroken),
   90: 	  BrokenPart = iolist_to_binary(DeepBrokenPart),
   91: 	  [ begin
   92: 		NewList = lists:nthtail(X, OKList),
   93: 		NewSz = byte_size(unicode:characters_to_binary(NewList)) + 
   94: 		    BrokenSz,
   95: 		Chomped = binary:part(PartlyBroken,PBSz - NewSz, NewSz),
   96: 		true = (binary:referenced_byte_size(Chomped) =:= PBSz),
   97: 		{error,NewList,DeepBrokenPart2} =  
   98: 		    unicode:characters_to_list(Chomped),
   99: 		BrokenPart = iolist_to_binary(DeepBrokenPart2)
  100: 	    end || X <- lists:seq(1,OKLen) ]
  101:       end || N <- lists:seq(1,20) ],
  102:     ok.
  103: 
  104: ex_binaries_errors_utf16_little(Config) when is_list(Config) ->
  105:     BrokenPart = << <<X:16/little>> || X <- lists:seq(16#DC00,16#DFFF) >>,
  106:     BrokenSz = byte_size(BrokenPart),
  107:     [ begin
  108: 	  OKList = lists:flatten(lists:duplicate(N,lists:seq(1,255))),
  109: 	  OKBin = unicode:characters_to_binary(OKList,unicode,{utf16,little}),
  110: 	  OKLen = length(OKList),
  111: 	  %% Copy to avoid that the binary get's writable
  112: 	  PartlyBroken = binary:copy(<<OKBin/binary, BrokenPart/binary>>),
  113: 	  PBSz = byte_size(PartlyBroken),
  114: 	  {error,OKList,DeepBrokenPart} = 
  115: 	      unicode:characters_to_list(PartlyBroken,{utf16,little}),
  116: 	  BrokenPart = iolist_to_binary(DeepBrokenPart),
  117: 	  [ begin
  118: 		NewList = lists:nthtail(X, OKList),
  119: 		NewSz = byte_size(unicode:characters_to_binary(NewList,unicode,{utf16,little})) + 
  120: 		    BrokenSz,
  121: 		Chomped = binary:part(PartlyBroken,PBSz - NewSz, NewSz),
  122: 		true = (binary:referenced_byte_size(Chomped) =:= PBSz),
  123: 		{error,NewList,DeepBrokenPart2} =  
  124: 		    unicode:characters_to_list(Chomped,{utf16,little}),
  125: 		BrokenPart = iolist_to_binary(DeepBrokenPart2)
  126: 	    end || X <- lists:seq(1,OKLen) ]
  127:       end || N <- lists:seq(1,15) ],
  128:     ok.
  129: ex_binaries_errors_utf16_big(Config) when is_list(Config) ->
  130:     BrokenPart = << <<X:16/big>> || X <- lists:seq(16#DC00,16#DFFF) >>,
  131:     BrokenSz = byte_size(BrokenPart),
  132:     [ begin
  133: 	  OKList = lists:flatten(lists:duplicate(N,lists:seq(1,255))),
  134: 	  OKBin = unicode:characters_to_binary(OKList,unicode,{utf16,big}),
  135: 	  OKLen = length(OKList),
  136: 	  %% Copy to avoid that the binary get's writable
  137: 	  PartlyBroken = binary:copy(<<OKBin/binary, BrokenPart/binary>>),
  138: 	  PBSz = byte_size(PartlyBroken),
  139: 	  {error,OKList,DeepBrokenPart} = 
  140: 	      unicode:characters_to_list(PartlyBroken,{utf16,big}),
  141: 	  BrokenPart = iolist_to_binary(DeepBrokenPart),
  142: 	  [ begin
  143: 		NewList = lists:nthtail(X, OKList),
  144: 		NewSz = byte_size(unicode:characters_to_binary(NewList,unicode,{utf16,big})) + 
  145: 		    BrokenSz,
  146: 		Chomped = binary:part(PartlyBroken,PBSz - NewSz, NewSz),
  147: 		true = (binary:referenced_byte_size(Chomped) =:= PBSz),
  148: 		{error,NewList,DeepBrokenPart2} =  
  149: 		    unicode:characters_to_list(Chomped,{utf16,big}),
  150: 		BrokenPart = iolist_to_binary(DeepBrokenPart2)
  151: 	    end || X <- lists:seq(1,OKLen) ]
  152:       end || N <- lists:seq(1,15) ],
  153:     ok.
  154:     
  155: ex_binaries_errors_utf32_big(Config) when is_list(Config) ->
  156:     BrokenPart = << <<X:32/big>> || X <- lists:seq(16#DC00,16#DFFF) >>,
  157:     BrokenSz = byte_size(BrokenPart),
  158:     [ begin
  159: 	  OKList = lists:flatten(lists:duplicate(N,lists:seq(1,255))),
  160: 	  OKBin = unicode:characters_to_binary(OKList,unicode,{utf32,big}),
  161: 	  OKLen = length(OKList),
  162: 	  %% Copy to avoid that the binary get's writable
  163: 	  PartlyBroken = binary:copy(<<OKBin/binary, BrokenPart/binary>>),
  164: 	  PBSz = byte_size(PartlyBroken),
  165: 	  {error,OKList,DeepBrokenPart} = 
  166: 	      unicode:characters_to_list(PartlyBroken,{utf32,big}),
  167: 	  BrokenPart = iolist_to_binary(DeepBrokenPart),
  168: 	  [ begin
  169: 		NewList = lists:nthtail(X, OKList),
  170: 		NewSz = byte_size(unicode:characters_to_binary(NewList,unicode,{utf32,big})) + 
  171: 		    BrokenSz,
  172: 		Chomped = binary:part(PartlyBroken,PBSz - NewSz, NewSz),
  173: 		true = (binary:referenced_byte_size(Chomped) =:= PBSz),
  174: 		{error,NewList,DeepBrokenPart2} =  
  175: 		    unicode:characters_to_list(Chomped,{utf32,big}),
  176: 		BrokenPart = iolist_to_binary(DeepBrokenPart2)
  177: 	    end || X <- lists:seq(1,OKLen) ]
  178:       end || N <- lists:seq(1,15) ],
  179:     ok.
  180:     
  181: ex_binaries_errors_utf32_little(Config) when is_list(Config) ->
  182:     BrokenPart = << <<X:32/little>> || X <- lists:seq(16#DC00,16#DFFF) >>,
  183:     BrokenSz = byte_size(BrokenPart),
  184:     [ begin
  185: 	  OKList = lists:flatten(lists:duplicate(N,lists:seq(1,255))),
  186: 	  OKBin = unicode:characters_to_binary(OKList,unicode,{utf32,little}),
  187: 	  OKLen = length(OKList),
  188: 	  %% Copy to avoid that the binary get's writable
  189: 	  PartlyBroken = binary:copy(<<OKBin/binary, BrokenPart/binary>>),
  190: 	  PBSz = byte_size(PartlyBroken),
  191: 	  {error,OKList,DeepBrokenPart} = 
  192: 	      unicode:characters_to_list(PartlyBroken,{utf32,little}),
  193: 	  BrokenPart = iolist_to_binary(DeepBrokenPart),
  194: 	  [ begin
  195: 		NewList = lists:nthtail(X, OKList),
  196: 		NewSz = byte_size(unicode:characters_to_binary(NewList,unicode,{utf32,little})) + 
  197: 		    BrokenSz,
  198: 		Chomped = binary:part(PartlyBroken,PBSz - NewSz, NewSz),
  199: 		true = (binary:referenced_byte_size(Chomped) =:= PBSz),
  200: 		{error,NewList,DeepBrokenPart2} =  
  201: 		    unicode:characters_to_list(Chomped,{utf32,little}),
  202: 		BrokenPart = iolist_to_binary(DeepBrokenPart2)
  203: 	    end || X <- lists:seq(1,OKLen) ]
  204:       end || N <- lists:seq(1,15) ],
  205:     ok.
  206:     
  207: 
  208: 
  209: exceptions(Config) when is_list(Config) ->
  210:     setlimit(10),
  211:     ex_exceptions(Config),
  212:     setlimit(default),
  213:     ex_exceptions(Config).
  214: 
  215: ex_exceptions(Config) when is_list(Config) ->
  216:     ?line L = lists:seq(0,255),
  217:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(L++255,unicode)),
  218:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary({1,2,3},unicode)),
  219:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(1,unicode)),
  220:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(1.0,unicode)),
  221:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary('1',unicode)),
  222:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary([1,2,3,apa],unicode)),
  223:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary([1,2,3,4.0],unicode)),
  224:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(L++255,latin1)),
  225:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary({1,2,3},latin1)),
  226:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(1,latin1)),
  227:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(1.0,latin1)),
  228:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary('1',latin1)),
  229:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary([1,2,3,apa],latin1)),
  230:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary([1,2,3,4.0],latin1)),
  231:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(L,gnarfl)),
  232:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(L,L)),
  233:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(L,{latin1})),
  234:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(L,[latin1])),
  235:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(L,1)),
  236:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(L,1.0)),
  237:     Encodings = [unicode, utf8,utf16,utf32,{utf16,big},
  238: 		 {utf16,little},{utf32,big},{utf32,little}],
  239:     [ begin
  240: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(L++255,unicode,Enc)),
  241: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary({1,2,3},unicode,Enc)),
  242: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(1,unicode,Enc)),
  243: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(1.0,unicode,Enc)),
  244: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary('1',unicode,Enc)),
  245: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary([1,2,3,apa],unicode,
  246: 									  Enc)),
  247: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary([1,2,3,4.0],unicode,
  248: 									  Enc)),
  249: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(L++255,latin1,Enc)),
  250: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary({1,2,3},latin1,Enc)),
  251: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(1,latin1,Enc)),
  252: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(1.0,latin1,Enc)),
  253: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary('1',latin1,Enc)),
  254: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary([1,2,3,apa],latin1,
  255: 									  Enc)),
  256: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary([1,2,3,4.0],latin1,
  257: 									  Enc)),
  258: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(L,gnarfl,Enc)),
  259: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(L,L,Enc)),
  260: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(L,{latin1},Enc)),
  261: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(L,[latin1],Enc)),
  262: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(L,1,Enc)),
  263: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_binary(L,1.0,Enc))
  264:       end || Enc <- Encodings ],
  265: 
  266: 
  267:     Encodings2 = [latin1, unicode, utf8,utf16,utf32,{utf16,big},
  268: 		 {utf16,little},{utf32,big},{utf32,little}],
  269:     [ begin
  270: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_list(L++255,Enc)),
  271: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_list({1,2,3},Enc)),
  272: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_list(1,Enc)),
  273: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_list(1.0,Enc)),
  274: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_list('1',Enc)),
  275: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_list([1,2,3,apa],Enc)),
  276: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_list([1,2,3,4.0],Enc)),
  277: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_list(L,{Enc})),
  278: 	  ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_list(L,[Enc]))
  279:       end || Enc <- Encodings2 ],
  280:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_list(L,gnarfl)),
  281:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_list(L,L)),
  282:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_list(L,1)),
  283:     ?line {'EXIT',{badarg,_}} = (catch unicode:characters_to_list(L,1.0)),
  284:     [ begin
  285: 	  ?line Bx = unicode:characters_to_binary(L,latin1, Enc),
  286: 	  ?line L = unicode:characters_to_list(Bx,Enc)
  287:       end || Enc <- Encodings ],
  288:     ?line B = unicode:characters_to_binary(L,latin1),
  289:     ?line L = unicode:characters_to_list(B,unicode),
  290:     ?line L = unicode:characters_to_list(list_to_binary(L),latin1),
  291:     ?line More = <<B/binary,0,1,2>>,
  292:     ?line B2 = list_to_binary([254,255]),
  293:     ?line B3 = list_to_binary([0,1,2,254,255]),
  294:     ?line {error,B,Rest1} = unicode:characters_to_binary([L,B2],unicode),
  295:     ?line B2 = iolist_to_binary(Rest1),
  296:     ?line {error,More,Rest2} = unicode:characters_to_binary([L,B3],unicode),
  297:     [ begin ?line {error,_,_} = unicode:characters_to_binary([L,B2],unicode,Enc) end
  298:       || Enc <- Encodings ],
  299:     ?line Valid0 = unicode:characters_to_binary([L,254,255],unicode),
  300:     ?line Valid1 = unicode:characters_to_binary([L,254,255],latin1),
  301:     ?line Valid2 = unicode:characters_to_binary([L,254,255,256,257],unicode),
  302:     ?line Valid3 = unicode:characters_to_binary([L,B2],latin1),
  303:     ?line true = is_binary(Valid0),
  304:     ?line true = is_binary(Valid1),
  305:     ?line true = is_binary(Valid2),
  306:     ?line true = is_binary(Valid3),
  307:     ?line Valid4 = unicode:characters_to_binary([L,B3],latin1),
  308:     ?line true = is_binary(Valid4),
  309:     ?line B2 = iolist_to_binary(Rest2),
  310:     ?line true = (L ++ [254,255] =:=  unicode:characters_to_list(Valid0,unicode)),
  311:     ?line true = (L ++ [254,255,256,257] =:=  unicode:characters_to_list(Valid2,unicode)),
  312:     lists:foreach(fun(Enco) ->
  313: 			  ?line Valid0x = unicode:characters_to_binary([L,254,255],unicode,Enco),
  314: 			  ?line Valid1x = unicode:characters_to_binary([L,254,255],latin1,Enco),
  315: 			  ?line Valid2x = unicode:characters_to_binary([L,254,255,256,257],unicode,Enco),
  316: 			  ?line Valid3x = unicode:characters_to_binary([L,B2],latin1,Enco),
  317: 			  ?line true = is_binary(Valid0x),
  318: 			  ?line true = is_binary(Valid1x),
  319: 			  ?line true = is_binary(Valid2x),
  320: 			  ?line true = is_binary(Valid3x)
  321: 
  322: 		  end, Encodings),
  323:     ok.
  324:     
  325: 
  326: latin1(Config) when is_list(Config) ->
  327:     setlimit(10),
  328:     ex_latin1(Config),
  329:     setlimit(default),
  330:     ex_latin1(Config).
  331: 
%% Conversion of the Latin-1 code points 0..255 to UTF-8/16/32 and back.
%%
%% The input is given as a list, a binary, a nested list, an unaligned
%% binary and mixtures of those, and every variant must give the same
%% result. The unicode module is also cross-checked against the
%% hand-written helpers (*_bsyntax, unicode_mixed_to_utf8_1) that are
%% defined elsewhere in this file. The last parts check the
%% {error,_,_} and {incomplete,_,_} return values.
%%
%% Most statements re-match an already bound variable, i.e. they are
%% assertions; the order of the statements therefore matters.
%% NOTE(review): make_unaligned/1 is defined elsewhere; presumably it
%% returns the same bytes as a sub-binary at a non byte-aligned offset.
ex_latin1(Config) when is_list(Config) ->
    ?line All = lists:seq(0,255),
    ?line AllBin = list_to_binary(All),
    %% Latin-1 -> UTF-8
    ?line AllUtf8 = unicode:characters_to_binary(All,latin1),
    ?line AllUtf8 = unicode:characters_to_binary(AllBin,latin1),
    ?line AllUtf8 = unicode:characters_to_binary([AllBin],latin1),
    ?line AllUtf8 = unicode:characters_to_binary(make_unaligned(AllBin),latin1),
    ?line AllUtf8 = unicode:characters_to_binary([make_unaligned(AllBin)],latin1),
    ?line AllUtf8 = list_to_utf8_bsyntax([AllBin],latin1),
    ?line AllUtf8 = list_to_utf8_bsyntax([make_unaligned(AllBin)],latin1),
    ?line AllUtf8 = unicode_mixed_to_utf8_1(All),

    %% Latin-1 -> UTF-16; a plain 'utf16' must give the big endian result
    ?line AllUtf16_Big = unicode:characters_to_binary(All,latin1,utf16),
    ?line AllUtf16_Big = unicode:characters_to_binary(AllBin,latin1,utf16),
    ?line AllUtf16_Big = unicode:characters_to_binary([AllBin],latin1,utf16),
    ?line AllUtf16_Big = unicode:characters_to_binary(make_unaligned(AllBin),latin1,utf16),
    ?line AllUtf16_Big = unicode:characters_to_binary([make_unaligned(AllBin)],latin1,utf16),
    ?line AllUtf16_Big = list_to_utf16_big_bsyntax([AllBin],latin1),
    ?line AllUtf16_Big = list_to_utf16_big_bsyntax([make_unaligned(AllBin)],latin1),

    %% Latin-1 -> UTF-16 little endian
    ?line AllUtf16_Little = unicode:characters_to_binary(All,latin1,{utf16,little}),
    ?line AllUtf16_Little = unicode:characters_to_binary(AllBin,latin1,{utf16,little}),
    ?line AllUtf16_Little = unicode:characters_to_binary([AllBin],latin1,{utf16,little}),
    ?line AllUtf16_Little = unicode:characters_to_binary(make_unaligned(AllBin),latin1,
							 {utf16,little}),
    ?line AllUtf16_Little = unicode:characters_to_binary([make_unaligned(AllBin)],latin1,
							 {utf16,little}),
    ?line AllUtf16_Little = list_to_utf16_little_bsyntax([AllBin],latin1),
    ?line AllUtf16_Little = list_to_utf16_little_bsyntax([make_unaligned(AllBin)],latin1),

    %% Latin-1 -> UTF-32; a plain 'utf32' is checked against the big endian helper
    ?line AllUtf32_Big = unicode:characters_to_binary(All,latin1,utf32),
    ?line AllUtf32_Big = unicode:characters_to_binary(AllBin,latin1,utf32),
    ?line AllUtf32_Big = unicode:characters_to_binary([AllBin],latin1,utf32),
    ?line AllUtf32_Big = unicode:characters_to_binary(make_unaligned(AllBin),latin1,utf32),
    ?line AllUtf32_Big = unicode:characters_to_binary([make_unaligned(AllBin)],latin1,utf32),
    ?line AllUtf32_Big = list_to_utf32_big_bsyntax([AllBin],latin1),
    ?line AllUtf32_Big = list_to_utf32_big_bsyntax([make_unaligned(AllBin)],latin1),

    %% Latin-1 -> UTF-32 little endian
    ?line AllUtf32_Little = unicode:characters_to_binary(All,latin1,{utf32,little}),
    ?line AllUtf32_Little = unicode:characters_to_binary(AllBin,latin1,{utf32,little}),
    ?line AllUtf32_Little = unicode:characters_to_binary([AllBin],latin1,{utf32,little}),
    ?line AllUtf32_Little = unicode:characters_to_binary(make_unaligned(AllBin),latin1,
							 {utf32,little}),
    ?line AllUtf32_Little = unicode:characters_to_binary([make_unaligned(AllBin)],latin1,
							 {utf32,little}),
    ?line AllUtf32_Little = list_to_utf32_little_bsyntax([AllBin],latin1),
    ?line AllUtf32_Little = list_to_utf32_little_bsyntax([make_unaligned(AllBin)],latin1),

    %% Mixed list/binary input, both as proper and improper lists, to UTF-8
    ?line DoubleUtf8 = <<AllUtf8/binary,AllUtf8/binary>>,
    ?line DoubleUtf8 = unicode:characters_to_binary([All,AllBin],latin1),
    ?line DoubleUtf8 = 
	unicode:characters_to_binary([All,make_unaligned(AllBin)],latin1),
    ?line DoubleUtf8 = unicode:characters_to_binary([All|AllBin],latin1),
    ?line DoubleUtf8 = 
	unicode:characters_to_binary([All|make_unaligned(AllBin)],latin1),
    ?line DoubleUtf8 = unicode:characters_to_binary([AllBin,All],latin1),
    ?line DoubleUtf8 = unicode:characters_to_binary([AllBin|All],latin1),
    ?line DoubleUtf8 = list_to_utf8_bsyntax([AllBin|All],latin1), 

    %% The same mixed input to UTF-16 big endian
    ?line DoubleUtf16 = <<AllUtf16_Big/binary,AllUtf16_Big/binary>>,
    ?line DoubleUtf16 = unicode:characters_to_binary([All,AllBin],latin1,{utf16,big}),
    ?line DoubleUtf16 = 
	unicode:characters_to_binary([All,make_unaligned(AllBin)],latin1,{utf16,big}),
    ?line DoubleUtf16 = unicode:characters_to_binary([All|AllBin],latin1,{utf16,big}),
    ?line DoubleUtf16 = 
	unicode:characters_to_binary([All|make_unaligned(AllBin)],latin1,{utf16,big}),
    ?line DoubleUtf16 = unicode:characters_to_binary([AllBin,All],latin1,{utf16,big}),
    ?line DoubleUtf16 = unicode:characters_to_binary([AllBin|All],latin1,{utf16,big}),
    ?line DoubleUtf16 = list_to_utf16_big_bsyntax([AllBin|All],latin1), 

    %% Decoding the UTF-8 results gives back the original code points
    ?line All = unicode:characters_to_list(AllUtf8,unicode),
    ?line All = unicode:characters_to_list(make_unaligned(AllUtf8),unicode),
    ?line All = utf8_to_list_bsyntax(AllUtf8),
    ?line AllAll = All ++ All,
    ?line AllAll = unicode:characters_to_list(DoubleUtf8,unicode),
    ?line AllAll = unicode:characters_to_list(make_unaligned(DoubleUtf8),unicode),
    ?line AllAll = utf8_to_list_bsyntax(DoubleUtf8),
    %% 16#FFF is larger than 255 and is rejected in latin1 input: the
    %% conversion returns what was converted so far plus the rest, which
    %% starts at the offending element.
    ?line {error,AllUtf8,Rest1} =  unicode:characters_to_binary(All++[16#FFF],latin1),
    ?line [16#FFF] = lists:flatten(Rest1),
    ?line {error,DoubleUtf8,Rest2} = 
	unicode:characters_to_binary([All,AllBin,16#FFF],latin1),
    ?line {error,DoubleUtf16,Rest2x} = 
	unicode:characters_to_binary([All,AllBin,16#FFF],latin1,utf16),
    ?line [16#FFF] = lists:flatten(Rest2),
    ?line [16#FFF] = lists:flatten(Rest2x),
    %% With the offending element in the middle, everything from it and
    %% onwards is returned unconverted as the rest.
    ?line {error,AllUtf8,Rest3} = 
	unicode:characters_to_binary([All,16#FFF,AllBin,16#FFF],
			    latin1),
    ?line {error,AllUtf8,Rest3} = 
	unicode:characters_to_binary([All,16#FFF,make_unaligned(AllBin),16#FFF],
			    latin1),
    ?line {error,AllUtf16_Big,Rest3x} = 
	unicode:characters_to_binary([All,16#FFF,AllBin,16#FFF],
			    latin1,{utf16,big}),
    ?line {error,AllUtf16_Big,Rest3x} = 
	unicode:characters_to_binary([All,16#FFF,make_unaligned(AllBin),16#FFF],
			    latin1,{utf16,big}),
    ?line [16#FFF,AllBin,16#FFF] = lists:flatten(Rest3),
    ?line [16#FFF,AllBin,16#FFF] = lists:flatten(Rest3x),
    %% Truncated UTF-8: the last character (255) takes two bytes, so
    %% dropping the final byte must give 'incomplete' with all but the last
    %% character decoded and the remaining single byte as the rest.
    ?line DoubleSize = byte_size(DoubleUtf8),
    ?line AllBut1 = DoubleSize - 1,
    ?line AllBut2 = DoubleSize - 2,
    ?line <<MissingLastByte:AllBut1/binary,_>> = DoubleUtf8,
    ?line <<_:AllBut2/binary,MissingStart:1/binary,_>> = DoubleUtf8,
    ?line {ChompedList,_} = lists:split(length(AllAll) - 1,AllAll),
    ?line {incomplete,ChompedList,MissingStart} = 
	unicode:characters_to_list(MissingLastByte,unicode),
    ?line {incomplete,ChompedList,MissingStart} = 
	unicode:characters_to_list(make_unaligned(MissingLastByte),unicode),

    %% Truncated UTF-16: DoubleUtf16_2 ends with 16#FFFFF, which is encoded
    %% as a surrogate pair; without its last two bytes only the first half
    %% of the pair is left, so all of AllAll is decoded and the result is
    %% 'incomplete'. Dropping the last byte of DoubleUtf16 instead cuts the
    %% last character in half.
    ?line DoubleSize16 = byte_size(DoubleUtf16),
    ?line DoubleUtf16_2 = list_to_binary([DoubleUtf16,<<16#FFFFF/utf16-big>>]),
    ?line DoubleSize16_2 = byte_size(DoubleUtf16_2),
    ?line AllBut1_16 = DoubleSize16 - 1,
    ?line AllBut2_16_2 = DoubleSize16_2 - 2,
    ?line <<MissingLastBytes16:AllBut2_16_2/binary,_,_>> = DoubleUtf16_2,
    ?line <<MissingLastByte16:AllBut1_16/binary,_>> = DoubleUtf16,
    ?line {incomplete,AllAll,_} = 
	unicode:characters_to_list(MissingLastBytes16,utf16),
    ?line {incomplete,AllAll,_} = 
	unicode:characters_to_list(make_unaligned(MissingLastBytes16),utf16),
    ?line {incomplete,ChompedList,_} = 
	unicode:characters_to_list(MissingLastByte16,utf16),
    ?line {incomplete,ChompedList,_} = 
	unicode:characters_to_list(make_unaligned(MissingLastByte16),utf16),
    ok.
  458:     
  459: roundtrips(Config) when is_list(Config) ->
  460:     setlimit(10),
  461:     ex_roundtrips(Config),
  462:     setlimit(default),
  463:     ex_roundtrips(Config).
  464: 
  465: ex_roundtrips(Config) when is_list(Config) ->
  466:     ?line L1 = ranges(0, 16#D800 - 1, 
  467: 		      erlang:system_info(context_reductions) * 11),
  468:     ?line L2 = ranges(16#DFFF + 1, 16#10000 - 1,
  469: 		      erlang:system_info(context_reductions) * 11),
  470:     %?line L3 = ranges(16#FFFF + 1, 16#10FFFF, 
  471:     %		      erlang:system_info(context_reductions) * 11),
  472:     ?line L3 = ranges(16#FFFFF, 16#10FFFF, 
  473: 		      erlang:system_info(context_reductions) * 11),
  474:     ?line L = L1 ++ L2 ++ L3,
  475:     ?line LLen = length(L),
  476:     ?line Parts = erlang:system_info(schedulers),
  477:     ?line Lists = splitup(L,LLen,Parts),
  478:     ?line PidRefs = [spawn_monitor(fun() ->
  479: 					   do_roundtrips(MyPart)
  480: 				   end) || MyPart <- Lists],
  481:     ?line [receive {'DOWN',Ref,process,Pid,Reason} -> normal=Reason end ||
  482: 	      {Pid,Ref} <- PidRefs],
  483:     ok.
  484: 
  485: do_roundtrips([]) ->
  486:     ok;
  487: do_roundtrips([{Start,Stop}|T]) ->
  488:     erlang:display({Start,Stop}),
  489:     List = lists:seq(Start,Stop),
  490:     Utf = unicode:characters_to_binary(List,unicode),
  491:     Utf16_Big = unicode:characters_to_binary(List,unicode,{utf16,big}),
  492:     Utf16_Little = unicode:characters_to_binary(List,unicode,{utf16,little}),
  493:     Utf32_Big = unicode:characters_to_binary(List,unicode,{utf32,big}),
  494:     Utf32_Little = unicode:characters_to_binary(List,unicode,{utf32,little}),
  495: 
  496:     Utf = unicode:characters_to_binary([Utf],unicode),
  497:     Utf16_Big = unicode:characters_to_binary([Utf16_Big],{utf16,big},{utf16,big}),
  498:     Utf16_Little = unicode:characters_to_binary([Utf16_Little],{utf16,little},{utf16,little}),
  499:     Utf32_Big = unicode:characters_to_binary([Utf32_Big],{utf32,big},{utf32,big}),
  500:     Utf32_Little = unicode:characters_to_binary([Utf32_Little],{utf32,little},{utf32,little}),
  501: 
  502:     Utf = list_to_utf8_bsyntax(List,unicode),
  503:     Utf16_Big = list_to_utf16_big_bsyntax(List,{utf16,big}),
  504:     Utf16_Little = list_to_utf16_little_bsyntax(List,{utf16,little}),
  505:     Utf32_Big = list_to_utf32_big_bsyntax(List,{utf32,big}),
  506:     Utf32_Little = list_to_utf32_little_bsyntax(List,{utf32,little}),
  507: 
  508:     Utf = unicode_mixed_to_utf8_1(List),
  509: 
  510:     List = unicode:characters_to_list(Utf,unicode),
  511:     List = unicode:characters_to_list(Utf16_Big,{utf16,big}),
  512:     List = unicode:characters_to_list(Utf16_Little,{utf16,little}),
  513:     List = unicode:characters_to_list(Utf32_Big,{utf32,big}),
  514:     List = unicode:characters_to_list(Utf32_Little,{utf32,little}),
  515:     List = utf8_to_list_bsyntax(Utf),
  516:     List = utf16_big_to_list_bsyntax(Utf16_Big),
  517:     List = utf16_little_to_list_bsyntax(Utf16_Little),
  518:     List = utf32_big_to_list_bsyntax(Utf32_Big),
  519:     List = utf32_little_to_list_bsyntax(Utf32_Little),
  520:     List = utf8_to_list(Utf),
  521:     List = utf16_big_to_list(Utf16_Big),
  522:     List = utf16_little_to_list(Utf16_Little),
  523:     List = utf32_big_to_list(Utf32_Big),
  524:     List = utf32_little_to_list(Utf32_Little),
  525:     do_roundtrips(T).
  526: 
  527: 
  528: random_lists(Config) when is_list(Config) ->
  529:     setlimit(10),
  530:     ex_random_lists(Config),
  531:     setlimit(default),
  532:     ex_random_lists(Config).
  533: ex_random_lists(Config) when is_list(Config) ->
  534:     PlainFlatten1 = fun(L) ->
  535: 			    unicode:characters_to_binary(flat(L),latin1)
  536: 		    end,
  537:     PlainFlatten2 = fun(L) ->
  538: 			    unicode:characters_to_binary(L,latin1)
  539: 		    end,
  540:     PlainFlatten3 = fun(L) ->
  541: 			    unicode:characters_to_binary(flatb(L),latin1)
  542: 		    end,
  543:     PlainFlatten4 = fun(L) ->
  544: 			    iolist_to_binary([int_to_utf8(X) || X <- unicode:characters_to_list(flatb(L),latin1)])
  545: 		    end,
  546:     ?line random_iolist:run(150, PlainFlatten1, PlainFlatten3),
  547:     ?line random_iolist:run(150, PlainFlatten2, PlainFlatten3),
  548:     ?line random_iolist:run(150, PlainFlatten1, PlainFlatten2),
  549:     ?line random_iolist:run(150, PlainFlatten1, PlainFlatten4),
  550:     SelfMade = fun(L) ->
  551: 		       iolist_to_binary(lists:map(fun(X) ->
  552: 							  int_to_utf8(X)
  553: 						  end,
  554: 						  flatb(L)))
  555: 	       end,
  556:     SelfMadeA = fun(L) -> 
  557: 			case (catch list_to_utf8_bsyntax(L,latin1)) of
  558: 			    {'EXIT', Reason} ->
  559: 				io:format("Exit: ~p (~w)~n",[Reason,L]),
  560: 				exit(Reason);
  561: 			    Other ->
  562: 				Other
  563: 			end
  564: 		end,
  565:     ?line random_iolist:run(150, PlainFlatten1, SelfMade),
  566:     ?line random_iolist:run(150, PlainFlatten2, SelfMadeA),
  567: 
  568:     RoundTrip11 = fun(L) ->
  569: 			 unicode:characters_to_list(unicode:characters_to_binary(L,latin1),unicode)
  570: 		 end,
  571:     RoundTrip21 = fun(L) ->
  572: 			 utf8_to_list_bsyntax(unicode:characters_to_binary(L,latin1))
  573: 		 end,
  574:     RoundTrip31 = fun(L) ->
  575: 			 unicode:characters_to_list(list_to_utf8_bsyntax(L,latin1),unicode)
  576: 		 end,
  577:     RoundTrip41 = fun(L) ->
  578: 			 utf8_to_list_bsyntax(list_to_utf8_bsyntax(L,latin1))
  579: 		 end,
  580:     RoundTrip51 = fun(L) ->
  581: 			 unicode:characters_to_list(L,latin1)
  582: 		 end,
  583:     ?line random_iolist:run(150, RoundTrip11,RoundTrip21),
  584:     ?line random_iolist:run(150, RoundTrip21,RoundTrip31),
  585:     ?line random_iolist:run(150, RoundTrip31,RoundTrip41),
  586:     ?line random_iolist:run(150, RoundTrip11,RoundTrip41),
  587:     ?line random_iolist:run(150, RoundTrip21,RoundTrip41),
  588:     ?line random_iolist:run(150, RoundTrip11,RoundTrip31),
  589:     ?line random_iolist:run(150, RoundTrip11,RoundTrip51),
  590: 
  591: 
  592:     UniFlatten1 = fun(L) ->
  593: 			  unicode:characters_to_binary(flat(L),unicode)
  594: 		  end,
  595:     UniFlatten2 = fun(L) ->
  596: 			    unicode:characters_to_binary(L,unicode)
  597: 		  end,
  598:     UniFlatten3 = fun(L) ->
  599: 			    unicode:characters_to_binary(flatx(L),unicode)
  600: 		  end,
  601:     UniFlatten4 = fun(L) ->
  602: 			    unicode:characters_to_binary(unicode:characters_to_list(L,unicode),unicode)
  603: 		  end,
  604:     ?line random_unicode_list:run(150, UniFlatten1,UniFlatten2),
  605:     ?line random_unicode_list:run(150, UniFlatten1,UniFlatten3),
  606:     ?line random_unicode_list:run(150, UniFlatten2,UniFlatten4),
  607:     ?line random_unicode_list:run(150, UniFlatten2,UniFlatten3),
  608: 
  609:     ?line Encodings = [utf8,{utf16,big},
  610: 		       {utf16,little},{utf32,big},{utf32,little}],
  611:     lists:foreach(fun(OutEnc1) ->
  612: 			  lists:foreach(fun(InEnc1) -> 
  613: 						Uni16BigFlatten1 = fun(L) ->
  614: 									   unicode:characters_to_binary(flat(L),InEnc1,OutEnc1)
  615: 								   end,
  616: 						Uni16BigFlatten2 = fun(L) ->
  617: 									   unicode:characters_to_binary(L,InEnc1,OutEnc1)
  618: 								   end,
  619: 						Uni16BigFlatten3 = fun(L) ->
  620: 									   unicode:characters_to_binary(flatx(L),InEnc1,OutEnc1)
  621: 								   end,
  622: 						Uni16BigFlatten4 = fun(L) ->
  623: 									   unicode:characters_to_binary(unicode:characters_to_list(L,InEnc1),InEnc1,OutEnc1)
  624: 								   end,
  625: 						%erlang:display({InEnc1,OutEnc1}),
  626: 						?line random_unicode_list:run(150, Uni16BigFlatten1,Uni16BigFlatten2,InEnc1),
  627: 						?line random_unicode_list:run(150, Uni16BigFlatten1,Uni16BigFlatten3,InEnc1),
  628: 						?line random_unicode_list:run(150, Uni16BigFlatten2,Uni16BigFlatten4,InEnc1),
  629: 						?line random_unicode_list:run(150, Uni16BigFlatten2,Uni16BigFlatten3,InEnc1)
  630: 					end, Encodings)
  631: 		  end, Encodings),
  632:     SelfMade1 = fun(L) ->
  633: 			unicode_mixed_to_utf8_1(L)
  634: 		end,
  635:     SelfMade2 = fun(L) ->
  636: 			unicode_mixed_to_utf8_2(L)
  637: 		end,
  638:     SelfMade3 = fun(L) ->
  639: 			list_to_utf8_bsyntax(L,unicode)
  640: 		end,
  641:     ?line random_unicode_list:run(150, SelfMade1,SelfMade2),
  642:     ?line random_unicode_list:run(150, UniFlatten2, SelfMade1),
  643:     ?line random_unicode_list:run(150, UniFlatten2, SelfMade2),
  644:     ?line random_unicode_list:run(150, UniFlatten2, SelfMade3),
  645:     RoundTrip1 = fun(L) ->
  646: 			 unicode:characters_to_list(unicode:characters_to_binary(L,unicode),unicode)
  647: 		 end,
  648:     RoundTrip2 = fun(L) ->
  649: 			 utf8_to_list_bsyntax(unicode:characters_to_binary(L,unicode))
  650: 		 end,
  651:     RoundTrip3 = fun(L) ->
  652: 			 unicode:characters_to_list(list_to_utf8_bsyntax(L,unicode),unicode)
  653: 		 end,
  654:     RoundTrip4 = fun(L) ->
  655: 			 utf8_to_list_bsyntax(list_to_utf8_bsyntax(L,unicode))
  656: 		 end,
  657:     ?line random_unicode_list:run(150, RoundTrip1,RoundTrip2),
  658:     ?line random_unicode_list:run(150, RoundTrip2,RoundTrip3),
  659:     ?line random_unicode_list:run(150, RoundTrip3,RoundTrip4),
  660:     ?line random_unicode_list:run(150, RoundTrip1,RoundTrip4),
  661:     ?line random_unicode_list:run(150, RoundTrip2,RoundTrip4),
  662:     ?line random_unicode_list:run(150, RoundTrip1,RoundTrip3),
  663:     lists:foreach(fun(OutEnc2) ->
  664: 			  lists:foreach(fun(InEnc2) ->
  665: 						RoundTripUtf16_Big_1 = fun(L) ->
  666: 									       unicode:characters_to_list(unicode:characters_to_binary(L,InEnc2,OutEnc2),OutEnc2)
  667: 								       end,
  668: 						RoundTripUtf16_Big_2 = fun(L) ->
  669: 									       x_to_list_bsyntax(OutEnc2,unicode:characters_to_binary(L,InEnc2,OutEnc2))
  670: 								       end,
  671: 						RoundTripUtf16_Big_3 = fun(L) ->
  672: 									       unicode:characters_to_list(list_to_x_bsyntax(InEnc2,L,InEnc2),InEnc2)
  673: 								       end,
  674: 						RoundTripUtf16_Big_4 = fun(L) ->
  675: 									       x_to_list_bsyntax(InEnc2,list_to_x_bsyntax(InEnc2,L,InEnc2))
  676: 								       end,
  677: 						?line random_unicode_list:run(150, RoundTripUtf16_Big_1,RoundTripUtf16_Big_2,InEnc2),
  678: 						?line random_unicode_list:run(150, RoundTripUtf16_Big_2,RoundTripUtf16_Big_3,InEnc2),
  679: 						?line random_unicode_list:run(150, RoundTripUtf16_Big_3,RoundTripUtf16_Big_4,InEnc2),
  680: 						?line random_unicode_list:run(150, RoundTripUtf16_Big_1,RoundTripUtf16_Big_4,InEnc2),
  681: 						?line random_unicode_list:run(150, RoundTripUtf16_Big_2,RoundTripUtf16_Big_4,InEnc2),
  682: 						?line random_unicode_list:run(150, RoundTripUtf16_Big_1,RoundTripUtf16_Big_3,InEnc2)
  683: 					end, Encodings)
  684: 		  end, Encodings),
  685:     ToList1 = fun(L) ->
  686: 		      unicode:characters_to_list(L,unicode)
  687: 	      end,
  688:     ToList2 = fun(L) ->
  689: 		      unicode:characters_to_list(unicode:characters_to_binary(L,unicode),unicode)
  690: 	      end,
  691:     ToList3 = fun(L) ->
  692: 		      unicode:characters_to_list(unicode_mixed_to_utf8_2(L),unicode)
  693: 	      end,
  694:     ToList4 = fun(L) ->
  695: 		      utf8_to_list(unicode_mixed_to_utf8_2(L))
  696: 	      end,
  697:     ?line random_unicode_list:run(150, ToList1,ToList2),
  698:     ?line random_unicode_list:run(150, ToList2,ToList3),
  699:     ?line random_unicode_list:run(150, ToList3,ToList4),
  700:     ?line random_unicode_list:run(150, ToList1,ToList4),
  701:     ?line random_unicode_list:run(150, ToList2,ToList4),
  702:     ?line random_unicode_list:run(150, ToList1,ToList3),
  703: 
  704:     ok.
  705: 
  706: utf16_illegal_sequences_bif(Config) when is_list(Config) ->
  707:     setlimit(10),
  708:     ex_utf16_illegal_sequences_bif(Config),
  709:     setlimit(default),
  710:     ex_utf16_illegal_sequences_bif(Config).
  711: 
  712: ex_utf16_illegal_sequences_bif(Config) when is_list(Config) ->
  713:     ?line utf16_fail_range_bif_simple(16#10FFFF+1, 16#10FFFF+512), %Too large.
  714:     ?line utf16_fail_range_bif(16#D800, 16#DFFF),		%Reserved for UTF-16.
  715: 
  716:     ?line lonely_hi_surrogate_bif(16#D800, 16#DBFF,incomplete),
  717:     ?line lonely_hi_surrogate_bif(16#DC00, 16#DFFF,error),
  718:     ?line leading_lo_surrogate_bif(16#DC00, 16#DFFF),
  719:     
  720:     ok.
  721: 
  722: utf16_fail_range_bif(Char, End) when Char =< End ->
  723:     {error,_,_} = unicode:characters_to_binary([Char],{utf16,big}),
  724:     BigBin = int_to_utf16_big(Char),
  725:     fail_bif(BigBin,{utf16,big}),
  726:     {error,_,_} = unicode:characters_to_binary([Char],{utf16,little}),
  727:     LittleBin = int_to_utf16_little(Char),
  728:     fail_bif(LittleBin,{utf16,little}),
  729:     utf16_fail_range_bif(Char+1, End);
  730: utf16_fail_range_bif(_, _) -> ok.
  731: 
  732: utf16_fail_range_bif_simple(Char, End) when Char =< End ->
  733:     {error,_,_} = unicode:characters_to_binary([Char],{utf16,big}),
  734:     {error,_,_} = unicode:characters_to_binary([Char],{utf16,little}),
  735:     utf16_fail_range_bif_simple(Char+1, End);
  736: utf16_fail_range_bif_simple(_, _) -> ok.
  737: 
  738: 
  739: lonely_hi_surrogate_bif(Char, End, EType) when Char =< End ->
  740:     BinBig = <<Char:16/big>>,
  741:     BinLittle = <<Char:16/little>>,
  742:     case unicode:characters_to_binary(BinBig,{utf16,big}) of
  743: 	{EType,_,_} -> 
  744: 	    ok;
  745: 	Other ->
  746: 	    exit({lonely_hi_surrogate_accepted,BinBig,{utf16,big},Other})
  747:     end,
  748:     case unicode:characters_to_binary(BinLittle,{utf16,little}) of
  749: 	{EType,_,_} -> 
  750: 	    ok;
  751: 	Other2 ->
  752: 	    exit({lonely_hi_surrogate_accepted,BinLittle,{utf16,little},Other2})
  753:     end,
  754:     lonely_hi_surrogate_bif(Char+1, End, EType);
  755: lonely_hi_surrogate_bif(_, _, _) -> ok.
  756: 
  757: leading_lo_surrogate_bif(Char, End) when Char =< End ->
  758:     leading_lo_surrogate_bif(Char, 16#D800, 16#DFFF),
  759:     leading_lo_surrogate_bif(Char+1, End);
  760: leading_lo_surrogate_bif(_, _) -> ok.
  761: 
  762: leading_lo_surrogate_bif(HiSurr, LoSurr, End) when LoSurr =< End ->
  763:     BinBig = <<HiSurr:16/big,LoSurr:16/big>>,
  764:     BinLittle = <<HiSurr:16/little,LoSurr:16/little>>,
  765:     case unicode:characters_to_binary(BinBig,{utf16,big}) of
  766: 	{error,_,_} -> 
  767: 	    ok;
  768: 	Other ->
  769: 	    exit({leading_lo_surrogate_accepted,BinBig,{utf16,big},Other})
  770:     end,
  771:     case unicode:characters_to_binary(BinLittle,{utf16,little}) of
  772: 	{error,_,_} -> 
  773: 	    ok;
  774: 	Other2 ->
  775: 	    exit({leading_lo_surrogate_accepted,BinLittle,{utf16,little},Other2})
  776:     end,
  777:     leading_lo_surrogate_bif(HiSurr, LoSurr+1, End);
  778: leading_lo_surrogate_bif(_, _, _) -> ok.
  779: 
  780: utf8_illegal_sequences_bif(Config) when is_list(Config) ->
  781:     setlimit(10),
  782:     ex_utf8_illegal_sequences_bif(Config),
  783:     setlimit(default),
  784:     ex_utf8_illegal_sequences_bif(Config).
  785: 
  786: ex_utf8_illegal_sequences_bif(Config) when is_list(Config) ->
  787:     ?line fail_range_bif(16#10FFFF+1, 16#10FFFF+512), %Too large.
  788:     ?line fail_range_bif(16#D800, 16#DFFF),		%Reserved for UTF-16.
  789: 
  790:     %% Illegal first character.
  791:     ?line [fail_bif(<<I,16#8F,16#8F,16#8F>>,unicode) || I <- lists:seq(16#80, 16#BF)],
  792: 
  793:     %% Short sequences.
  794:     ?line short_sequences_bif(16#80, 16#10FFFF),
  795: 
  796:     %% Overlong sequences. (Using more bytes than necessary
  797:     %% is not allowed.)
  798:     ?line overlong_bif(0, 127, 2),
  799:     ?line overlong_bif(128, 16#7FF, 3),
  800:     ?line overlong_bif(16#800, 16#FFFF, 4),
  801:     ok.
  802: 
  803: fail_range_bif(Char, End) when Char =< End ->
  804:     {error,_,_} = unicode:characters_to_binary([Char],unicode),
  805:     {error,_,_} = unicode:characters_to_binary([Char],unicode,utf16),
  806:     {error,_,_} = unicode:characters_to_binary([Char],unicode,utf32),
  807:     Bin = int_to_utf8(Char),
  808:     fail_bif(Bin,unicode),
  809:     fail_range_bif(Char+1, End);
  810: fail_range_bif(_, _) -> ok.
  811: 
  812: short_sequences_bif(Char, End) ->
  813:     Step = (End - Char) div erlang:system_info(schedulers) + 1,
  814: %    Step = (End - Char) + 1,
  815:     PidRefs = short_sequences_bif_1(Char, Step, End),
  816:     [receive {'DOWN',Ref,process,Pid,Reason} -> normal=Reason end ||
  817: 	{Pid,Ref} <- PidRefs],
  818:     ok.
  819: 
  820: short_sequences_bif_1(Char, Step, End) when Char =< End ->
  821:     CharEnd = lists:min([Char+Step-1,End]),
  822:     [spawn_monitor(fun() ->
  823: 			   io:format("~p - ~p\n", [Char,CharEnd]),
  824: 			   do_short_sequences_bif(Char, CharEnd)
  825: 		   end)|short_sequences_bif_1(Char+Step, Step, End)];
  826: short_sequences_bif_1(_, _, _) -> [].
  827: 
  828: do_short_sequences_bif(Char, End) when Char =< End ->
  829:     short_sequence_bif(Char),
  830:     do_short_sequences_bif(Char+1, End);
  831: do_short_sequences_bif(_, _) -> ok.
  832: 
  833: short_sequence_bif(I) ->
  834:     case int_to_utf8(I) of
  835: 	<<S0:3/binary,_:8>> ->
  836: 	    <<S1:2/binary,R1:8>> = S0,
  837: 	    <<S2:1/binary,_:8>> = S1,
  838: 	    incomplete_bif(S0,S0),
  839: 	    incomplete_bif(S1,S1),
  840: 	    incomplete_bif(S2,S2),
  841: 	    only_fail_bif(<<S2/binary,16#7F,R1,R1>>,unicode),
  842: 	    only_fail_bif(<<S1/binary,16#7F,R1>>,unicode),
  843: 	    only_fail_bif(<<S0/binary,16#7F>>,unicode);
  844: 	<<S0:2/binary,_:8>> ->
  845: 	    <<S1:1/binary,R1:8>> = S0,
  846: 	    incomplete_bif(S0,S0),
  847: 	    incomplete_bif(S1,S1),
  848: 	    only_fail_bif(<<S0/binary,16#7F>>,unicode),
  849: 	    only_fail_bif(<<S1/binary,16#7F>>,unicode),
  850: 	    only_fail_bif(<<S1/binary,16#7F,R1>>,unicode);
  851: 	<<S:1/binary,_:8>> ->
  852: 	    incomplete_bif(S,S),
  853: 	    only_fail_bif(<<S/binary,16#7F>>,unicode)
  854:     end.
  855: 
  856: 
  857: overlong_bif(Char, Last, NumBytes) when Char =< Last ->
  858:     overlong_bif(Char, NumBytes),
  859:     overlong_bif(Char+1, Last, NumBytes);
  860: overlong_bif(_, _, _) -> ok.
  861: 
  862: overlong_bif(Char, NumBytes) when NumBytes < 5 ->
  863:     case unicode:characters_to_binary([int_to_utf8(Char, NumBytes)],unicode) of
  864: 	{error,_,_} -> 
  865: 	    ok;
  866: 	Other->
  867: 	    exit({illegal_encoding_accepted,Char,NumBytes,Other})
  868:     end,
  869:     overlong_bif(Char, NumBytes+1);
  870: overlong_bif(_, _) -> ok.
  871: 
  872: incomplete_bif(Bin,Tail) ->
  873:     incomplete_bif_1(Bin,Tail),
  874:     incomplete_bif_1(make_unaligned(Bin),Tail),
  875:     incomplete_bif_r_1(Bin,Tail),
  876:     incomplete_bif_r_1(make_unaligned(Bin),Tail),
  877:     ok.
  878: 
  879: incomplete_bif_1(Bin,Tail) ->
  880:     case unicode:characters_to_binary([Bin],unicode) of
  881: 	{incomplete,_,Tail} ->
  882: 	    case unicode:characters_to_binary(Bin,unicode) of
  883: 		{incomplete,_,Tail} ->
  884: 		    ok;
  885: 		Other0 ->
  886: 		    exit({incomplete_encoding_accepted,Bin,Other0})
  887: 	    end;
  888: 	Other ->
  889: 	    exit({incomplete_encoding_accepted,[Bin],Other})
  890:     end.
  891: incomplete_bif_r_1(Bin,Tail) ->
  892:     case unicode:characters_to_list([Bin],unicode) of
  893: 	{incomplete,_,Tail} ->
  894: 	    case unicode:characters_to_list(Bin,unicode) of
  895: 		{incomplete,_,Tail} ->
  896: 		    ok;
  897: 		Other ->
  898: 		    exit({incomplete_encoding_accepted_r,[Bin],Other})
  899: 	    end;
  900: 	Other ->
  901: 	    exit({incomplete_encoding_accepted_r,[Bin],Other})
  902:     end.
  903: 
  904: only_fail_bif(Bin,Coding) ->
  905:     only_fail_bif_1(Bin,Coding),
  906:     only_fail_bif_1(make_unaligned(Bin),Coding),
  907:     only_fail_bif_r_1(Bin,Coding),
  908:     only_fail_bif_r_1(make_unaligned(Bin),Coding),
  909:     ok.
  910: 
  911: only_fail_bif_r_1(Bin,Coding) ->
  912:     case unicode:characters_to_list([Bin],Coding) of
  913: 	{error,_,_} ->
  914: 	    case unicode:characters_to_list(Bin,Coding) of
  915: 		{error,_,_} ->
  916: 		    ok;
  917: 		Other ->
  918: 		    exit({faulty_encoding_accepted_r,Bin,Coding,Other})
  919: 	    end;
  920: 	Other ->
  921: 	    exit({faulty_encoding_accepted_r,Bin,Coding,Other})
  922:     end.
  923: only_fail_bif_1(Bin,Coding) ->
  924:     case unicode:characters_to_binary([Bin],Coding) of
  925: 	{error,_,_} ->
  926: 	    case unicode:characters_to_binary(Bin,Coding) of
  927: 		{error,_,_} ->
  928: 		    ok;
  929: 		Other0 ->
  930: 		    exit({faulty_encoding_accepted,Bin,Coding,Other0})
  931: 	    end;
  932: 	Other ->
  933: 	    exit({faulty_encoding_accepted,[Bin],Coding,Other})
  934:     end.
  935:     
  936: 	    
  937: 
  938: 
  939: fail_bif(Bin,Coding) ->
  940:     fail_bif_1(Bin,Coding),
  941:     fail_bif_1(make_unaligned(Bin),Coding),
  942:     fail_bif_r_1(Bin,Coding),
  943:     fail_bif_r_1(make_unaligned(Bin),Coding),
  944:     ok.
  945: fail_bif_r_1(Bin,Coding) ->
  946:     case unicode:characters_to_list(Bin,Coding) of
  947: 	L when is_list(L) ->
  948: 	    exit({illegal_encoding_accepted,Bin,Coding});
  949: 	_ ->
  950: 	    ok
  951:     end.
  952: 
  953: fail_bif_1(Bin,Coding) ->
  954:     case unicode:characters_to_binary([Bin],Coding) of
  955: 	Bin2 when is_binary(Bin2) ->
  956: 	    exit({illegal_encoding_accepted,Bin,Coding});
  957: 	_ ->
  958: 	    ok
  959:     end.
  960: 
  961: %%
  962: %% Diverse utilities
  963: %%
  964: 
  965: ranges(X,Y,_N) when X >= Y ->
  966:     [];
  967: ranges(X,Y,N) when X + N > Y ->
  968:     [{X,Y}];
  969: ranges(X,Y,N) ->
  970:     Upper = X+N,
  971:     [{X,Upper}|ranges(Upper+1,Y,N)].
  972: 
  973: splitup(L,_Len,1) ->
  974:     [L];
  975: splitup(L,Len,Parts) ->
  976:     Num = Len div Parts,
  977:     {A,B} = lists:split(Num,L),
  978:     [A| splitup(B,Len - Num,Parts - 1)].
  979: 
  980: flat(List) ->
  981:     lists:reverse(flat(List,[])).
  982: 
  983: flat([H|T],Acc) ->
  984:     NewAcc = flat(H,Acc),
  985:     flat(T,NewAcc);
  986: flat([],Acc) ->
  987:     Acc;
  988: flat(X,Acc) ->
  989:     [X|Acc].
  990: 
  991: flatb(List) ->
  992:     lists:reverse(flatb(List,[])).
  993: 
  994: flatb(<<X:8,Rest/binary>>,Acc) ->
  995:     flatb(Rest,[X|Acc]);
  996: flatb(<<>>,Acc) ->
  997:     Acc;
  998: flatb([H|T],Acc) ->
  999:     NewAcc = flatb(H,Acc),
 1000:     flatb(T,NewAcc);
 1001: flatb([],Acc) ->
 1002:     Acc;
 1003: flatb(X,Acc) ->
 1004:     [X|Acc].
 1005: flatx(List) ->
 1006:     lists:reverse(flatx(List,[])).
 1007: 
 1008: flatx([B1,B2|T],Acc) when is_binary(B1), is_binary(B2) ->
 1009:     flatx([<<B1/binary,B2/binary>>|T],Acc);
 1010: flatx([H|T],Acc) ->
 1011:     NewAcc = flatx(H,Acc),
 1012:     flatx(T,NewAcc);
 1013: flatx([],Acc) ->
 1014:     Acc;
 1015: flatx(X,Acc) ->
 1016:     [X|Acc].
 1017: 
 1018: 
 1019: unicode_mixed_to_utf8_1(L) ->
 1020:     Flist = flatx([L]),
 1021:     ExpList = [ case is_binary(E) of
 1022: 		    true ->
 1023: 			utf8_to_list(E);
 1024: 		    false ->
 1025: 			E
 1026: 		end || E <- Flist ],
 1027:     iolist_to_binary([int_to_utf8(I) || I <- flat(ExpList)]).
 1028: 
 1029: unicode_mixed_to_utf8_2(L) ->
 1030:     Flist = flatx([L]),
 1031:     ExpList = [ case is_binary(E) of
 1032: 		    true ->
 1033: 			E;
 1034: 		    false ->
 1035: 			int_to_utf8(E)
 1036: 		end || E <- Flist ],
 1037:     iolist_to_binary([ExpList]).
 1038:     
 1039:     
 1040:     
 1041: 
 1042: utf8_to_list_bsyntax(<<>>) ->
 1043:     [];
 1044: utf8_to_list_bsyntax(<<C/utf8,R/binary>>) ->
 1045:     [C|utf8_to_list_bsyntax(R)].
 1046: 
 1047: list_to_utf8_bsyntax(List,unicode) ->
 1048:     FList = flatx(List),
 1049:     list_to_binary([ if
 1050: 			 is_binary(E) ->
 1051: 			     E;
 1052: 			 true ->
 1053: 			     <<E/utf8>>
 1054: 		     end || E <- FList ]);
 1055: list_to_utf8_bsyntax(List,latin1) ->
 1056:     FList = flatb(List),
 1057:     list_to_binary([ <<E/utf8>> || E <- FList ]).
 1058: 
 1059:     
 1060:     
 1061: 
 1062: 
 1063: %%
 1064: %% Conversion utilities
 1065: %%
 1066: 
 1067: int_to_utf16_big(U) when U < 16#10000 ->
 1068:     <<U:16/big>>;
 1069: int_to_utf16_big(U) when U >= 16#10000, U =< 16#10FFFF ->
 1070:     UPrim = U - 16#10000,
 1071:     HI = (16#D800 bor (UPrim bsr 10)),
 1072:     LO = (16#DC00 bor (UPrim band 16#3FF)),
 1073:     <<HI:16/big,LO:16/big>>.
 1074: 
 1075: int_to_utf16_little(U) when U < 16#10000 ->
 1076:     <<U:16/little>>;
 1077: int_to_utf16_little(U) when U >= 16#10000, U =< 16#10FFFF ->
 1078:     UPrim = U - 16#10000,
 1079:     HI = (16#D800 bor (UPrim bsr 10)),
 1080:     LO = (16#DC00 bor (UPrim band 16#3FF)),
 1081:     <<HI:16/little,LO:16/little>>.
 1082: 
 1083:     
 1084: %% This function intentionally allows construction of
 1085: %% UTF-8 sequence in illegal ranges.
 1086: int_to_utf8(I) when I =< 16#7F ->
 1087:     <<I>>;
 1088: int_to_utf8(I) when I =< 16#7FF ->
 1089:     B2 = I,
 1090:     B1 = (I bsr 6),
 1091:     <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>;
 1092: int_to_utf8(I) when I =< 16#FFFF ->
 1093:     B3 = I,
 1094:     B2 = (I bsr 6),
 1095:     B1 = (I bsr 12),
 1096:     <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>;
 1097: int_to_utf8(I) when I =< 16#3FFFFF ->
 1098:     B4 = I,
 1099:     B3 = (I bsr 6),
 1100:     B2 = (I bsr 12),
 1101:     B1 = (I bsr 18),
 1102:     <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>;
 1103: int_to_utf8(I) when I =< 16#3FFFFFF ->
 1104:     B5 = I,
 1105:     B4 = (I bsr 6),
 1106:     B3 = (I bsr 12),
 1107:     B2 = (I bsr 18),
 1108:     B1 = (I bsr 24),
 1109:     <<1:1,1:1,1:1,1:1,1:1,0:1,B1:2,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6,
 1110:      1:1,0:1,B5:6>>.
 1111: 
 1112: utf16_big_to_list_bsyntax(<<>>) ->
 1113:     [];
 1114: utf16_big_to_list_bsyntax(<<C/utf16-big,R/binary>>) ->
 1115:     [C|utf16_big_to_list_bsyntax(R)].
 1116: 
 1117: list_to_utf16_big_bsyntax(List,{utf16,big}) ->
 1118:     FList = flatx(List),
 1119:     list_to_binary([ if
 1120: 			 is_binary(E) ->
 1121: 			     E;
 1122: 			 true ->
 1123: 			     <<E/utf16-big>>
 1124: 		     end || E <- FList ]);
 1125: list_to_utf16_big_bsyntax(List,latin1) ->
 1126:     FList = flatb(List),
 1127:     list_to_binary([ <<E/utf16-big>> || E <- FList ]).
 1128: 
 1129: 
 1130: utf16_little_to_list_bsyntax(<<>>) ->
 1131:     [];
 1132: utf16_little_to_list_bsyntax(<<C/utf16-little,R/binary>>) ->
 1133:     [C|utf16_little_to_list_bsyntax(R)].
 1134: 
 1135: list_to_utf16_little_bsyntax(List,{utf16,little}) ->
 1136:     FList = flatx(List),
 1137:     list_to_binary([ if
 1138: 			 is_binary(E) ->
 1139: 			     E;
 1140: 			 true ->
 1141: 			     <<E/utf16-little>>
 1142: 		     end || E <- FList ]);
 1143: list_to_utf16_little_bsyntax(List,latin1) ->
 1144:     FList = flatb(List),
 1145:     list_to_binary([ <<E/utf16-little>> || E <- FList ]).
 1146: 
 1147: 
 1148:     
 1149: utf32_big_to_list_bsyntax(<<>>) ->
 1150:     [];
 1151: utf32_big_to_list_bsyntax(<<C/utf32-big,R/binary>>) ->
 1152:     [C|utf32_big_to_list_bsyntax(R)].
 1153: 
 1154: list_to_utf32_big_bsyntax(List,{utf32,big}) ->
 1155:     FList = flatx(List),
 1156:     list_to_binary([ if
 1157: 			 is_binary(E) ->
 1158: 			     E;
 1159: 			 true ->
 1160: 			     <<E/utf32-big>>
 1161: 		     end || E <- FList ]);
 1162: list_to_utf32_big_bsyntax(List,latin1) ->
 1163:     FList = flatb(List),
 1164:     list_to_binary([ <<E/utf32-big>> || E <- FList ]).
 1165: 
 1166: 
 1167: utf32_little_to_list_bsyntax(<<>>) ->
 1168:     [];
 1169: utf32_little_to_list_bsyntax(<<C/utf32-little,R/binary>>) ->
 1170:     [C|utf32_little_to_list_bsyntax(R)].
 1171: 
 1172: list_to_utf32_little_bsyntax(List,{utf32,little}) ->
 1173:     FList = flatx(List),
 1174:     list_to_binary([ if
 1175: 			 is_binary(E) ->
 1176: 			     E;
 1177: 			 true ->
 1178: 			     <<E/utf32-little>>
 1179: 		      end || E <- FList ]);
 1180: list_to_utf32_little_bsyntax(List,latin1) ->
 1181:     FList = flatb(List),
 1182:     list_to_binary([ <<E/utf32-little>> || E <- FList ]).
 1183: 
 1184:     
 1185: 
 1186: %% int_to_utf8(I, NumberOfBytes) -> Binary.
 1187: %%  This function can be used to construct overlong sequences.
 1188: int_to_utf8(I, 1) ->
 1189:     <<I>>;
 1190: int_to_utf8(I, 2) ->
 1191:     B2 = I,
 1192:     B1 = (I bsr 6),
 1193:     <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>;
 1194: int_to_utf8(I, 3) ->
 1195:     B3 = I,
 1196:     B2 = (I bsr 6),
 1197:     B1 = (I bsr 12),
 1198:     <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>;
 1199: int_to_utf8(I, 4) ->
 1200:     B4 = I,
 1201:     B3 = (I bsr 6),
 1202:     B2 = (I bsr 12),
 1203:     B1 = (I bsr 18),
 1204:     <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>.
 1205: 
 1206: utf8_to_list(<<>>) ->
 1207:     [];
 1208: utf8_to_list(Bin) ->
 1209:     N = utf8_siz(Bin),
 1210:     <<X:N/binary,Rest/binary>> = Bin,
 1211:     [utf8_to_int(X) | utf8_to_list(Rest)].
 1212: utf8_siz(<<0:1,_:7,_/binary>>) ->
 1213:     1;
 1214: utf8_siz(<<1:1,1:1,0:1,_:5,_/binary>>) ->
 1215:     2;
 1216: utf8_siz(<<1:1,1:1,1:1,0:1,_:4,_/binary>>) ->
 1217:     3;
 1218: utf8_siz(<<1:1,1:1,1:1,1:1,0:1,_:3,_/binary>>) ->
 1219:     4.
 1220: 
 1221: utf8_to_int(<<0:1,B:7>>) ->
 1222:     B;
 1223: utf8_to_int(<<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>) ->
 1224:     (B1 bsl 6) bor B2;
 1225: utf8_to_int(<<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>) ->
 1226:     (B1 bsl 12) bor (B2 bsl 6) bor B3; 
 1227: utf8_to_int(<<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,
 1228: 	     B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>) ->
 1229:     Res = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4,
 1230:     case Res of
 1231: 	X when X > 16#10FFFF ->
 1232: 	    exit(unsupported_utf8);
 1233: 	Other ->
 1234: 	    Other
 1235:     end;
 1236: utf8_to_int(_) ->
 1237:     exit(unsupported_utf8).
 1238: 
 1239: 
 1240: utf16_big_to_list(<<>>) ->
 1241:     [];
 1242: utf16_big_to_list(Bin) ->
 1243:     N = utf16_big_siz(Bin),
 1244:     <<X:N/binary,Rest/binary>> = Bin,
 1245:     [utf16_big_to_int(X) | utf16_big_to_list(Rest)].
 1246: 
 1247: utf16_big_siz(<<1:1,1:1,0:1,1:1,1:1,0:1,_:1,_:1,_/binary>>) ->
 1248:     4;
 1249: utf16_big_siz(_) ->
 1250:     2.
 1251: 
 1252: utf16_big_to_int(<<1:1,1:1,0:1,1:1,1:1,0:1,W1:10,1:1,1:1,0:1,1:1,1:1,1:1,W2:10>>) ->
 1253:     ((W1 bsl 10) bor W2) + 16#10000;
 1254: utf16_big_to_int(<<W:16>>) ->
 1255:     W;
 1256: utf16_big_to_int(_) ->
 1257:     exit(unsupported_utf16_big).
 1258: 
 1259: utf16_little_to_list(<<>>) ->
 1260:     [];
 1261: utf16_little_to_list(Bin) ->
 1262:     N = utf16_little_siz(Bin),
 1263:     <<X:N/binary,Rest/binary>> = Bin,
 1264:     [utf16_little_to_int(X) | utf16_little_to_list(Rest)].
 1265: utf16_little_siz(<<_:8,1:1,1:1,0:1,1:1,1:1,0:1,_:1,_:1,_/binary>>) ->
 1266:     4;
 1267: utf16_little_siz(_) ->
 1268:     2.
 1269: 
 1270: utf16_little_to_int(<<W1B:8,1:1,1:1,0:1,1:1,1:1,0:1,W1A:2,W2B:8,1:1,1:1,0:1,1:1,1:1,1:1,W2A:2>>) ->
 1271:     W1 = (W1A bsl 8) bor W1B,
 1272:     W2 = (W2A bsl 8) bor W2B,
 1273:     ((W1 bsl 10) bor W2) + 16#10000;
 1274: utf16_little_to_int(<<W:16/little>>) ->
 1275:     W;
 1276: utf16_little_to_int(_) ->
 1277:     exit(unsupported_utf16_little).
 1278: 
 1279: utf32_big_to_list(<<>>) ->
 1280:     [];
 1281: utf32_big_to_list(<<I:32,Rest/binary>>) ->
 1282:     [ I | utf32_big_to_list(Rest)].
 1283: utf32_little_to_list(<<>>) ->
 1284:     [];
 1285: utf32_little_to_list(<<I:32/little,Rest/binary>>) ->
 1286:     [ I | utf32_little_to_list(Rest)].
 1287: 
 1288: 
 1289: x_to_list_bsyntax(utf8,Bin) ->
 1290:     utf8_to_list_bsyntax(Bin);
 1291: x_to_list_bsyntax({utf16,big},Bin) ->
 1292:     utf16_big_to_list_bsyntax(Bin);
 1293: x_to_list_bsyntax({utf16,little},Bin) ->
 1294:     utf16_little_to_list_bsyntax(Bin);
 1295: x_to_list_bsyntax({utf32,big},Bin) ->
 1296:     utf32_big_to_list_bsyntax(Bin);
 1297: x_to_list_bsyntax({utf32,little},Bin) ->
 1298:     utf32_little_to_list_bsyntax(Bin).
 1299: 
 1300: list_to_x_bsyntax(utf8,L,utf8) ->
 1301:     list_to_utf8_bsyntax(L,unicode);
 1302: list_to_x_bsyntax(utf8,L,Enc) ->
 1303:     list_to_utf8_bsyntax(L,Enc);
 1304: list_to_x_bsyntax({utf16,big},L,Enc) ->
 1305:     list_to_utf16_big_bsyntax(L,Enc);
 1306: list_to_x_bsyntax({utf16,little},L,Enc) ->
 1307:     list_to_utf16_little_bsyntax(L,Enc);
 1308: list_to_x_bsyntax({utf32,big},L,Enc) ->
 1309:     list_to_utf32_big_bsyntax(L,Enc);
 1310: list_to_x_bsyntax({utf32,little},L,Enc) ->
 1311:     list_to_utf32_little_bsyntax(L,Enc).
 1312:     
 1313: 
 1314: make_unaligned(Bin0) when is_binary(Bin0) ->
 1315: %    put(c_count,get(c_count)+1),    
 1316:     Bin1 = <<0:3,Bin0/binary,31:5>>,
 1317:     Sz = byte_size(Bin0),
 1318:     <<0:3,Bin:Sz/binary,31:5>> = id(Bin1),
 1319:     Bin.
 1320: 
 1321: id(I) -> I.
 1322: 
 1323: setlimit(X) ->
 1324:     erts_debug:set_internal_state(available_internal_state,true),
 1325:     io:format("Setting loop limit, old: ~p, now set to ~p~n",
 1326: 	      [erts_debug:set_internal_state(unicode_loop_limit,X),X]).
 1327: 
 1328: 
 1329: %%
 1330: %% Tracing utility
 1331: %%
 1332: 
 1333: %% tr_dump() ->
 1334: %%     erlang:display(lists:sort(ets:tab2list(values))).
 1335: 
 1336: %% tr_off(Pid) ->
 1337: %%     receive after 10000 -> ok end,
 1338: %%     tr_dump(),
 1339: %%     Ref = erlang:monitor(process,Pid),
 1340: %%     exit(Pid,kill),
 1341: %%     receive
 1342: %% 	{'DOWN',Ref,_,_,_} -> ok
 1343: %%     end,
 1344: %%     ok.
 1345: 
 1346: %% tr_on() ->   
 1347: %%     catch ets:delete(values),
 1348: %%     ets:new(values,[named_table,public]),
 1349: %%     ets:insert(values,{traps,0}),
 1350: %%     catch ets:delete(state),
 1351: %%     ets:new(state,[named_table,public]),
 1352: %%     Pid = spawn(?MODULE,trace_recv,[values,state]),
 1353: %%     erlang:trace(new,true,[garbage_collection,{tracer,Pid},timestamp,call]),
 1354: %%     erlang:trace_pattern({erlang,list_to_utf8,2},[{'_',[],[{return_trace}]}],[global]),
 1355: %%     Pid.
 1356: 
 1357: %% ts_to_int({Mega,Sec,Micro}) ->
 1358: %%     ((Mega * 1000000) + Sec) * 1000000 + Micro.
 1359: 
 1360: %% trace_recv(Values,State) ->
 1361: %%     receive
 1362: %% 	{trace_ts,Pid,call,_,TS} ->
 1363: %% 	    case ets:lookup(State,{call,Pid}) of
 1364: %% 		[{{call,Pid},_}] ->
 1365: %% 		    ets:update_counter(values,traps,1);
 1366: %% 		_ ->
 1367: %% 		    ok
 1368: %% 	    end,
 1369: %% 	    ets:insert(State,{{call,Pid},ts_to_int(TS)});
 1370: %% 	{trace_ts,Pid,return_from,_,_,TS} ->
 1371: %% 	    case ets:lookup(State,{call,Pid}) of
 1372: %% 		[{{call,Pid},TS2}] ->
 1373: %% 		    ets:delete(State,{call,Pid}),
 1374: %% 		    Elapsed = ts_to_int(TS) - TS2,
 1375: %% 		    case ets:lookup(Values,Pid) of
 1376: %% 			[{Pid,GCNum,CallNum,GCTime,CallTime}] ->
 1377: %% 			    ets:insert(Values,{Pid,GCNum,CallNum+1,GCTime,CallTime+Elapsed});
 1378: %% 			[] ->
 1379: %% 			     ets:insert(Values,{Pid,0,1,0,Elapsed})
 1380: %% 		    end;
 1381: %% 		_Other ->
 1382: %% 		    erlang:display({what2,Pid})
 1383: %% 	    end;
 1384: %% 	{trace_ts,Pid,gc_start,_,TS} ->
 1385: %% 	    ets:insert(State,{{gc,Pid},ts_to_int(TS)});
 1386: %% 	{trace_ts,Pid,gc_end,_,TS} ->
 1387: %% 	    case ets:lookup(State,{gc,Pid}) of
 1388: %% 		[{{gc,Pid},TS2}] ->
 1389: %% 		    ets:delete(State,{gc,Pid}),
 1390: %% 		    Elapsed = ts_to_int(TS) - TS2,
 1391: %% 		    case ets:lookup(Values,Pid) of
 1392: %% 			[{Pid,Num,CNum,Time,CTime}] ->
 1393: %% 			    ets:insert(Values,{Pid,Num+1,CNum,Time+Elapsed,CTime});
 1394: %% 			[] ->
 1395: %% 			     ets:insert(Values,{Pid,1,0,Elapsed,0})
 1396: %% 		    end;
 1397: %% 		_Other ->
 1398: %% 		    erlang:display({what,Pid})
 1399: %% 	    end;
 1400: %% 	X ->
 1401: %% 	    erlang:display({trace_recv,X})
 1402: %%     end,
 1403: %%     trace_recv(Values,State).