%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2008-2011. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

%%
%% Test suite exercising the utf8/utf16/utf32 bit syntax segment types.
%% The suite interprets itself (see init_per_suite/1), so the test cases
%% run as interpreted code.
%%

-module(bs_utf_SUITE).

-export([all/0, suite/0,groups/0,init_per_group/2,end_per_group/2,
         init_per_suite/1,end_per_suite/1,
         init_per_testcase/2,end_per_testcase/2,
         utf8_roundtrip/1,unused_utf_char/1,utf16_roundtrip/1,
         utf32_roundtrip/1,guard/1,extreme_tripping/1]).

-include_lib("test_server/include/test_server.hrl").
-compile([no_jopt,time]).

%% Suite-wide Common Test settings: installs the ts_install_cth hook.
suite() ->
    [{ct_hooks,[ts_install_cth]}].

%% Every test case of the suite; there are no groups.
all() ->
    cases().

groups() ->
    [].

%% Group callbacks are no-ops; the configuration is passed through as is.
init_per_group(_GroupName, Config) ->
    Config.

end_per_group(_GroupName, Config) ->
    Config.


%% The test cases, in the order they are run.
cases() ->
    [utf8_roundtrip, unused_utf_char, utf16_roundtrip,
     utf32_roundtrip, guard, extreme_tripping].

%% Before each test case: interpret this module again and start a
%% one-minute timetrap. The timetrap handle is stored in the
%% configuration under the key 'watchdog'.
init_per_testcase(_Case, Config) ->
    test_lib:interpret(?MODULE),
    Watchdog = ?t:timetrap(?t:minutes(1)),
    [{watchdog,Watchdog}|Config].

%% After each test case: cancel the timetrap started by
%% init_per_testcase/2.
end_per_testcase(_Case, Config) ->
    ?t:timetrap_cancel(?config(watchdog, Config)),
    ok.

%% Before the suite: interpret this module and verify that it is
%% listed among the interpreted modules.
init_per_suite(Config) when is_list(Config) ->
    ?line test_lib:interpret(?MODULE),
    ?line true = lists:member(?MODULE, int:interpreted()),
    Config.

end_per_suite(Config) when is_list(Config) ->
    ok.
%% Test case: every sample from utf_data/0 must survive decoding from
%% and encoding to UTF-8.
utf8_roundtrip(Config) when is_list(Config) ->
    ?line [utf8_roundtrip_1(P) || P <- utf_data()],
    ok.

%% Checks one sample {Str,Bin,Bin}. The repeated variable Bin in the
%% head requires the two binaries of the sample to be equal. Then:
%%   - decoding Bin gives Str, and encoding Str gives Bin;
%%   - a utf8 segment built inside a guard compares equal to the same
%%     binary built in a body, and differs from the binary that lacks
%%     the leading byte 42.
utf8_roundtrip_1({Str,Bin,Bin}) ->
    ?line Str = utf8_to_list(Bin),
    ?line Bin = list_to_utf8(Str),
    ?line [ok = utf8_guard(C, <<42,C/utf8>>) || C <- Str],
    ?line [error = utf8_guard(C, <<C/utf8>>) || C <- Str],
    ok.

%% Returns ok if Bin is exactly the byte 42 followed by C encoded as
%% UTF-8 (the binary is constructed in the guard); error otherwise.
utf8_guard(C, Bin) when <<42,C/utf8>> =:= Bin -> ok;
utf8_guard(_, _) -> error.

%% Decodes a UTF-8 binary into a list of code points.
utf8_to_list(<<C/utf8,T/binary>>) ->
    [C|utf8_to_list(T)];
utf8_to_list(<<>>) -> [].

%% Encodes a list of code points as a UTF-8 binary.
list_to_utf8(L) ->
    list_to_utf8(L, <<>>).

list_to_utf8([H|T], Bin) ->
    list_to_utf8(T, <<Bin/binary,H/utf8>>);
list_to_utf8([], Bin) -> Bin.

%% Test case: matching out UTF-8 characters whose value is not used
%% (_/utf8) must still step through the binary one character at a
%% time, so the number of characters counted equals the length of the
%% corresponding code point list.
%%
%% utf_data/0 returns 3-tuples. A two-element generator pattern would
%% never match them, and a list comprehension silently skips elements
%% that do not match its generator pattern, so nothing would be
%% checked. A fun is used here so that a sample of unexpected shape
%% crashes the test case (function_clause) instead of being skipped.
unused_utf_char(Config) when is_list(Config) ->
    lists:foreach(fun({Str,Utf8,_}) ->
                          true = utf8_len(Utf8) =:= length(Str)
                  end, utf_data()),
    ok.

%% Counts the characters in a UTF-8 binary without using their values.
utf8_len(B) ->
    utf8_len(B, 0).

utf8_len(<<_/utf8,T/binary>>, N) ->
    utf8_len(T, N+1);
utf8_len(<<>>, N) -> N.

%% Test case: the sample from utf16_data/0 must survive decoding from
%% and encoding to UTF-16, in both big and little endian byte order.
%% The sample holds four characters.
utf16_roundtrip(Config) when is_list(Config) ->
    ?line {Str,Big,Big,Little,Little} = utf16_data(),
    ?line 4 = utf16_big_len(Big),
    ?line 4 = utf16_little_len(Little),
    ?line Str = big_utf16_to_list(Big),
    ?line Str = little_utf16_to_list(Little),

    ?line Big = list_to_big_utf16(Str),
    ?line Little = list_to_little_utf16(Str),

    ok.

%% Counts the characters in a big-endian UTF-16 binary.
utf16_big_len(B) ->
    utf16_big_len(B, 0).

utf16_big_len(<<_/utf16,T/binary>>, N) ->
    utf16_big_len(T, N+1);
utf16_big_len(<<>>, N) -> N.

%% Counts the characters in a little-endian UTF-16 binary.
utf16_little_len(B) ->
    utf16_little_len(B, 0).

utf16_little_len(<<_/little-utf16,T/binary>>, N) ->
    utf16_little_len(T, N+1);
utf16_little_len(<<>>, N) -> N.

%% Encodes a list of code points as a big-endian UTF-16 binary.
list_to_big_utf16(List) ->
    list_to_big_utf16(List, <<>>).

list_to_big_utf16([H|T], Bin) ->
    list_to_big_utf16(T, <<Bin/binary,H/utf16>>);
list_to_big_utf16([], Bin) -> Bin.
%% Encodes a list of code points as a little-endian UTF-16 binary.
list_to_little_utf16(Chars) ->
    list_to_little_utf16(Chars, <<>>).

%% Accumulator loop: appends one little-endian UTF-16 segment per
%% code point.
list_to_little_utf16([], Acc) ->
    Acc;
list_to_little_utf16([Char|Rest], Acc) ->
    list_to_little_utf16(Rest, <<Acc/binary,Char/little-utf16>>).

%% Decodes a big-endian UTF-16 binary into a list of code points.
big_utf16_to_list(<<>>) ->
    [];
big_utf16_to_list(<<Char/utf16,Rest/binary>>) ->
    [Char|big_utf16_to_list(Rest)].

%% Decodes a little-endian UTF-16 binary into a list of code points.
little_utf16_to_list(<<>>) ->
    [];
little_utf16_to_list(<<Char/little-utf16,Rest/binary>>) ->
    [Char|little_utf16_to_list(Rest)].

%% Test case: the sample from utf32_data/0 must survive decoding from
%% and encoding to UTF-32, in both big and little endian byte order.
%% The sample holds four characters.
utf32_roundtrip(Config) when is_list(Config) ->
    ?line {Chars,BigBin,BigBin,LittleBin,LittleBin} = utf32_data(),
    ?line 4 = utf32_big_len(BigBin),
    ?line 4 = utf32_little_len(LittleBin),
    ?line Chars = big_utf32_to_list(BigBin),
    ?line Chars = little_utf32_to_list(LittleBin),

    ?line BigBin = list_to_big_utf32(Chars),
    ?line LittleBin = list_to_little_utf32(Chars),

    ok.

%% Counts the characters in a big-endian UTF-32 binary without using
%% their values.
utf32_big_len(Bin) ->
    utf32_big_len(Bin, 0).

utf32_big_len(<<>>, Count) ->
    Count;
utf32_big_len(<<_/utf32,Rest/binary>>, Count) ->
    utf32_big_len(Rest, Count+1).

%% Counts the characters in a little-endian UTF-32 binary without
%% using their values.
utf32_little_len(Bin) ->
    utf32_little_len(Bin, 0).

utf32_little_len(<<>>, Count) ->
    Count;
utf32_little_len(<<_/little-utf32,Rest/binary>>, Count) ->
    utf32_little_len(Rest, Count+1).

%% Encodes a list of code points as a big-endian UTF-32 binary.
list_to_big_utf32(Chars) ->
    list_to_big_utf32(Chars, <<>>).

list_to_big_utf32([], Acc) ->
    Acc;
list_to_big_utf32([Char|Rest], Acc) ->
    list_to_big_utf32(Rest, <<Acc/binary,Char/utf32>>).

%% Encodes a list of code points as a little-endian UTF-32 binary.
list_to_little_utf32(Chars) ->
    list_to_little_utf32(Chars, <<>>).

list_to_little_utf32([], Acc) ->
    Acc;
list_to_little_utf32([Char|Rest], Acc) ->
    list_to_little_utf32(Rest, <<Acc/binary,Char/little-utf32>>).

%% Decodes a big-endian UTF-32 binary into a list of code points.
big_utf32_to_list(<<>>) ->
    [];
big_utf32_to_list(<<Char/utf32,Rest/binary>>) ->
    [Char|big_utf32_to_list(Rest)].

%% Decodes a little-endian UTF-32 binary into a list of code points.
little_utf32_to_list(<<>>) ->
    [];
little_utf32_to_list(<<Char/little-utf32,Rest/binary>>) ->
    [Char|little_utf32_to_list(Rest)].
%% Test case: constructing a UTF segment from an invalid code point
%% inside a guard must make the guard fail rather than raise an
%% exception. 16#D800 is a UTF-16 surrogate code point and is
%% expected to be rejected by all three encodings.
guard(Config) when is_list(Config) ->
    ?line error = do_guard(16#D800),
    ok.

%% Tries to encode Char as UTF-8, UTF-16 and UTF-32 in turn, each
%% inside a guard. An exception in a guard makes that guard fail, so
%% 'error' is returned only when all three guards fail.
do_guard(Char) when byte_size(<<Char/utf8>>) =/= 42 -> ok;
do_guard(Char) when byte_size(<<Char/utf16>>) =/= 42 -> ok;
do_guard(Char) when byte_size(<<Char/utf32>>) =/= 42 -> ok;
do_guard(_) -> error.

%% The purpose of this test is to make sure that
%% the delayed creation of sub-binaries works.
%%
%% The code points 0 through 1024 are converted between the three
%% encodings along several paths; all paths must agree and the last
%% step must give back the original list.

extreme_tripping(Config) when is_list(Config) ->
    ?line CodePoints = lists:seq(0, 1024),
    ?line AsUtf8 = unicode_to_utf8(CodePoints, <<>>),
    ?line AsUtf16 = utf8_to_utf16(AsUtf8, <<>>),
    ?line AsUtf32 = utf8_to_utf32(AsUtf8, <<>>),
    ?line AsUtf32 = utf16_to_utf32(AsUtf16, <<>>),
    ?line AsUtf8 = utf32_to_utf8(AsUtf32, <<>>),
    ?line CodePoints = utf32_to_unicode(AsUtf32),
    ok.

%% Appends each code point in the list to Acc as UTF-8.
unicode_to_utf8([Char|Rest], Acc) ->
    unicode_to_utf8(Rest, <<Acc/bytes,Char/utf8>>);
unicode_to_utf8([], Acc) -> Acc.

%% Transcodes UTF-8 to big-endian UTF-16, appending to Acc.
utf8_to_utf16(<<Char/utf8,Rest/binary>>, Acc) ->
    utf8_to_utf16(Rest, <<Acc/bytes,Char/utf16>>);
utf8_to_utf16(<<>>, Acc) -> Acc.

%% Transcodes big-endian UTF-16 to big-endian UTF-32, appending to Acc.
utf16_to_utf32(<<Char/utf16,Rest/binary>>, Acc) ->
    utf16_to_utf32(Rest, <<Acc/bytes,Char/utf32>>);
utf16_to_utf32(<<>>, Acc) -> Acc.

%% Transcodes UTF-8 to big-endian UTF-32, appending to Acc.
utf8_to_utf32(<<Char/utf8,Rest/binary>>, Acc) ->
    utf8_to_utf32(Rest, <<Acc/bytes,Char/utf32>>);
utf8_to_utf32(<<>>, Acc) -> Acc.

%% Transcodes big-endian UTF-32 to UTF-8, appending to Acc.
utf32_to_utf8(<<Char/utf32,Rest/binary>>, Acc) ->
    utf32_to_utf8(Rest, <<Acc/bytes,Char/utf8>>);
utf32_to_utf8(<<>>, Acc) -> Acc.

%% Decodes a big-endian UTF-32 binary into a list of code points.
utf32_to_unicode(<<Char/utf32,Rest/binary>>) ->
    [Char|utf32_to_unicode(Rest)];
utf32_to_unicode(<<>>) -> [].

%% UTF-8 samples. Each element is {CodePoints, Constructed, Expected}:
%% the code point list, the binary built with utf8 segments, and the
%% same text written out byte by byte.
utf_data() ->
    %% From RFC-3629.

    %% Give the compiler a chance to do some constant propagation.
    NotIdentical = 16#2262,

    [
     %% "A<NOT IDENTICAL TO><ALPHA>."
     {[16#0041,NotIdentical,16#0391,16#002E],
      <<16#0041/utf8,NotIdentical/utf8,16#0391/utf8,16#002E/utf8>>,
      <<16#41,16#E2,16#89,16#A2,16#CE,16#91,16#2E>>},

     %% Korean "hangugeo" (meaning "the Korean language")
     {[16#D55C,16#AD6D,16#C5B4],
      <<16#D55C/utf8,16#AD6D/utf8,16#C5B4/utf8>>,
      <<16#ED,16#95,16#9C,16#EA,16#B5,16#AD,16#EC,16#96,16#B4>>},

     %% Japanese "nihongo" (meaning "the Japanese language").
     {[16#65E5,16#672C,16#8A9E],
      <<16#65E5/utf8,16#672C/utf8,16#8A9E/utf8>>,
      <<16#E6,16#97,16#A5,16#E6,16#9C,16#AC,16#E8,16#AA,16#9E>>}
    ].

%% UTF-16 sample: {CodePoints, BigConstructed, BigExpected,
%% LittleConstructed, LittleExpected}.
utf16_data() ->
    %% Example from RFC-2781. "*=Ra", where "*" represents a
    %% hypothetical Ra hieroglyph (code point 16#12345).

    %% Give the compiler a chance to do some constant propagation.
    RaHieroglyph = 16#12345,

    %% First as a list of Unicode characters.
    {[RaHieroglyph,16#3D,16#52,16#61],

     %% Big endian (the two binaries should be equal).
     <<RaHieroglyph/big-utf16,16#3D/big-utf16,16#52/big-utf16,16#61/big-utf16>>,
     <<16#D8,16#08,16#DF,16#45,16#00,16#3D,16#00,16#52,16#00,16#61>>,

     %% Little endian (the two binaries should be equal).
     <<RaHieroglyph/little-utf16,16#3D/little-utf16,
       16#52/little-utf16,16#61/little-utf16>>,
     <<16#08,16#D8,16#45,16#DF,16#3D,16#00,16#52,16#00,16#61,16#00>>}.

%% UTF-32 sample: {CodePoints, BigConstructed, BigExpected,
%% LittleConstructed, LittleExpected}. The expected binaries are
%% written as plain 32-bit integers.
utf32_data() ->
    %% "A<NOT IDENTICAL TO><ALPHA>."
    NotIdentical = 16#2262,
    {[16#0041,NotIdentical,16#0391,16#002E],

     %% Big endian.
     <<16#0041/utf32,NotIdentical/utf32,16#0391/utf32,16#002E/utf32>>,
     <<16#41:32,NotIdentical:32,16#0391:32,16#2E:32>>,

     %% Little endian.
     <<16#0041/little-utf32,NotIdentical/little-utf32,
       16#0391/little-utf32,16#002E/little-utf32>>,
     <<16#41:32/little,NotIdentical:32/little,
       16#0391:32/little,16#2E:32/little>>}.