From 2496dccae4622cb5fea9ae8f54adb0b83eadaf07 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Tue, 24 Jan 2012 23:44:19 -0800 Subject: [PATCH 01/12] Changed type of str::from_bytes and added str::from_byte --- src/libcore/str.rs | 61 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 823f7cc78c6b2..5febb443828fd 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -13,6 +13,7 @@ export // Creating a string from_bytes, unsafe_from_bytes, + from_byte, unsafe_from_byte, //push_utf8_bytes, from_char, @@ -117,14 +118,11 @@ Section: Creating a string /* Function: from_bytes -Safely convert a vector of bytes to a UTF-8 string, or error +Convert a vector of bytes to a UTF-8 string. Fails if invalid UTF-8. */ -fn from_bytes(vv: [u8]) -> result::t { - if is_utf8(vv) { - ret result::ok(unsafe_from_bytes(vv)); - } else { - ret result::err("vector doesn't contain valid UTF-8"); - } +fn from_bytes(vv: [u8]) -> str { + assert is_utf8(vv); + ret unsafe_from_bytes(vv); } /* @@ -133,7 +131,7 @@ Function: unsafe_from_bytes Converts a vector of bytes to a string. Does not verify that the vector contains valid UTF-8. -// FIXME: remove? +FIXME: don't export? */ fn unsafe_from_bytes(v: [const u8]) -> str unsafe { let vcopy: [u8] = v + [0u8]; @@ -152,6 +150,16 @@ FIXME: rename to 'from_byte' */ fn unsafe_from_byte(u: u8) -> str { unsafe_from_bytes([u]) } + +/* +Function: from_byte + +Convert a byte to a UTF-8 string. Fails if invalid UTF-8. +*/ +fn from_byte(uu: u8) -> str { + from_bytes([uu]) +} + fn push_utf8_bytes(&s: str, ch: char) { let code = ch as uint; let bytes = @@ -526,7 +534,7 @@ fn split(s: str, sep: u8) -> [str] { v += [accum]; accum = ""; ends_with_sep = true; - } else { accum += unsafe_from_byte(c); ends_with_sep = false; } + } else { accum += from_byte(c); ends_with_sep = false; } } if byte_len(accum) != 0u || ends_with_sep { v += [accum]; } ret v; @@ -554,7 +562,7 @@ fn splitn(s: str, sep: u8, count: uint) -> [str] { v += [accum]; accum = ""; ends_with_sep = true; - } else { accum += unsafe_from_byte(c); ends_with_sep = false; } + } else { accum += from_byte(c); ends_with_sep = false; } } if byte_len(accum) != 0u || ends_with_sep { v += [accum]; } ret v; @@ -575,12 +583,12 @@ FIXME: should behave like split and split_char: */ fn split_str(s: str, sep: str) -> [str] { assert byte_len(sep) > 0u; - let v: [str] = [], accum = "", sep_match = 0u, leading = true; + let v: [str] = [], accum = [], sep_match = 0u, leading = true; for c: u8 in s { // Did we match the entire separator? if sep_match == byte_len(sep) { - if !leading { v += [accum]; } - accum = ""; + if !leading { vec::push(v, from_bytes(accum)); } + accum = []; sep_match = 0u; } @@ -588,13 +596,13 @@ fn split_str(s: str, sep: str) -> [str] { sep_match += 1u; } else { sep_match = 0u; - accum += unsafe_from_byte(c); + vec::push(accum, c); leading = false; } } - if byte_len(accum) > 0u { v += [accum]; } - if sep_match == byte_len(sep) { v += [""]; } + if vec::len(accum) > 0u { vec::push(v, from_bytes(accum)); } + if sep_match == byte_len(sep) { vec::push(v, ""); } ret v; } @@ -1783,7 +1791,24 @@ mod tests { 0x20_u8, 0x4e_u8, 0x61_u8, 0x6d_u8]; - assert ss == result::get(from_bytes(bb)); + assert ss == from_bytes(bb); + } + + #[test] + #[should_fail] + fn test_from_bytes_fail() { + let bb = [0xff_u8, 0xb8_u8, 0xa8_u8, + 0xe0_u8, 0xb9_u8, 0x84_u8, + 0xe0_u8, 0xb8_u8, 0x97_u8, + 0xe0_u8, 0xb8_u8, 0xa2_u8, + 0xe4_u8, 0xb8_u8, 0xad_u8, + 0xe5_u8, 0x8d_u8, 0x8e_u8, + 0x56_u8, 0x69_u8, 0xe1_u8, + 0xbb_u8, 0x87_u8, 0x74_u8, + 0x20_u8, 0x4e_u8, 0x61_u8, + 0x6d_u8]; + + let _x = from_bytes(bb); } #[test] @@ -1821,7 +1846,7 @@ mod tests { let s1: str = "All mimsy were the borogoves"; let v: [u8] = bytes(s1); - let s2: str = unsafe_from_bytes(v); + let s2: str = from_bytes(v); let i: uint = 0u; let n1: uint = byte_len(s1); let n2: uint = vec::len::(v); From 9750e83a17a8f9f865eae757dcdccf374b1c82b7 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Tue, 24 Jan 2012 23:47:32 -0800 Subject: [PATCH 02/12] Replace uses of str::unsafe_from_byte --- src/comp/metadata/tydecode.rs | 4 ++-- src/libcore/str.rs | 2 +- src/libcore/uint.rs | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/comp/metadata/tydecode.rs b/src/comp/metadata/tydecode.rs index 2e4e5cdc66dba..cbd11c1c1b998 100644 --- a/src/comp/metadata/tydecode.rs +++ b/src/comp/metadata/tydecode.rs @@ -39,7 +39,7 @@ fn parse_ident_(st: @pstate, is_last: fn@(char) -> bool) -> ast::ident { let rslt = ""; while !is_last(peek(st) as char) { - rslt += str::unsafe_from_byte(next(st)); + rslt += str::from_byte(next(st)); } ret rslt; } @@ -226,7 +226,7 @@ fn parse_ty(st: @pstate, conv: conv_did) -> ty::t { while peek(st) as char != ']' { let name = ""; while peek(st) as char != '=' { - name += str::unsafe_from_byte(next(st)); + name += str::from_byte(next(st)); } st.pos = st.pos + 1u; fields += [{ident: name, mt: parse_mt(st, conv)}]; diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 5febb443828fd..36453d78c98fc 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -146,7 +146,7 @@ Function: unsafe_from_byte Converts a byte to a string. Does not verify that the byte is valid UTF-8. -FIXME: rename to 'from_byte' +FIXME: REMOVE. */ fn unsafe_from_byte(u: u8) -> str { unsafe_from_bytes([u]) } diff --git a/src/libcore/uint.rs b/src/libcore/uint.rs index 8d032610e179e..2112399ba8026 100644 --- a/src/libcore/uint.rs +++ b/src/libcore/uint.rs @@ -236,12 +236,12 @@ fn to_str(num: uint, radix: uint) -> str { if n == 0u { ret "0"; } let s: str = ""; while n != 0u { - s += str::unsafe_from_byte(digit(n % radix) as u8); + s += str::from_byte(digit(n % radix) as u8); n /= radix; } let s1: str = ""; let len: uint = str::byte_len(s); - while len != 0u { len -= 1u; s1 += str::unsafe_from_byte(s[len]); } + while len != 0u { len -= 1u; s1 += str::from_byte(s[len]); } ret s1; } From c7b23f9a86fcdf328ebb349d9a5ad4e4c7dfe6ce Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Wed, 25 Jan 2012 00:53:17 -0800 Subject: [PATCH 03/12] Replacing str::unsafe_from_bytes with str::from_bytes (part 1) --- src/libcore/extfmt.rs | 1 + src/libstd/freebsd_os.rs | 3 ++- src/libstd/generic_os.rs | 2 +- src/libstd/linux_os.rs | 3 ++- src/libstd/macos_os.rs | 3 ++- src/libstd/run_program.rs | 4 ++-- src/libstd/uvtmp.rs | 4 ++-- src/libstd/win32_os.rs | 3 ++- src/test/bench/task-perf-word-count-generic.rs | 2 +- src/test/run-pass/hashmap-memory.rs | 2 +- 10 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/libcore/extfmt.rs b/src/libcore/extfmt.rs index 38b36f59d323d..a06e01f491e76 100644 --- a/src/libcore/extfmt.rs +++ b/src/libcore/extfmt.rs @@ -440,6 +440,7 @@ mod rt { let head = s[0]; if head == '+' as u8 || head == '-' as u8 || head == ' ' as u8 { let headstr = str::unsafe_from_bytes([head]); + // FIXME: not UTF-8 safe let bytelen = str::byte_len(s); let numpart = str::substr(s, 1u, bytelen - 1u); ret headstr + padstr + numpart; diff --git a/src/libstd/freebsd_os.rs b/src/libstd/freebsd_os.rs index 847262ca10cd7..89aad5d777785 100644 --- a/src/libstd/freebsd_os.rs +++ b/src/libstd/freebsd_os.rs @@ -129,7 +129,8 @@ fn dylib_filename(base: str) -> str { ret "lib" + base + ".so"; } /// followed by a path separator fn get_exe_path() -> option::t unsafe { let bufsize = 1023u; - let path = str::unsafe_from_bytes(vec::init_elt(bufsize, 0u8)); + // FIXME: path "strings" will likely need fixing... + let path = str::from_bytes(vec::init_elt(bufsize, 0u8)); let mib = [libc_constants::CTL_KERN, libc_constants::KERN_PROC, libc_constants::KERN_PROC_PATHNAME, -1i32]; diff --git a/src/libstd/generic_os.rs b/src/libstd/generic_os.rs index 750b333f1a656..934352e778844 100644 --- a/src/libstd/generic_os.rs +++ b/src/libstd/generic_os.rs @@ -75,7 +75,7 @@ fn getenv(n: str) -> option::t { unsafe { vec::unsafe::set_len(v, res); } - ret option::some(str::unsafe_from_bytes(v)); + ret option::some(str::from_bytes(v)); // UTF-8 or fail } else { nsize = res; } } fail; diff --git a/src/libstd/linux_os.rs b/src/libstd/linux_os.rs index 7bc0212c1c2f6..82b1197a51fbd 100644 --- a/src/libstd/linux_os.rs +++ b/src/libstd/linux_os.rs @@ -125,7 +125,8 @@ fn dylib_filename(base: str) -> str { ret "lib" + base + ".so"; } /// followed by a path separator fn get_exe_path() -> option::t { let bufsize = 1023u; - let path = str::unsafe_from_bytes(vec::init_elt(bufsize, 0u8)); + // FIXME: path "strings" will likely need fixing... + let path = str::from_bytes(vec::init_elt(bufsize, 0u8)); ret str::as_buf("/proc/self/exe", { |proc_self_buf| str::as_buf(path, { |path_buf| if libc::readlink(proc_self_buf, path_buf, bufsize) != -1 { diff --git a/src/libstd/macos_os.rs b/src/libstd/macos_os.rs index b72fc73237931..cd984870781cb 100644 --- a/src/libstd/macos_os.rs +++ b/src/libstd/macos_os.rs @@ -133,8 +133,9 @@ fn dylib_filename(base: str) -> str { ret "lib" + base + ".dylib"; } fn get_exe_path() -> option::t { // FIXME: This doesn't handle the case where the buffer is too small + // FIXME: path "strings" will likely need fixing... let bufsize = 1023u32; - let path = str::unsafe_from_bytes(vec::init_elt(bufsize as uint, 0u8)); + let path = str::from_bytes(vec::init_elt(bufsize as uint, 0u8)); ret str::as_buf(path, { |path_buf| if mac_libc::_NSGetExecutablePath(path_buf, ptr::mut_addr_of(bufsize)) == 0i32 { diff --git a/src/libstd/run_program.rs b/src/libstd/run_program.rs index e40526b58840a..5b2de1e57d0c8 100644 --- a/src/libstd/run_program.rs +++ b/src/libstd/run_program.rs @@ -216,7 +216,7 @@ fn read_all(rd: io::reader) -> str { let buf = ""; while !rd.eof() { let bytes = rd.read_bytes(4096u); - buf += str::unsafe_from_bytes(bytes); + buf += str::from_bytes(bytes); } ret buf; } @@ -347,7 +347,7 @@ mod tests { let buf = ""; while !reader.eof() { let bytes = reader.read_bytes(4096u); - buf += str::unsafe_from_bytes(bytes); + buf += str::from_bytes(bytes); } os::fclose(file); ret buf; diff --git a/src/libstd/uvtmp.rs b/src/libstd/uvtmp.rs index e2059890511a6..d92a42d2c5ee4 100644 --- a/src/libstd/uvtmp.rs +++ b/src/libstd/uvtmp.rs @@ -131,7 +131,7 @@ fn test_http() { unsafe { log(error, len); let buf = vec::unsafe::from_buf(buf, len as uint); - let str = str::unsafe_from_bytes(buf); + let str = str::from_bytes(buf); #error("read something"); io::println(str); } @@ -146,4 +146,4 @@ fn test_http() { } join_thread(thread); delete_thread(thread); -} \ No newline at end of file +} diff --git a/src/libstd/win32_os.rs b/src/libstd/win32_os.rs index 949c818a7ed9d..83306c757a4dd 100644 --- a/src/libstd/win32_os.rs +++ b/src/libstd/win32_os.rs @@ -113,8 +113,9 @@ fn getcwd() -> str { ret rustrt::rust_getcwd(); } fn get_exe_path() -> option::t { // FIXME: This doesn't handle the case where the buffer is too small + // FIXME: path "strings" will likely need fixing... let bufsize = 1023u; - let path = str::unsafe_from_bytes(vec::init_elt(bufsize, 0u8)); + let path = str::from_bytes(vec::init_elt(bufsize, 0u8)); ret str::as_buf(path, { |path_buf| if kernel32::GetModuleFileNameA(0u, path_buf, bufsize as u32) != 0u32 { diff --git a/src/test/bench/task-perf-word-count-generic.rs b/src/test/bench/task-perf-word-count-generic.rs index 55fbc6705b682..8be28fabd7943 100644 --- a/src/test/bench/task-perf-word-count-generic.rs +++ b/src/test/bench/task-perf-word-count-generic.rs @@ -32,7 +32,7 @@ import comm::recv; import comm::send; fn map(&&filename: [u8], emit: map_reduce::putter<[u8], int>) { - let f = io::file_reader(str::unsafe_from_bytes(filename)); + let f = io::file_reader(str::from_bytes(filename)); while true { alt read_word(f) { diff --git a/src/test/run-pass/hashmap-memory.rs b/src/test/run-pass/hashmap-memory.rs index 777b56c7520d3..9301d93874ebe 100644 --- a/src/test/run-pass/hashmap-memory.rs +++ b/src/test/run-pass/hashmap-memory.rs @@ -81,7 +81,7 @@ mod map_reduce { mapper_done { num_mappers -= 1; } find_reducer(k, cc) { let c; - alt reducers.find(str::unsafe_from_bytes(k)) { + alt reducers.find(str::from_bytes(k)) { some(_c) { c = _c; } none { c = 0; } } From 64ce563c054573bd12425e3253b54c1ee91ec84f Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Wed, 25 Jan 2012 01:00:21 -0800 Subject: [PATCH 04/12] Replacing str::unsafe_from_bytes with str::from_bytes (part 2) --- src/compiletest/procsrv.rs | 8 ++++---- src/libcore/extfmt.rs | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/compiletest/procsrv.rs b/src/compiletest/procsrv.rs index a758e9a21bb71..6f3fd87466a6c 100644 --- a/src/compiletest/procsrv.rs +++ b/src/compiletest/procsrv.rs @@ -85,7 +85,7 @@ fn readclose(fd: fd_t) -> str { let buf = ""; while !reader.eof() { let bytes = reader.read_bytes(4096u); - buf += str::unsafe_from_bytes(bytes); + buf += str::from_bytes(bytes); } os::fclose(file); ret buf; @@ -114,8 +114,8 @@ fn worker(p: port) { // the alt discriminant are wrong. alt recv(p) { exec(lib_path, prog, args, respchan) { - {lib_path: str::unsafe_from_bytes(lib_path), - prog: str::unsafe_from_bytes(prog), + {lib_path: str::from_bytes(lib_path), + prog: str::from_bytes(prog), args: clone_vecu8str(args), respchan: respchan} } @@ -189,7 +189,7 @@ fn clone_vecstr(v: [str]) -> [[u8]] { fn clone_vecu8str(v: [[u8]]) -> [str] { let r = []; for t in vec::slice(v, 0u, vec::len(v)) { - r += [str::unsafe_from_bytes(t)]; + r += [str::from_bytes(t)]; } ret r; } diff --git a/src/libcore/extfmt.rs b/src/libcore/extfmt.rs index a06e01f491e76..eb91c2cb5d517 100644 --- a/src/libcore/extfmt.rs +++ b/src/libcore/extfmt.rs @@ -390,7 +390,7 @@ mod rt { fn str_init_elt(n_elts: uint, c: char) -> str { let svec = vec::init_elt::(n_elts, c as u8); - ret str::unsafe_from_bytes(svec); + ret str::from_bytes(svec); } enum pad_mode { pad_signed, pad_unsigned, pad_nozero, } fn pad(cv: conv, s: str, mode: pad_mode) -> str { @@ -439,7 +439,7 @@ mod rt { if signed && zero_padding && str::byte_len(s) > 0u { let head = s[0]; if head == '+' as u8 || head == '-' as u8 || head == ' ' as u8 { - let headstr = str::unsafe_from_bytes([head]); + let headstr = str::from_bytes([head]); // FIXME: not UTF-8 safe let bytelen = str::byte_len(s); let numpart = str::substr(s, 1u, bytelen - 1u); From eaa4befd6df4a26be0776d6aab0a7925232d76a7 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Wed, 25 Jan 2012 01:07:05 -0800 Subject: [PATCH 05/12] Replacing str::unsafe_from_bytes with str::from_bytes (part 3) --- src/comp/middle/trans.rs | 2 +- src/comp/util/ppaux.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/comp/middle/trans.rs b/src/comp/middle/trans.rs index 8de5d7257e27b..e0dddd7057b15 100644 --- a/src/comp/middle/trans.rs +++ b/src/comp/middle/trans.rs @@ -274,7 +274,7 @@ fn sanitize(s: str) -> str { c != ' ' as u8 && c != '\t' as u8 && c != ';' as u8 { let v = [c]; - result += str::unsafe_from_bytes(v); + result += str::from_bytes(v); } } } diff --git a/src/comp/util/ppaux.rs b/src/comp/util/ppaux.rs index 11ccd1cc71f29..37e5a5f5101d7 100644 --- a/src/comp/util/ppaux.rs +++ b/src/comp/util/ppaux.rs @@ -122,7 +122,7 @@ fn ty_to_str(cx: ctxt, typ: t) -> str { } ty_var(v) { "" } ty_param(id, _) { - "'" + str::unsafe_from_bytes([('a' as u8) + (id as u8)]) + "'" + str::from_bytes([('a' as u8) + (id as u8)]) } _ { ty_to_short_str(cx, typ) } } From 87320a9f2768e5e011a6c8edaceb3d3b7ddc2747 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Wed, 25 Jan 2012 01:20:55 -0800 Subject: [PATCH 06/12] Replacing str::unsafe_from_bytes with str::from_bytes (part 4) --- src/comp/driver/driver.rs | 2 +- src/comp/metadata/decoder.rs | 20 ++++++++++---------- src/comp/syntax/parse/lexer.rs | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/comp/driver/driver.rs b/src/comp/driver/driver.rs index 9d8d9ac03b7ba..de622ad82516d 100644 --- a/src/comp/driver/driver.rs +++ b/src/comp/driver/driver.rs @@ -96,7 +96,7 @@ fn get_input_str(sess: session, infile: str) -> str { } } } else { io::stdin() }; - str::unsafe_from_bytes(stream.read_whole_stream()) + str::from_bytes(stream.read_whole_stream()) } fn time(do_it: bool, what: str, thunk: fn@() -> T) -> T { diff --git a/src/comp/metadata/decoder.rs b/src/comp/metadata/decoder.rs index bdd4623bbeaa4..7711c819b064d 100644 --- a/src/comp/metadata/decoder.rs +++ b/src/comp/metadata/decoder.rs @@ -83,7 +83,7 @@ fn item_family(item: ebml::doc) -> u8 { fn item_symbol(item: ebml::doc) -> str { let sym = ebml::get_doc(item, tag_items_data_item_symbol); - ret str::unsafe_from_bytes(ebml::doc_data(sym)); + ret str::from_bytes(ebml::doc_data(sym)); } fn variant_tag_id(d: ebml::doc) -> ast::def_id { @@ -161,7 +161,7 @@ fn tag_variant_ids(item: ebml::doc, cdata: cmd) -> [ast::def_id] { // definition the path refers to. fn resolve_path(path: [ast::ident], data: @[u8]) -> [ast::def_id] { fn eq_item(data: [u8], s: str) -> bool { - ret str::eq(str::unsafe_from_bytes(data), s); + ret str::eq(str::from_bytes(data), s); } let s = str::connect(path, "::"); let md = ebml::new_doc(data); @@ -177,7 +177,7 @@ fn resolve_path(path: [ast::ident], data: @[u8]) -> [ast::def_id] { fn item_name(item: ebml::doc) -> ast::ident { let name = ebml::get_doc(item, tag_paths_data_name); - str::unsafe_from_bytes(ebml::doc_data(name)) + str::from_bytes(ebml::doc_data(name)) } fn lookup_item_name(data: @[u8], id: ast::node_id) -> ast::ident { @@ -326,7 +326,7 @@ fn read_path(d: ebml::doc) -> {path: str, pos: uint} { let desc = ebml::doc_data(d); let pos = ebml::be_uint_from_bytes(@desc, 0u, 4u); let pathbytes = vec::slice::(desc, 4u, vec::len::(desc)); - let path = str::unsafe_from_bytes(pathbytes); + let path = str::from_bytes(pathbytes); ret {path: path, pos: pos}; } @@ -359,21 +359,21 @@ fn get_meta_items(md: ebml::doc) -> [@ast::meta_item] { let items: [@ast::meta_item] = []; ebml::tagged_docs(md, tag_meta_item_word) {|meta_item_doc| let nd = ebml::get_doc(meta_item_doc, tag_meta_item_name); - let n = str::unsafe_from_bytes(ebml::doc_data(nd)); + let n = str::from_bytes(ebml::doc_data(nd)); items += [attr::mk_word_item(n)]; }; ebml::tagged_docs(md, tag_meta_item_name_value) {|meta_item_doc| let nd = ebml::get_doc(meta_item_doc, tag_meta_item_name); let vd = ebml::get_doc(meta_item_doc, tag_meta_item_value); - let n = str::unsafe_from_bytes(ebml::doc_data(nd)); - let v = str::unsafe_from_bytes(ebml::doc_data(vd)); + let n = str::from_bytes(ebml::doc_data(nd)); + let v = str::from_bytes(ebml::doc_data(vd)); // FIXME (#611): Should be able to decode meta_name_value variants, // but currently they can't be encoded items += [attr::mk_name_value_item_str(n, v)]; }; ebml::tagged_docs(md, tag_meta_item_list) {|meta_item_doc| let nd = ebml::get_doc(meta_item_doc, tag_meta_item_name); - let n = str::unsafe_from_bytes(ebml::doc_data(nd)); + let n = str::from_bytes(ebml::doc_data(nd)); let subitems = get_meta_items(meta_item_doc); items += [attr::mk_list_item(n, subitems)]; }; @@ -428,7 +428,7 @@ fn get_crate_deps(data: @[u8]) -> [crate_dep] { let depsdoc = ebml::get_doc(cratedoc, tag_crate_deps); let crate_num = 1; ebml::tagged_docs(depsdoc, tag_crate_dep) {|depdoc| - let depname = str::unsafe_from_bytes(ebml::doc_data(depdoc)); + let depname = str::from_bytes(ebml::doc_data(depdoc)); deps += [{cnum: crate_num, ident: depname}]; crate_num += 1; }; @@ -448,7 +448,7 @@ fn list_crate_deps(data: @[u8], out: io::writer) { fn get_crate_hash(data: @[u8]) -> str { let cratedoc = ebml::new_doc(data); let hashdoc = ebml::get_doc(cratedoc, tag_crate_hash); - ret str::unsafe_from_bytes(ebml::doc_data(hashdoc)); + ret str::from_bytes(ebml::doc_data(hashdoc)); } fn list_crate_items(bytes: @[u8], md: ebml::doc, out: io::writer) { diff --git a/src/comp/syntax/parse/lexer.rs b/src/comp/syntax/parse/lexer.rs index 3dc1666b11e31..f6a25ea61a7d5 100644 --- a/src/comp/syntax/parse/lexer.rs +++ b/src/comp/syntax/parse/lexer.rs @@ -671,7 +671,7 @@ fn gather_comments_and_literals(cm: codemap::codemap, path: str, srdr: io::reader) -> {cmnts: [cmnt], lits: [lit]} { - let src = str::unsafe_from_bytes(srdr.read_whole_stream()); + let src = str::from_bytes(srdr.read_whole_stream()); let itr = @interner::mk::(str::hash, str::eq); let rdr = new_reader(cm, diagnostic, src, codemap::new_filemap(path, 0u, 0u), itr); From 57717fa4d8f27a05e16cbc4de0d55728aaf32e73 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Wed, 25 Jan 2012 01:29:44 -0800 Subject: [PATCH 07/12] Replacing str::unsafe_from_bytes with str::from_bytes (part 5) --- src/libstd/io.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libstd/io.rs b/src/libstd/io.rs index 1db355dd385f9..ee03af79ab041 100644 --- a/src/libstd/io.rs +++ b/src/libstd/io.rs @@ -109,7 +109,7 @@ impl reader_util for reader { if ch == -1 || ch == 10 { break; } buf += [ch as u8]; } - str::unsafe_from_bytes(buf) + str::from_bytes(buf) } fn read_c_str() -> str { @@ -118,7 +118,7 @@ impl reader_util for reader { let ch = self.read_byte(); if ch < 1 { break; } else { buf += [ch as u8]; } } - str::unsafe_from_bytes(buf) + str::from_bytes(buf) } // FIXME deal with eof? @@ -479,7 +479,7 @@ fn seek_in_buf(offset: int, pos: uint, len: uint, whence: seek_style) -> fn read_whole_file_str(file: str) -> result::t { result::chain(read_whole_file(file), { |bytes| - result::ok(str::unsafe_from_bytes(bytes)) + result::ok(str::from_bytes(bytes)) }) } From a185b106471b4bc894393aecb31a23b71c8a4886 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Wed, 25 Jan 2012 01:41:23 -0800 Subject: [PATCH 08/12] Replacing str::unsafe_from_bytes with str::from_bytes (part 6) --- src/libstd/io.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/libstd/io.rs b/src/libstd/io.rs index ee03af79ab041..201cd46916e53 100644 --- a/src/libstd/io.rs +++ b/src/libstd/io.rs @@ -461,7 +461,10 @@ fn mk_mem_buffer() -> mem_buffer { } fn mem_buffer_writer(b: mem_buffer) -> writer { b as writer } fn mem_buffer_buf(b: mem_buffer) -> [u8] { vec::from_mut(b.buf) } -fn mem_buffer_str(b: mem_buffer) -> str { str::unsafe_from_bytes(b.buf) } +fn mem_buffer_str(b: mem_buffer) -> str { + let b_ = vec::from_mut(b.buf); + str::unsafe_from_bytes(b_) +} // Utility functions fn seek_in_buf(offset: int, pos: uint, len: uint, whence: seek_style) -> From fec36de94e02e9c7c6fec05a2aea32bfbdc2f0f8 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Wed, 25 Jan 2012 02:25:57 -0800 Subject: [PATCH 09/12] Making str::from_cstr UTF-8 safe --- src/libcore/str.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 36453d78c98fc..b8d9a1dea11a5 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -217,16 +217,16 @@ Function: from_cstr Create a Rust string from a null-terminated C string */ unsafe fn from_cstr(cstr: sbuf) -> str { - let res = ""; + let res = []; let start = cstr; let curr = start; let i = 0u; while *curr != 0u8 { - push_byte(res, *curr); + vec::push(res, *curr); i += 1u; curr = ptr::offset(start, i); } - ret res; + ret from_bytes(res); } /* From 5847cf0367287d89377c57a431f38419ec1db008 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Wed, 25 Jan 2012 02:33:05 -0800 Subject: [PATCH 10/12] (TEMPORARY) Break something by making io::mem_buffer_str UTF-8 safe --- src/libstd/io.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libstd/io.rs b/src/libstd/io.rs index 201cd46916e53..5cc08813d6abc 100644 --- a/src/libstd/io.rs +++ b/src/libstd/io.rs @@ -463,7 +463,7 @@ fn mem_buffer_writer(b: mem_buffer) -> writer { b as writer } fn mem_buffer_buf(b: mem_buffer) -> [u8] { vec::from_mut(b.buf) } fn mem_buffer_str(b: mem_buffer) -> str { let b_ = vec::from_mut(b.buf); - str::unsafe_from_bytes(b_) + str::from_bytes(b_) } // Utility functions From 4d7c297beaa4326d8c7dda029aeec264616cfca6 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Wed, 25 Jan 2012 22:27:29 -0800 Subject: [PATCH 11/12] (FIX) Change encoder::encode_metadata to return a [u8] (which will become an LLVM string) --- src/comp/metadata/encoder.rs | 4 ++-- src/comp/middle/trans.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/comp/metadata/encoder.rs b/src/comp/metadata/encoder.rs index cc79e6a2bf0bc..66da9f7c36d4f 100644 --- a/src/comp/metadata/encoder.rs +++ b/src/comp/metadata/encoder.rs @@ -662,7 +662,7 @@ fn encode_hash(ebml_w: ebml::writer, hash: str) { ebml::end_tag(ebml_w); } -fn encode_metadata(cx: @crate_ctxt, crate: @crate) -> str { +fn encode_metadata(cx: @crate_ctxt, crate: @crate) -> [u8] { let abbrevs = ty::new_ty_hash(); let ecx = @{ccx: cx, type_abbrevs: abbrevs}; @@ -695,7 +695,7 @@ fn encode_metadata(cx: @crate_ctxt, crate: @crate) -> str { // Pad this, since something (LLVM, presumably) is cutting off the // remaining % 4 bytes. buf_w.write([0u8, 0u8, 0u8, 0u8]); - io::mem_buffer_str(buf) + io::mem_buffer_buf(buf) } // Get the encoded string for a type diff --git a/src/comp/middle/trans.rs b/src/comp/middle/trans.rs index e0dddd7057b15..41fe5f8e19a49 100644 --- a/src/comp/middle/trans.rs +++ b/src/comp/middle/trans.rs @@ -5419,7 +5419,7 @@ fn fill_crate_map(ccx: @crate_ctxt, map: ValueRef) { fn write_metadata(cx: @crate_ctxt, crate: @ast::crate) { if !cx.sess.building_library { ret; } - let llmeta = C_postr(metadata::encoder::encode_metadata(cx, crate)); + let llmeta = C_bytes(metadata::encoder::encode_metadata(cx, crate)); let llconst = trans_common::C_struct([llmeta]); let llglobal = str::as_buf("rust_metadata", From 3afc16f7a4ce66910107843b8f5cfbea3ad28a09 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Wed, 25 Jan 2012 23:20:10 -0800 Subject: [PATCH 12/12] Change FIXME comments in str --- src/libcore/str.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index b8d9a1dea11a5..dad10eced7f2b 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -131,7 +131,7 @@ Function: unsafe_from_bytes Converts a vector of bytes to a string. Does not verify that the vector contains valid UTF-8. -FIXME: don't export? +FIXME: stop exporting */ fn unsafe_from_bytes(v: [const u8]) -> str unsafe { let vcopy: [u8] = v + [0u8]; @@ -146,7 +146,7 @@ Function: unsafe_from_byte Converts a byte to a string. Does not verify that the byte is valid UTF-8. -FIXME: REMOVE. +FIXME: stop exporting */ fn unsafe_from_byte(u: u8) -> str { unsafe_from_bytes([u]) }