diff --git a/data-url/src/mime.rs b/data-url/src/mime.rs index 5840a2c60..9f14663ff 100644 --- a/data-url/src/mime.rs +++ b/data-url/src/mime.rs @@ -69,7 +69,7 @@ fn parse_parameters(s: &str, parameters: &mut Vec<(String, String)>) { while let Some(piece) = semicolon_separated.next() { let piece = piece.trim_start_matches(ascii_whitespace); let (name, value) = split2(piece, '='); - if name.is_empty() || !only_http_token_code_points(name) || contains(&parameters, name) { + if name.is_empty() || !only_http_token_code_points(name) || contains(parameters, name) { continue; } if let Some(value) = value { diff --git a/url/Cargo.toml b/url/Cargo.toml index e3846595b..343628acf 100644 --- a/url/Cargo.toml +++ b/url/Cargo.toml @@ -25,11 +25,14 @@ bencher = "0.1" [dependencies] form_urlencoded = { version = "1.0.0", path = "../form_urlencoded" } -idna = { version = "0.2.0", path = "../idna" } +idna = { version = "0.2.0", path = "../idna", optional = true } matches = "0.1" percent-encoding = { version = "2.1.0", path = "../percent_encoding" } serde = {version = "1.0", optional = true, features = ["derive"]} +[features] +default = ["idna"] + [[bench]] name = "parse_url" path = "benches/parse_url.rs" diff --git a/url/src/host.rs b/url/src/host.rs index 953743649..3f4cb3559 100644 --- a/url/src/host.rs +++ b/url/src/host.rs @@ -82,7 +82,9 @@ impl Host { return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6); } let domain = percent_decode(input.as_bytes()).decode_utf8_lossy(); - let domain = idna::domain_to_ascii(&domain)?; + + let domain = Self::domain_to_ascii(&domain)?; + if domain.is_empty() { return Err(ParseError::EmptyHost); } @@ -156,6 +158,24 @@ impl Host { )) } } + + /// convert domain with idna + #[cfg(feature = "idna")] + fn domain_to_ascii(domain: &str) -> Result<String, ParseError> { + idna::domain_to_ascii(&domain).map_err(Into::into) + } + + /// checks domain is ascii + #[cfg(not(feature = "idna"))] + fn domain_to_ascii(domain: &str) -> Result<String, ParseError> { + // without idna feature, we
can't verify the correctness of xn-- domains + let domain = domain.to_lowercase(); + if domain.is_ascii() && domain.split('.').all(|s| !s.starts_with("xn--")) { + Ok(domain) + } else { + Err(ParseError::InvalidDomainCharacter) + } + } } impl<S: AsRef<str>> fmt::Display for Host<S> { diff --git a/url/src/lib.rs b/url/src/lib.rs index d4d8f4e7d..83adb308f 100644 --- a/url/src/lib.rs +++ b/url/src/lib.rs @@ -118,6 +118,16 @@ See [serde documentation](https://serde.rs) for more information. ```toml url = { version = "2", features = ["serde"] } ``` + +# Feature: `idna` + +You can opt out of [idna](https://en.wikipedia.org/wiki/Internationalized_domain_name) support +to reduce the final binary size. + +```toml +url = { version = "2", default-features = false } +``` + */ #![doc(html_root_url = "https://docs.rs/url/2.2.2")] diff --git a/url/src/origin.rs b/url/src/origin.rs index be2d948b8..838be55bd 100644 --- a/url/src/origin.rs +++ b/url/src/origin.rs @@ -9,7 +9,6 @@ use crate::host::Host; use crate::parser::default_port; use crate::Url; -use idna::domain_to_unicode; use std::sync::atomic::{AtomicUsize, Ordering}; pub fn url_origin(url: &Url) -> Origin { @@ -87,13 +86,14 @@ impl Origin { } /// + #[cfg(feature = "idna")] pub fn unicode_serialization(&self) -> String { match *self { Origin::Opaque(_) => "null".to_owned(), Origin::Tuple(ref scheme, ref host, port) => { let host = match *host { Host::Domain(ref domain) => { - let (domain, _errors) = domain_to_unicode(domain); + let (domain, _errors) = idna::domain_to_unicode(domain); Host::Domain(domain) } _ => host.clone(), diff --git a/url/src/parser.rs b/url/src/parser.rs index caedb59c7..656b0c445 100644 --- a/url/src/parser.rs +++ b/url/src/parser.rs @@ -93,6 +93,7 @@ simple_enum_error! 
{ Overflow => "URLs more than 4 GB are not supported", } +#[cfg(feature = "idna")] impl From<::idna::Errors> for ParseError { fn from(_: ::idna::Errors) -> ParseError { ParseError::IdnaError diff --git a/url/src/quirks.rs b/url/src/quirks.rs index 0dbc6eb44..49d8c44d4 100644 --- a/url/src/quirks.rs +++ b/url/src/quirks.rs @@ -23,6 +23,7 @@ pub fn domain_to_ascii(domain: &str) -> String { } /// https://url.spec.whatwg.org/#dom-url-domaintounicode +#[cfg(feature = "idna")] pub fn domain_to_unicode(domain: &str) -> String { match Host::parse(domain) { Ok(Host::Domain(ref domain)) => { diff --git a/url/tests/data.rs b/url/tests/data.rs index b72c33306..79a70e1f4 100644 --- a/url/tests/data.rs +++ b/url/tests/data.rs @@ -16,6 +16,20 @@ use url::{quirks, Url}; #[test] fn urltestdata() { + #[cfg(not(feature = "idna"))] + let idna_skip_inputs = [ + "http://www.foo。bar.com", + "http://Go.com", + "http://你好你好", + "https://faß.ExAmPlE/", + "http://0Xc0.0250.01", + "ftp://%e2%98%83", + "https://%e2%98%83", + "file://a\u{ad}b/p", + "file://a%C2%ADb/p", + "http://GOO\u{200b}\u{2060}\u{feff}goo.com", + ]; + // Copied form https://github.com/w3c/web-platform-tests/blob/master/url/ let mut json = Value::from_str(include_str!("urltestdata.json")) .expect("JSON parse error in urltestdata.json"); @@ -30,6 +44,13 @@ fn urltestdata() { let input = entry.take_string("input"); let failure = entry.take_key("failure").is_some(); + #[cfg(not(feature = "idna"))] + { + if idna_skip_inputs.contains(&input.as_str()) { + continue; + } + } + let base = match Url::parse(&base) { Ok(base) => base, Err(_) if failure => continue, @@ -106,6 +127,14 @@ fn setters_tests() { let mut tests = json.take_key(attr).unwrap(); for mut test in tests.as_array_mut().unwrap().drain(..) 
{ let comment = test.take_key("comment").map(|s| s.string()); + #[cfg(not(feature = "idna"))] + { + if let Some(comment) = comment.as_ref() { + if comment.starts_with("IDNA Nontransitional_Processing") { + continue; + } + } + } let href = test.take_string("href"); let new_value = test.take_string("new_value"); let name = format!("{:?}.{} = {:?}", href, attr, new_value); diff --git a/url/tests/unit.rs b/url/tests/unit.rs index 13055a473..2a77bead3 100644 --- a/url/tests/unit.rs +++ b/url/tests/unit.rs @@ -296,6 +296,7 @@ fn host_serialization() { ); } +#[cfg(feature = "idna")] #[test] fn test_idna() { assert!("http://goșu.ro".parse::<Url>().is_ok()); @@ -531,6 +532,7 @@ fn test_origin_opaque() { assert!(!&Url::parse("blob:malformed//").unwrap().origin().is_tuple()) } +#[cfg(feature = "idna")] #[test] fn test_origin_unicode_serialization() { let data = [ @@ -703,6 +705,7 @@ fn test_set_href() { ); } +#[cfg(feature = "idna")] #[test] fn test_domain_encoding_quirks() { use url::quirks::{domain_to_ascii, domain_to_unicode};