diff --git a/Cargo.lock b/Cargo.lock index 28597af6..5eb5035a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1768,6 +1768,12 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" +[[package]] +name = "supports-hyperlinks" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c0a1e5168041f5f3ff68ff7d95dcb9c8749df29f6e7e89ada40dd4c9de404ee" + [[package]] name = "syn" version = "2.0.58" @@ -2468,6 +2474,7 @@ dependencies = [ "serde-transcode", "serde_json", "serde_urlencoded", + "supports-hyperlinks", "syntect", "tempfile", "termcolor", diff --git a/Cargo.toml b/Cargo.toml index a1c9e6b2..b432161e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ dirs = "5.0" encoding_rs = "0.8.28" encoding_rs_io = "0.1.7" flate2 = "1.0.22" +hyper = { version = "1.2", default-features = false } indicatif = "0.17" jsonxf = "1.1.0" memchr = "2.4.1" @@ -42,6 +43,7 @@ serde = { version = "1.0", features = ["derive"] } serde-transcode = "1.1.1" serde_json = { version = "1.0", features = ["preserve_order"] } serde_urlencoded = "0.7.0" +supports-hyperlinks = "3.0.0" termcolor = "1.1.2" time = "0.3.16" unicode-width = "0.1.9" diff --git a/FAQ.md b/FAQ.md new file mode 100644 index 00000000..7a3095c5 --- /dev/null +++ b/FAQ.md @@ -0,0 +1,21 @@ +

Why do some HTTP headers show up mangled?

+ +HTTP header values are officially only supposed to contain ASCII. Other bytes are "opaque data": + +> Historically, HTTP has allowed field content with text in the ISO-8859-1 charset [[ISO-8859-1](https://datatracker.ietf.org/doc/html/rfc7230#ref-ISO-8859-1)], supporting other charsets only through use of [[RFC2047](https://datatracker.ietf.org/doc/html/rfc2047)] encoding. In practice, most HTTP header field values use only a subset of the US-ASCII charset [[USASCII](https://datatracker.ietf.org/doc/html/rfc7230#ref-USASCII)]. Newly defined header fields SHOULD limit their field values to US-ASCII octets. A recipient SHOULD treat other octets in field content (obs-text) as opaque data. + +([RFC 7230](https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.4)) + +In practice some headers are for some purposes treated like UTF-8, which supports all languages and characters in Unicode. But if you try to access header values through a browser's `fetch()` API or view them in the developer tools then they tend to be decoded as ISO-8859-1, which only supports a very limited number of characters and may not be the actual intended encoding. + +xh as of version 0.23.0 shows the ISO-8859-1 decoding by default to avoid a confusing difference with web browsers. If the value looks like valid UTF-8 then it additionally shows the UTF-8 decoding. + +That is, the following request: +```console +xh -v https://example.org Smile:☺ +``` +Displays the `Smile` header like this: +``` +Smile: â�º (UTF-8: ☺) +``` +The server will probably see `â�º` instead of the smiley. Or it might see `☺` after all. It depends! diff --git a/assets/syntax/basic/http.sublime-syntax b/assets/syntax/basic/http.sublime-syntax deleted file mode 100644 index 891bba0b..00000000 --- a/assets/syntax/basic/http.sublime-syntax +++ /dev/null @@ -1,33 +0,0 @@ -%YAML 1.2 ---- -# http://www.sublimetext.com/docs/3/syntax.html -name: HTTP -file_extensions: - - http - - rest -scope: source.http -contexts: - main: - - match: ^(?:([a-zA-Z]+(-?))\s+)?\s*(\S+)(?:\s+(((?i)HTTP(-?))(\/)(\S+)))?$ - scope: http.requestline - captures: - 1: keyword.control.http - 3: const.language.http - 5: keyword.other.http - 7: punctuation.separator.http - 8: constant.numeric.http - - match: '^([\w\-]+)\s*(:) ?(.*)$' - scope: http.requestheaders - captures: - 1: support.variable.http - 2: punctuation.separator.http - 3: string.other.http - - match: '^\s*((?i)HTTP(-?))(\/)(\S+)\s([1-5][0-9][0-9])\s(.*)$' - scope: http.responseLine - captures: - 1: keyword.other.http - 3: punctuation.separator.http - 4: constant.numeric.http - 4: constant.numeric.http - 5: constant.numeric.http - 6: keyword.reason.http diff --git a/assets/themes/ansi.tmTheme b/assets/themes/ansi.tmTheme index 92360eba..b5776539 100644 --- a/assets/themes/ansi.tmTheme +++ b/assets/themes/ansi.tmTheme @@ -165,6 +165,17 @@ #0C000000 + + name + Error + scope + error + settings + + foreground + #01000000 + + - \ No newline at end of file + diff --git a/assets/themes/fruity.tmTheme b/assets/themes/fruity.tmTheme index 94276290..1010354f 100644 --- a/assets/themes/fruity.tmTheme +++ b/assets/themes/fruity.tmTheme @@ -163,6 +163,18 @@ #CA000000 + + + name + Error + scope + error + settings + + foreground + #01000000 + + - \ No newline at end of file + diff --git a/assets/themes/monokai.tmTheme b/assets/themes/monokai.tmTheme index e78a15a1..3ef0bb3d 100644 --- a/assets/themes/monokai.tmTheme +++ b/assets/themes/monokai.tmTheme @@ -185,6 +185,18 @@ #C5000000 + + + name + Error + scope + error + settings + + foreground + #01000000 + + - \ No newline at end of file + diff --git a/assets/themes/solarized.tmTheme b/assets/themes/solarized.tmTheme index 8004bc53..8919b30f 100644 --- a/assets/themes/solarized.tmTheme +++ b/assets/themes/solarized.tmTheme @@ -174,6 +174,18 @@ #21000000 + + + name + Error + scope + error + settings + + foreground + #01000000 + + - \ No newline at end of file + diff --git a/src/buffer.rs b/src/buffer.rs index 407c89c6..19e8e970 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -301,8 +301,8 @@ impl Buffer { }) } - pub fn print(&mut self, s: impl AsRef<[u8]>) -> io::Result<()> { - self.write_all(s.as_ref()) + pub fn print(&mut self, s: &str) -> io::Result<()> { + self.write_all(s.as_bytes()) } pub fn guess_pretty(&self) -> Pretty { diff --git a/src/cli.rs b/src/cli.rs index a8f76cf6..14f2353b 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1070,6 +1070,10 @@ impl Theme { Theme::Fruity => "fruity", } } + + pub(crate) fn as_syntect_theme(&self) -> &'static syntect::highlighting::Theme { + &crate::formatting::THEMES.themes[self.as_str()] + } } #[derive(Debug, Clone, Copy)] diff --git a/src/formatting.rs b/src/formatting.rs index 7fc2a41e..f752d1f4 100644 --- a/src/formatting.rs +++ b/src/formatting.rs @@ -1,4 +1,7 @@ -use std::io::{self, Write}; +use std::{ + io::{self, Write}, + sync::OnceLock, +}; use syntect::dumps::from_binary; use syntect::easy::HighlightLines; @@ -9,6 +12,9 @@ use termcolor::WriteColor; use crate::{buffer::Buffer, cli::Theme}; +pub(crate) mod headers; +pub(crate) mod palette; + pub fn get_json_formatter(indent_level: usize) -> jsonxf::Formatter { let mut fmt = jsonxf::Formatter::pretty_printer(); fmt.indent = " ".repeat(indent_level); @@ -30,7 +36,7 @@ pub fn serde_json_format(indent_level: usize, text: &str, write: impl Write) -> Ok(()) } -static TS: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { +pub(crate) static THEMES: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { from_binary(include_bytes!(concat!( env!("OUT_DIR"), "/themepack.themedump" @@ -53,14 +59,14 @@ pub struct Highlighter<'a> { impl<'a> Highlighter<'a> { pub fn new(syntax: &'static str, theme: Theme, out: &'a mut Buffer) -> Self { let syntax_set: &SyntaxSet = match syntax { - "json" | "http" => &PS_BASIC, + "json" => &PS_BASIC, _ => &PS_LARGE, }; let syntax = syntax_set .find_syntax_by_extension(syntax) .expect("syntax not found"); Self { - highlighter: HighlightLines::new(syntax, &TS.themes[theme.as_str()]), + highlighter: HighlightLines::new(syntax, theme.as_syntect_theme()), syntax_set, out, } @@ -103,7 +109,9 @@ fn convert_style(style: syntect::highlighting::Style) -> termcolor::ColorSpec { use syntect::highlighting::FontStyle; let mut spec = termcolor::ColorSpec::new(); spec.set_fg(convert_color(style.foreground)) - .set_underline(style.font_style.contains(FontStyle::UNDERLINE)); + .set_underline(style.font_style.contains(FontStyle::UNDERLINE)) + .set_bold(style.font_style.contains(FontStyle::BOLD)) + .set_italic(style.font_style.contains(FontStyle::ITALIC)); spec } @@ -142,3 +150,13 @@ fn convert_color(color: syntect::highlighting::Color) -> Option bool { + static SUPPORTS_HYPERLINKS: OnceLock = OnceLock::new(); + *SUPPORTS_HYPERLINKS.get_or_init(supports_hyperlinks::supports_hyperlinks) +} + +pub(crate) fn create_hyperlink(text: &str, url: &str) -> String { + // https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda + format!("\x1B]8;;{url}\x1B\\{text}\x1B]8;;\x1B\\") +} diff --git a/src/formatting/headers.rs b/src/formatting/headers.rs new file mode 100644 index 00000000..3e1711dc --- /dev/null +++ b/src/formatting/headers.rs @@ -0,0 +1,292 @@ +use std::io::Result; + +use reqwest::{ + header::{HeaderMap, HeaderName, HeaderValue}, + Method, StatusCode, Version, +}; +use syntect::highlighting::Theme; +use termcolor::WriteColor; +use url::Url; + +use crate::utils::HeaderValueExt; + +super::palette::palette! { + struct HeaderPalette { + http_keyword: ["keyword.other.http"], + http_separator: ["punctuation.separator.http"], + http_version: ["constant.numeric.http"], + method: ["keyword.control.http"], + path: ["const.language.http"], + status_code: ["constant.numeric.http"], + status_reason: ["keyword.reason.http"], + header_name: ["source.http", "http.requestheaders", "support.variable.http"], + header_colon: ["source.http", "http.requestheaders", "punctuation.separator.http"], + header_value: ["source.http", "http.requestheaders", "string.other.http"], + error: ["error"], + } +} + +macro_rules! set_color { + ($self:ident, $color:ident) => { + if let Some(ref palette) = $self.palette { + $self.output.set_color(&palette.$color) + } else { + Ok(()) + } + }; +} + +pub(crate) struct HeaderFormatter<'a, W: WriteColor> { + output: &'a mut W, + palette: Option, + is_terminal: bool, + sort_headers: bool, +} + +impl<'a, W: WriteColor> HeaderFormatter<'a, W> { + pub(crate) fn new( + output: &'a mut W, + theme: Option<&Theme>, + is_terminal: bool, + sort_headers: bool, + ) -> Self { + Self { + palette: theme.map(HeaderPalette::from), + output, + is_terminal, + sort_headers, + } + } + + fn print(&mut self, text: &str) -> Result<()> { + self.output.write_all(text.as_bytes()) + } + + fn print_plain(&mut self, text: &str) -> Result<()> { + set_color!(self, default)?; + self.print(text) + } + + pub(crate) fn print_request_headers( + &mut self, + method: &Method, + url: &Url, + version: Version, + headers: &HeaderMap, + ) -> Result<()> { + set_color!(self, method)?; + self.print(method.as_str())?; + + self.print_plain(" ")?; + + set_color!(self, path)?; + self.print(url.path())?; + if let Some(query) = url.query() { + self.print("?")?; + self.print(query)?; + } + + self.print_plain(" ")?; + self.print_http_version(version)?; + + self.print_plain("\n")?; + self.print_headers(headers, version)?; + + if self.palette.is_some() { + self.output.reset()?; + } + Ok(()) + } + + pub(crate) fn print_response_headers( + &mut self, + version: Version, + status: StatusCode, + reason_phrase: &str, + headers: &HeaderMap, + ) -> Result<()> { + self.print_http_version(version)?; + + self.print_plain(" ")?; + + set_color!(self, status_code)?; + self.print(status.as_str())?; + + self.print_plain(" ")?; + + set_color!(self, status_reason)?; + self.print(reason_phrase)?; + + self.print_plain("\n")?; + + self.print_headers(headers, version)?; + + if self.palette.is_some() { + self.output.reset()?; + } + Ok(()) + } + + fn print_http_version(&mut self, version: Version) -> Result<()> { + let version = format!("{version:?}"); + let version = version.strip_prefix("HTTP/").unwrap_or(&version); + + set_color!(self, http_keyword)?; + self.print("HTTP")?; + set_color!(self, http_separator)?; + self.print("/")?; + set_color!(self, http_version)?; + self.print(version)?; + + Ok(()) + } + + fn print_headers(&mut self, headers: &HeaderMap, version: Version) -> Result<()> { + let as_titlecase = match version { + Version::HTTP_09 | Version::HTTP_10 | Version::HTTP_11 => true, + Version::HTTP_2 | Version::HTTP_3 => false, + _ => false, + }; + let mut headers: Vec<(&HeaderName, &HeaderValue)> = headers.iter().collect(); + if self.sort_headers { + headers.sort_by_key(|(name, _)| name.as_str()); + } + + let mut namebuf = String::with_capacity(64); + for (name, value) in headers { + let key = if as_titlecase { + titlecase_header(name, &mut namebuf) + } else { + name.as_str() + }; + + set_color!(self, header_name)?; + self.print(key)?; + set_color!(self, header_colon)?; + self.print(":")?; + self.print_plain(" ")?; + + match value.to_ascii_or_latin1() { + Ok(ascii) => { + set_color!(self, header_value)?; + self.print(ascii)?; + } + Err(bad) => { + const FAQ_URL: &str = + "https://github.com/ducaale/xh/blob/master/FAQ.md#header-value-encoding"; + + let mut latin1 = bad.latin1(); + if self.is_terminal { + latin1 = sanitize_header_value(&latin1); + } + set_color!(self, error)?; + self.print(&latin1)?; + + if let Some(utf8) = bad.utf8() { + set_color!(self, default)?; + if self.palette.is_some() && super::supports_hyperlinks() { + self.print(" (")?; + self.print(&super::create_hyperlink("UTF-8", FAQ_URL))?; + self.print(": ")?; + } else { + self.print(" (UTF-8: ")?; + } + + set_color!(self, header_value)?; + // We could escape these as well but latin1 has a much higher chance + // to contain control characters because: + // - ~14% of the possible latin1 codepoints are control characters, + // versus <0.1% for UTF-8. + // - The latin1 text may not be intended as latin1, but if it's valid + // as UTF-8 then chances are that it really is UTF-8. + // We should revisit this if we come up with a general policy for + // escaping control characters, not just in headers. + self.print(utf8)?; + self.print_plain(")")?; + } + } + } + self.print_plain("\n")?; + } + + Ok(()) + } +} + +fn titlecase_header<'b>(name: &HeaderName, buffer: &'b mut String) -> &'b str { + let name = name.as_str(); + buffer.clear(); + buffer.reserve(name.len()); + // Ought to be equivalent to how hyper does it + // https://github.com/hyperium/hyper/blob/f46b175bf71b202fbb907c4970b5743881b891e1/src/proto/h1/role.rs#L1332 + // Header names are ASCII so operating on char or u8 is equivalent + let mut prev = '-'; + for mut c in name.chars() { + if prev == '-' { + c.make_ascii_uppercase(); + } + buffer.push(c); + prev = c; + } + buffer +} + +/// Escape control characters. Firefox uses Unicode replacement characters, +/// that seems like a good choice. +/// +/// Header values can't contain ASCII control characters (like newlines) +/// but if misencoded they frequently contain latin1 control characters. +/// What we do here might not make sense for other strings. +fn sanitize_header_value(value: &str) -> String { + const REPLACEMENT_CHARACTER: &str = "\u{FFFD}"; + value.replace(char::is_control, REPLACEMENT_CHARACTER) +} + +#[cfg(test)] +mod tests { + use indoc::indoc; + + use super::*; + + #[test] + fn test_header_casing() { + let mut headers = HeaderMap::new(); + headers.insert("ab-cd", "0".parse().unwrap()); + headers.insert("-cd", "0".parse().unwrap()); + headers.insert("-", "0".parse().unwrap()); + headers.insert("ab-%c", "0".parse().unwrap()); + headers.insert("A-b--C", "0".parse().unwrap()); + + let mut buf = termcolor::Ansi::new(Vec::new()); + let mut formatter = HeaderFormatter::new(&mut buf, None, false, false); + formatter.print_headers(&headers, Version::HTTP_11).unwrap(); + let buf = buf.into_inner(); + assert_eq!( + buf, + indoc! {b" + Ab-Cd: 0 + -Cd: 0 + -: 0 + Ab-%c: 0 + A-B--C: 0 + " + } + ); + + let mut buf = termcolor::Ansi::new(Vec::new()); + let mut formatter = HeaderFormatter::new(&mut buf, None, false, false); + formatter.print_headers(&headers, Version::HTTP_2).unwrap(); + let buf = buf.into_inner(); + assert_eq!( + buf, + indoc! {b" + ab-cd: 0 + -cd: 0 + -: 0 + ab-%c: 0 + a-b--c: 0 + " + } + ); + } +} diff --git a/src/formatting/palette.rs b/src/formatting/palette.rs new file mode 100644 index 00000000..50f62d73 --- /dev/null +++ b/src/formatting/palette.rs @@ -0,0 +1,68 @@ +//! We used to use syntect for all of our coloring and we still use syntect-compatible +//! files to store themes. +//! +//! But we've started coloring some things manually for better control (and potentially +//! for better efficiency). This macro loads colors from themes and exposes them as +//! fields on a struct. See [`super::headers`] for an example. + +macro_rules! palette { + { + $vis:vis struct $name:ident { + $($color:ident: $scopes:expr,)* + } + } => { + $vis struct $name { + $(pub $color: ::termcolor::ColorSpec,)* + #[allow(unused)] + pub default: ::termcolor::ColorSpec, + } + + impl From<&::syntect::highlighting::Theme> for $name { + fn from(theme: &::syntect::highlighting::Theme) -> Self { + let highlighter = ::syntect::highlighting::Highlighter::new(theme); + let mut parsed_scopes = ::std::vec::Vec::new(); + Self { + $($color: $crate::formatting::palette::util::extract_color( + &highlighter, + &$scopes, + &mut parsed_scopes, + ),)* + default: $crate::formatting::palette::util::extract_default(theme), + } + } + } + } +} + +pub(crate) use palette; + +pub(crate) mod util { + use syntect::{ + highlighting::{Highlighter, Theme}, + parsing::Scope, + }; + use termcolor::ColorSpec; + + use crate::formatting::{convert_color, convert_style}; + + #[inline(never)] + pub(crate) fn extract_color( + highlighter: &Highlighter, + scopes: &[&str], + parsebuf: &mut Vec, + ) -> ColorSpec { + parsebuf.clear(); + parsebuf.extend(scopes.iter().map(|s| s.parse::().unwrap())); + let style = highlighter.style_for_stack(parsebuf); + convert_style(style) + } + + #[inline(never)] + pub(crate) fn extract_default(theme: &Theme) -> ColorSpec { + let mut color = ColorSpec::new(); + if let Some(foreground) = theme.settings.foreground { + color.set_fg(convert_color(foreground)); + } + color + } +} diff --git a/src/main.rs b/src/main.rs index ac1600f0..6061e122 100644 --- a/src/main.rs +++ b/src/main.rs @@ -35,6 +35,7 @@ use reqwest::header::{ }; use reqwest::tls; use url::Host; +use utils::reason_phrase; use crate::auth::{Auth, DigestAuthMiddleware}; use crate::buffer::Buffer; @@ -586,7 +587,7 @@ fn run(args: Cli) -> Result { // HTTPie looks at --quiet, since --quiet always suppresses the response // headers even if you pass --print=h. But --print takes precedence for us. if exit_code != 0 && (is_output_redirected || !print.response_headers) { - log::warn!("HTTP {status}"); + log::warn!("HTTP {} {}", status.as_u16(), reason_phrase(&response)); } } diff --git a/src/printer.rs b/src/printer.rs index 515ef950..9ac1769b 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -7,12 +7,11 @@ use encoding_rs_io::DecodeReaderBytesBuilder; use mime::Mime; use reqwest::blocking::{Body, Request, Response}; use reqwest::cookie::CookieStore; -use reqwest::header::{ - HeaderMap, HeaderName, HeaderValue, ACCEPT, CONTENT_LENGTH, CONTENT_TYPE, COOKIE, HOST, -}; -use reqwest::Version; +use reqwest::header::{HeaderMap, HeaderValue, ACCEPT, CONTENT_LENGTH, CONTENT_TYPE, COOKIE, HOST}; use url::Url; +use crate::formatting::headers::HeaderFormatter; +use crate::utils::reason_phrase; use crate::{ buffer::Buffer, cli::FormatOptions, @@ -142,6 +141,16 @@ impl Printer { Highlighter::new(syntax, self.theme, &mut self.buffer) } + fn get_header_formatter(&mut self) -> HeaderFormatter<'_, Buffer> { + let is_terminal = self.buffer.is_terminal(); + HeaderFormatter::new( + &mut self.buffer, + self.color.then(|| self.theme.as_syntect_theme()), + is_terminal, + self.sort_headers, + ) + } + fn print_colorized_text(&mut self, text: &str, syntax: &'static str) -> io::Result<()> { self.get_highlighter(syntax).highlight(text) } @@ -299,55 +308,6 @@ impl Printer { } } - fn print_headers(&mut self, text: &str) -> io::Result<()> { - if self.color { - self.print_colorized_text(text, "http") - } else { - self.buffer.print(text) - } - } - - fn headers_to_string(&self, headers: &HeaderMap, version: Version) -> String { - let as_titlecase = match version { - Version::HTTP_09 | Version::HTTP_10 | Version::HTTP_11 => true, - Version::HTTP_2 | Version::HTTP_3 => false, - _ => false, - }; - let mut headers: Vec<(&HeaderName, &HeaderValue)> = headers.iter().collect(); - if self.sort_headers { - headers.sort_by_key(|(name, _)| name.as_str()); - } - - let mut header_string = String::new(); - for (key, value) in headers { - if as_titlecase { - // Ought to be equivalent to how hyper does it - // https://github.com/hyperium/hyper/blob/f46b175bf71b202fbb907c4970b5743881b891e1/src/proto/h1/role.rs#L1332 - // Header names are ASCII so it's ok to operate on char instead of u8 - let mut prev = '-'; - for mut c in key.as_str().chars() { - if prev == '-' { - c.make_ascii_uppercase(); - } - header_string.push(c); - prev = c; - } - } else { - header_string.push_str(key.as_str()); - } - header_string.push_str(": "); - match value.to_str() { - Ok(value) => header_string.push_str(value), - #[allow(clippy::format_push_string)] - Err(_) => header_string.push_str(&format!("{:?}", value)), - } - header_string.push('\n'); - } - header_string.pop(); - - header_string - } - pub fn print_separator(&mut self) -> io::Result<()> { self.buffer.print("\n")?; self.buffer.flush()?; @@ -358,9 +318,7 @@ impl Printer { where T: CookieStore, { - let method = request.method(); let url = request.url(); - let query_string = url.query().map_or(String::from(""), |q| ["?", q].concat()); let version = request.version(); let mut headers = request.headers().clone(); @@ -376,16 +334,17 @@ impl Printer { // reqwest and hyper add certain headers, but only in the process of // sending the request, which we haven't done yet if let Some(body) = request.body().and_then(Body::as_bytes) { - // Added at https://github.com/seanmonstar/reqwest/blob/e56bd160ba/src/blocking/request.rs#L132 + // Added at https://github.com/seanmonstar/reqwest/blob/c4ebb07343/src/blocking/request.rs#L144 headers .entry(CONTENT_LENGTH) .or_insert_with(|| body.len().into()); } if let Some(host) = request.url().host_str() { - // This is incorrect in case of HTTP/2, but we're already assuming - // HTTP/1.1 anyway + // FIXME: in case of HTTP/2 we probably don't send this. But we probably + // do send the :authority pseudo-header, and without --http-version we don't + // even know if we're going to use HTTP/2 yet. headers.entry(HOST).or_insert_with(|| { - // Added at https://github.com/hyperium/hyper/blob/dfa1bb291d/src/client/client.rs#L237 + // Added at https://github.com/hyperium/hyper-util/blob/53aadac50d/src/client/legacy/client.rs#L278 if test_mode() { HeaderValue::from_str("http.mock") } else if let Some(port) = request.url().port() { @@ -397,25 +356,27 @@ impl Printer { }); } - let request_line = format!("{} {}{} {:?}\n", method, url.path(), query_string, version); - let headers = self.headers_to_string(&headers, version); + self.get_header_formatter().print_request_headers( + request.method(), + request.url(), + version, + &headers, + )?; - self.print_headers(&(request_line + &headers))?; - self.buffer.print("\n\n")?; + self.buffer.print("\n")?; self.buffer.flush()?; Ok(()) } pub fn print_response_headers(&mut self, response: &Response) -> io::Result<()> { - let version = response.version(); - let status = response.status(); - let headers = response.headers(); - - let status_line = format!("{:?} {}\n", version, status); - let headers = self.headers_to_string(headers, version); + self.get_header_formatter().print_response_headers( + response.version(), + response.status(), + &reason_phrase(response), + response.headers(), + )?; - self.print_headers(&(status_line + &headers))?; - self.buffer.print("\n\n")?; + self.buffer.print("\n")?; self.buffer.flush()?; Ok(()) } @@ -484,7 +445,7 @@ impl Printer { } else { let mut buf = Vec::new(); body.read_to_end(&mut buf)?; - self.buffer.print(&buf)?; + self.buffer.write_all(&buf)?; } } else if stream { match self @@ -524,11 +485,11 @@ impl Printer { total_elapsed_time += content_download_duration.as_secs_f64(); } self.buffer - .print(format!("Elapsed time: {:.5}s\n", total_elapsed_time))?; + .print(&format!("Elapsed time: {:.5}s\n", total_elapsed_time))?; if let Some(remote_addr) = response.remote_addr() { self.buffer - .print(format!("Remote address: {:?}\n", remote_addr))?; + .print(&format!("Remote address: {:?}\n", remote_addr))?; } self.buffer.print("\n")?; @@ -751,8 +712,6 @@ fn get_charset(response: &Response) -> Option<&'static Encoding> { #[cfg(test)] mod tests { - use indoc::indoc; - use crate::utils::random_string; use crate::{buffer::Buffer, cli::Cli, vec_of_strings}; @@ -841,46 +800,4 @@ mod tests { assert_eq!(p.color, true); assert!(p.buffer.is_stderr()); } - - #[test] - fn test_header_casing() { - let p = Printer { - json_indent_level: 4, - format_json: false, - sort_headers: false, - color: false, - theme: Theme::Auto, - stream: false.into(), - buffer: Buffer::new(false, None, false).unwrap(), - }; - - let mut headers = HeaderMap::new(); - headers.insert("ab-cd", "0".parse().unwrap()); - headers.insert("-cd", "0".parse().unwrap()); - headers.insert("-", "0".parse().unwrap()); - headers.insert("ab-%c", "0".parse().unwrap()); - headers.insert("A-b--C", "0".parse().unwrap()); - - assert_eq!( - p.headers_to_string(&headers, reqwest::Version::HTTP_11), - indoc! {" - Ab-Cd: 0 - -Cd: 0 - -: 0 - Ab-%c: 0 - A-B--C: 0" - } - ); - - assert_eq!( - p.headers_to_string(&headers, reqwest::Version::HTTP_2), - indoc! {" - ab-cd: 0 - -cd: 0 - -: 0 - ab-%c: 0 - a-b--c: 0" - } - ); - } } diff --git a/src/utils.rs b/src/utils.rs index 2916d73a..05dd84af 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -5,7 +5,7 @@ use std::path::{Path, PathBuf}; use std::str::Utf8Error; use anyhow::Result; -use reqwest::blocking::Request; +use reqwest::blocking::{Request, Response}; use reqwest::header::HeaderValue; use url::Url; @@ -182,10 +182,94 @@ pub fn copy_largebuf( pub(crate) trait HeaderValueExt { fn to_utf8_str(&self) -> Result<&str, Utf8Error>; + + fn to_ascii_or_latin1(&self) -> Result<&str, BadHeaderValue<'_>>; } impl HeaderValueExt for HeaderValue { fn to_utf8_str(&self) -> Result<&str, Utf8Error> { std::str::from_utf8(self.as_bytes()) } + + /// If the value is pure ASCII, return Ok(). If not, return Err() with methods for + /// further handling. + /// + /// The Ok() version cannot contain control characters (not even ASCII ones). + fn to_ascii_or_latin1(&self) -> Result<&str, BadHeaderValue<'_>> { + self.to_str().map_err(|_| BadHeaderValue { value: self }) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(crate) struct BadHeaderValue<'a> { + value: &'a HeaderValue, +} + +impl<'a> BadHeaderValue<'a> { + /// Return the header value's latin1 decoding, AKA isomorphic decode, + /// AKA ISO-8859-1 decode. This is how browsers tend to handle it. + /// + /// Not to be confused with ISO 8859-1 (which leaves 0x8X and 0x9X unmapped) + /// or with Windows-1252 (which is how HTTP bodies are decoded if they + /// declare `Content-Encoding: iso-8859-1`). + /// + /// Is likely to contain control characters. Consider replacing these. + pub(crate) fn latin1(self) -> String { + // https://infra.spec.whatwg.org/#isomorphic-decode + self.value.as_bytes().iter().map(|&b| b as char).collect() + } + + /// Return the header value's UTF-8 decoding. This is most likely what the + /// user expects, but when browsers prefer another encoding we should give + /// that one precedence. + pub(crate) fn utf8(self) -> Option<&'a str> { + self.value.to_utf8_str().ok() + } +} + +pub(crate) fn reason_phrase(response: &Response) -> Cow<'_, str> { + if let Some(reason) = response.extensions().get::() { + // The server sent a non-standard reason phrase. + // Seems like some browsers interpret this as latin1 and others as UTF-8? + // Rare case and clients aren't supposed to pay attention to the reason + // phrase so let's just do UTF-8 for convenience. + // We could send the bytes straight to stdout/stderr in case they're some + // other encoding but that's probably not worth the effort. + String::from_utf8_lossy(reason.as_bytes()) + } else if let Some(reason) = response.status().canonical_reason() { + // On HTTP/2+ no reason phrase is sent so we're just explaining the code + // to the user. + // On HTTP/1.1 and below this matches the reason the server actually sent + // or else hyper would have added a ReasonPhrase. + Cow::Borrowed(reason) + } else { + // Only reachable in case of an unknown status code over HTTP/2+. + // curl prints nothing in this case. + Cow::Borrowed("") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_latin1() { + let good = HeaderValue::from_static("Rhodes"); + let good = good.to_ascii_or_latin1(); + + assert_eq!(good, Ok("Rhodes")); + + let bad = HeaderValue::from_bytes("Ῥόδος".as_bytes()).unwrap(); + let bad = bad.to_ascii_or_latin1().unwrap_err(); + + assert_eq!(bad.latin1(), "ῬÏ\u{8c}δοÏ\u{82}"); + assert_eq!(bad.utf8(), Some("Ῥόδος")); + + let worse = HeaderValue::from_bytes(b"R\xF3dos").unwrap(); + let worse = worse.to_ascii_or_latin1().unwrap_err(); + + assert_eq!(worse.latin1(), "Ródos"); + assert_eq!(worse.utf8(), None); + } } diff --git a/tests/cli.rs b/tests/cli.rs index 6c54d5a3..f9c05624 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -18,6 +18,7 @@ use http_body_util::BodyExt; use indoc::indoc; use predicates::function::function; use predicates::str::contains; +use reqwest::header::HeaderValue; use tempfile::{tempdir, NamedTempFile, TempDir}; pub trait RequestExt { @@ -1709,7 +1710,31 @@ fn support_utf8_header_value() { HTTP/1.1 200 OK Content-Length: 0 Date: N/A - Hello: "\xe4\xbd\xa0\xe5\xa5\xbd\xe5\x91\x80" + Hello: 你好å�� (UTF-8: 你好呀) + + + "#}) + .success(); +} + +#[test] +fn support_latin1_header_value() { + let server = server::http(|_req| async move { + hyper::Response::builder() + .header("hello", HeaderValue::from_bytes(b"R\xF3dos").unwrap()) + .header("Date", "N/A") + .body("".into()) + .unwrap() + }); + + get_command() + .arg(server.base_url()) + .assert() + .stdout(indoc! {r#" + HTTP/1.1 200 OK + Content-Length: 0 + Date: N/A + Hello: Ródos "#}) @@ -1748,7 +1773,7 @@ fn redirect_support_utf8_location() { HTTP/1.1 302 Found Content-Length: 14 Date: N/A - Location: "/page\xe4\xba\x8c" + Location: /pageäº� (UTF-8: /page二) redirecting... @@ -3747,3 +3772,25 @@ fn multiple_format_options_are_merged() { "#}); } + +#[test] +fn reason_phrase_is_preserved() { + let server = server::http(|_req| async move { + let mut response = hyper::Response::builder(); + response + .extensions_mut() + .unwrap() + .insert(hyper::ext::ReasonPhrase::from_static(b"Wonderful")); + response.header("Date", "N/A").body("".into()).unwrap() + }); + get_command() + .arg(server.base_url()) + .assert() + .stdout(indoc! {r#" + HTTP/1.1 200 Wonderful + Content-Length: 0 + Date: N/A + + + "#}); +}