From 00bc6f22381b7ef4871c66e209fac1ed854e2353 Mon Sep 17 00:00:00 2001 From: Jan Verbeek Date: Sun, 30 Jun 2024 21:58:01 +0200 Subject: [PATCH] Decode headers as latin1/UTF-8, show real reason phrase External changes: - We now print the actual reason phrase sent by the server instead of guessing it from the status code. That is, if servers reply with "200 Wonderful" instead of "200 OK" then we show that. This is especially useful for status codes that xh doesn't recognize. - Header values are now decoded as latin1, with the UTF-8 decoding also shown if applicable. - A new FAQ file with an entry that explains header value encoding. Header output now hyperlinks to this entry when relevant and if supported by the terminal. Under the hood we now color headers manually. It's still hooked up to the `.tmTheme` files but not to the `.sublime-syntax` file. This lets us highlight the latin1 header values differently. In the future we could use the same approach to optimize JSON highlighting. I'm unsure about the position of the hyperlink. Currently it's the text "UTF-8" in ` (UTF-8: )`. But that means it's only shown if the value can be decoded as UTF-8. An alternative is to turn the latin1 value itself into a hyperlink, but that's confusing if the value itself is already a URL (which is a common case for the `Location` header). I also don't feel that our text is quite distinct enough from the header value in the default `ansi` theme. Though the hyperlink does help to set it apart. --- Cargo.lock | 7 + Cargo.toml | 2 + FAQ.md | 21 ++ assets/syntax/basic/http.sublime-syntax | 33 --- assets/themes/ansi.tmTheme | 13 +- assets/themes/fruity.tmTheme | 14 +- assets/themes/monokai.tmTheme | 14 +- assets/themes/solarized.tmTheme | 14 +- src/buffer.rs | 4 +- src/cli.rs | 4 + src/formatting.rs | 28 ++- src/formatting/headers.rs | 292 ++++++++++++++++++++++++ src/formatting/palette.rs | 68 ++++++ src/main.rs | 3 +- src/printer.rs | 153 +++---------- src/utils.rs | 86 ++++++- tests/cli.rs | 51 ++++- 17 files changed, 641 insertions(+), 166 deletions(-) create mode 100644 FAQ.md delete mode 100644 assets/syntax/basic/http.sublime-syntax create mode 100644 src/formatting/headers.rs create mode 100644 src/formatting/palette.rs diff --git a/Cargo.lock b/Cargo.lock index 28597af6..5eb5035a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1768,6 +1768,12 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" +[[package]] +name = "supports-hyperlinks" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c0a1e5168041f5f3ff68ff7d95dcb9c8749df29f6e7e89ada40dd4c9de404ee" + [[package]] name = "syn" version = "2.0.58" @@ -2468,6 +2474,7 @@ dependencies = [ "serde-transcode", "serde_json", "serde_urlencoded", + "supports-hyperlinks", "syntect", "tempfile", "termcolor", diff --git a/Cargo.toml b/Cargo.toml index a1c9e6b2..b432161e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ dirs = "5.0" encoding_rs = "0.8.28" encoding_rs_io = "0.1.7" flate2 = "1.0.22" +hyper = { version = "1.2", default-features = false } indicatif = "0.17" jsonxf = "1.1.0" memchr = "2.4.1" @@ -42,6 +43,7 @@ serde = { version = "1.0", features = ["derive"] } serde-transcode = "1.1.1" serde_json = { version = "1.0", features = ["preserve_order"] } serde_urlencoded = "0.7.0" +supports-hyperlinks = "3.0.0" termcolor = "1.1.2" time = "0.3.16" unicode-width = "0.1.9" diff --git a/FAQ.md b/FAQ.md new file mode 100644 index 00000000..7a3095c5 --- /dev/null +++ b/FAQ.md @@ -0,0 +1,21 @@ +

Why do some HTTP headers show up mangled?

+ +HTTP header values are officially only supposed to contain ASCII. Other bytes are "opaque data": + +> Historically, HTTP has allowed field content with text in the ISO-8859-1 charset [[ISO-8859-1](https://datatracker.ietf.org/doc/html/rfc7230#ref-ISO-8859-1)], supporting other charsets only through use of [[RFC2047](https://datatracker.ietf.org/doc/html/rfc2047)] encoding. In practice, most HTTP header field values use only a subset of the US-ASCII charset [[USASCII](https://datatracker.ietf.org/doc/html/rfc7230#ref-USASCII)]. Newly defined header fields SHOULD limit their field values to US-ASCII octets. A recipient SHOULD treat other octets in field content (obs-text) as opaque data. + +([RFC 7230](https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.4)) + +In practice some headers are for some purposes treated like UTF-8, which supports all languages and characters in Unicode. But if you try to access header values through a browser's `fetch()` API or view them in the developer tools then they tend to be decoded as ISO-8859-1, which only supports a very limited number of characters and may not be the actual intended encoding. + +xh as of version 0.23.0 shows the ISO-8859-1 decoding by default to avoid a confusing difference with web browsers. If the value looks like valid UTF-8 then it additionally shows the UTF-8 decoding. + +That is, the following request: +```console +xh -v https://example.org Smile:☺ +``` +Displays the `Smile` header like this: +``` +Smile: â�º (UTF-8: ☺) +``` +The server will probably see `â�º` instead of the smiley. Or it might see `☺` after all. It depends! diff --git a/assets/syntax/basic/http.sublime-syntax b/assets/syntax/basic/http.sublime-syntax deleted file mode 100644 index 891bba0b..00000000 --- a/assets/syntax/basic/http.sublime-syntax +++ /dev/null @@ -1,33 +0,0 @@ -%YAML 1.2 ---- -# http://www.sublimetext.com/docs/3/syntax.html -name: HTTP -file_extensions: - - http - - rest -scope: source.http -contexts: - main: - - match: ^(?:([a-zA-Z]+(-?))\s+)?\s*(\S+)(?:\s+(((?i)HTTP(-?))(\/)(\S+)))?$ - scope: http.requestline - captures: - 1: keyword.control.http - 3: const.language.http - 5: keyword.other.http - 7: punctuation.separator.http - 8: constant.numeric.http - - match: '^([\w\-]+)\s*(:) ?(.*)$' - scope: http.requestheaders - captures: - 1: support.variable.http - 2: punctuation.separator.http - 3: string.other.http - - match: '^\s*((?i)HTTP(-?))(\/)(\S+)\s([1-5][0-9][0-9])\s(.*)$' - scope: http.responseLine - captures: - 1: keyword.other.http - 3: punctuation.separator.http - 4: constant.numeric.http - 4: constant.numeric.http - 5: constant.numeric.http - 6: keyword.reason.http diff --git a/assets/themes/ansi.tmTheme b/assets/themes/ansi.tmTheme index 92360eba..b5776539 100644 --- a/assets/themes/ansi.tmTheme +++ b/assets/themes/ansi.tmTheme @@ -165,6 +165,17 @@ #0C000000 + + name + Error + scope + error + settings + + foreground + #01000000 + + - \ No newline at end of file + diff --git a/assets/themes/fruity.tmTheme b/assets/themes/fruity.tmTheme index 94276290..1010354f 100644 --- a/assets/themes/fruity.tmTheme +++ b/assets/themes/fruity.tmTheme @@ -163,6 +163,18 @@ #CA000000 + + + name + Error + scope + error + settings + + foreground + #01000000 + + - \ No newline at end of file + diff --git a/assets/themes/monokai.tmTheme b/assets/themes/monokai.tmTheme index e78a15a1..3ef0bb3d 100644 --- a/assets/themes/monokai.tmTheme +++ b/assets/themes/monokai.tmTheme @@ -185,6 +185,18 @@ #C5000000 + + + name + Error + scope + error + settings + + foreground + #01000000 + + - \ No newline at end of file + diff --git a/assets/themes/solarized.tmTheme b/assets/themes/solarized.tmTheme index 8004bc53..8919b30f 100644 --- a/assets/themes/solarized.tmTheme +++ b/assets/themes/solarized.tmTheme @@ -174,6 +174,18 @@ #21000000 + + + name + Error + scope + error + settings + + foreground + #01000000 + + - \ No newline at end of file + diff --git a/src/buffer.rs b/src/buffer.rs index 407c89c6..19e8e970 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -301,8 +301,8 @@ impl Buffer { }) } - pub fn print(&mut self, s: impl AsRef<[u8]>) -> io::Result<()> { - self.write_all(s.as_ref()) + pub fn print(&mut self, s: &str) -> io::Result<()> { + self.write_all(s.as_bytes()) } pub fn guess_pretty(&self) -> Pretty { diff --git a/src/cli.rs b/src/cli.rs index a8f76cf6..14f2353b 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1070,6 +1070,10 @@ impl Theme { Theme::Fruity => "fruity", } } + + pub(crate) fn as_syntect_theme(&self) -> &'static syntect::highlighting::Theme { + &crate::formatting::THEMES.themes[self.as_str()] + } } #[derive(Debug, Clone, Copy)] diff --git a/src/formatting.rs b/src/formatting.rs index 7fc2a41e..f752d1f4 100644 --- a/src/formatting.rs +++ b/src/formatting.rs @@ -1,4 +1,7 @@ -use std::io::{self, Write}; +use std::{ + io::{self, Write}, + sync::OnceLock, +}; use syntect::dumps::from_binary; use syntect::easy::HighlightLines; @@ -9,6 +12,9 @@ use termcolor::WriteColor; use crate::{buffer::Buffer, cli::Theme}; +pub(crate) mod headers; +pub(crate) mod palette; + pub fn get_json_formatter(indent_level: usize) -> jsonxf::Formatter { let mut fmt = jsonxf::Formatter::pretty_printer(); fmt.indent = " ".repeat(indent_level); @@ -30,7 +36,7 @@ pub fn serde_json_format(indent_level: usize, text: &str, write: impl Write) -> Ok(()) } -static TS: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { +pub(crate) static THEMES: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { from_binary(include_bytes!(concat!( env!("OUT_DIR"), "/themepack.themedump" @@ -53,14 +59,14 @@ pub struct Highlighter<'a> { impl<'a> Highlighter<'a> { pub fn new(syntax: &'static str, theme: Theme, out: &'a mut Buffer) -> Self { let syntax_set: &SyntaxSet = match syntax { - "json" | "http" => &PS_BASIC, + "json" => &PS_BASIC, _ => &PS_LARGE, }; let syntax = syntax_set .find_syntax_by_extension(syntax) .expect("syntax not found"); Self { - highlighter: HighlightLines::new(syntax, &TS.themes[theme.as_str()]), + highlighter: HighlightLines::new(syntax, theme.as_syntect_theme()), syntax_set, out, } @@ -103,7 +109,9 @@ fn convert_style(style: syntect::highlighting::Style) -> termcolor::ColorSpec { use syntect::highlighting::FontStyle; let mut spec = termcolor::ColorSpec::new(); spec.set_fg(convert_color(style.foreground)) - .set_underline(style.font_style.contains(FontStyle::UNDERLINE)); + .set_underline(style.font_style.contains(FontStyle::UNDERLINE)) + .set_bold(style.font_style.contains(FontStyle::BOLD)) + .set_italic(style.font_style.contains(FontStyle::ITALIC)); spec } @@ -142,3 +150,13 @@ fn convert_color(color: syntect::highlighting::Color) -> Option bool { + static SUPPORTS_HYPERLINKS: OnceLock = OnceLock::new(); + *SUPPORTS_HYPERLINKS.get_or_init(supports_hyperlinks::supports_hyperlinks) +} + +pub(crate) fn create_hyperlink(text: &str, url: &str) -> String { + // https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda + format!("\x1B]8;;{url}\x1B\\{text}\x1B]8;;\x1B\\") +} diff --git a/src/formatting/headers.rs b/src/formatting/headers.rs new file mode 100644 index 00000000..3e1711dc --- /dev/null +++ b/src/formatting/headers.rs @@ -0,0 +1,292 @@ +use std::io::Result; + +use reqwest::{ + header::{HeaderMap, HeaderName, HeaderValue}, + Method, StatusCode, Version, +}; +use syntect::highlighting::Theme; +use termcolor::WriteColor; +use url::Url; + +use crate::utils::HeaderValueExt; + +super::palette::palette! { + struct HeaderPalette { + http_keyword: ["keyword.other.http"], + http_separator: ["punctuation.separator.http"], + http_version: ["constant.numeric.http"], + method: ["keyword.control.http"], + path: ["const.language.http"], + status_code: ["constant.numeric.http"], + status_reason: ["keyword.reason.http"], + header_name: ["source.http", "http.requestheaders", "support.variable.http"], + header_colon: ["source.http", "http.requestheaders", "punctuation.separator.http"], + header_value: ["source.http", "http.requestheaders", "string.other.http"], + error: ["error"], + } +} + +macro_rules! set_color { + ($self:ident, $color:ident) => { + if let Some(ref palette) = $self.palette { + $self.output.set_color(&palette.$color) + } else { + Ok(()) + } + }; +} + +pub(crate) struct HeaderFormatter<'a, W: WriteColor> { + output: &'a mut W, + palette: Option, + is_terminal: bool, + sort_headers: bool, +} + +impl<'a, W: WriteColor> HeaderFormatter<'a, W> { + pub(crate) fn new( + output: &'a mut W, + theme: Option<&Theme>, + is_terminal: bool, + sort_headers: bool, + ) -> Self { + Self { + palette: theme.map(HeaderPalette::from), + output, + is_terminal, + sort_headers, + } + } + + fn print(&mut self, text: &str) -> Result<()> { + self.output.write_all(text.as_bytes()) + } + + fn print_plain(&mut self, text: &str) -> Result<()> { + set_color!(self, default)?; + self.print(text) + } + + pub(crate) fn print_request_headers( + &mut self, + method: &Method, + url: &Url, + version: Version, + headers: &HeaderMap, + ) -> Result<()> { + set_color!(self, method)?; + self.print(method.as_str())?; + + self.print_plain(" ")?; + + set_color!(self, path)?; + self.print(url.path())?; + if let Some(query) = url.query() { + self.print("?")?; + self.print(query)?; + } + + self.print_plain(" ")?; + self.print_http_version(version)?; + + self.print_plain("\n")?; + self.print_headers(headers, version)?; + + if self.palette.is_some() { + self.output.reset()?; + } + Ok(()) + } + + pub(crate) fn print_response_headers( + &mut self, + version: Version, + status: StatusCode, + reason_phrase: &str, + headers: &HeaderMap, + ) -> Result<()> { + self.print_http_version(version)?; + + self.print_plain(" ")?; + + set_color!(self, status_code)?; + self.print(status.as_str())?; + + self.print_plain(" ")?; + + set_color!(self, status_reason)?; + self.print(reason_phrase)?; + + self.print_plain("\n")?; + + self.print_headers(headers, version)?; + + if self.palette.is_some() { + self.output.reset()?; + } + Ok(()) + } + + fn print_http_version(&mut self, version: Version) -> Result<()> { + let version = format!("{version:?}"); + let version = version.strip_prefix("HTTP/").unwrap_or(&version); + + set_color!(self, http_keyword)?; + self.print("HTTP")?; + set_color!(self, http_separator)?; + self.print("/")?; + set_color!(self, http_version)?; + self.print(version)?; + + Ok(()) + } + + fn print_headers(&mut self, headers: &HeaderMap, version: Version) -> Result<()> { + let as_titlecase = match version { + Version::HTTP_09 | Version::HTTP_10 | Version::HTTP_11 => true, + Version::HTTP_2 | Version::HTTP_3 => false, + _ => false, + }; + let mut headers: Vec<(&HeaderName, &HeaderValue)> = headers.iter().collect(); + if self.sort_headers { + headers.sort_by_key(|(name, _)| name.as_str()); + } + + let mut namebuf = String::with_capacity(64); + for (name, value) in headers { + let key = if as_titlecase { + titlecase_header(name, &mut namebuf) + } else { + name.as_str() + }; + + set_color!(self, header_name)?; + self.print(key)?; + set_color!(self, header_colon)?; + self.print(":")?; + self.print_plain(" ")?; + + match value.to_ascii_or_latin1() { + Ok(ascii) => { + set_color!(self, header_value)?; + self.print(ascii)?; + } + Err(bad) => { + const FAQ_URL: &str = + "https://github.com/ducaale/xh/blob/master/FAQ.md#header-value-encoding"; + + let mut latin1 = bad.latin1(); + if self.is_terminal { + latin1 = sanitize_header_value(&latin1); + } + set_color!(self, error)?; + self.print(&latin1)?; + + if let Some(utf8) = bad.utf8() { + set_color!(self, default)?; + if self.palette.is_some() && super::supports_hyperlinks() { + self.print(" (")?; + self.print(&super::create_hyperlink("UTF-8", FAQ_URL))?; + self.print(": ")?; + } else { + self.print(" (UTF-8: ")?; + } + + set_color!(self, header_value)?; + // We could escape these as well but latin1 has a much higher chance + // to contain control characters because: + // - ~14% of the possible latin1 codepoints are control characters, + // versus <0.1% for UTF-8. + // - The latin1 text may not be intended as latin1, but if it's valid + // as UTF-8 then chances are that it really is UTF-8. + // We should revisit this if we come up with a general policy for + // escaping control characters, not just in headers. + self.print(utf8)?; + self.print_plain(")")?; + } + } + } + self.print_plain("\n")?; + } + + Ok(()) + } +} + +fn titlecase_header<'b>(name: &HeaderName, buffer: &'b mut String) -> &'b str { + let name = name.as_str(); + buffer.clear(); + buffer.reserve(name.len()); + // Ought to be equivalent to how hyper does it + // https://github.com/hyperium/hyper/blob/f46b175bf71b202fbb907c4970b5743881b891e1/src/proto/h1/role.rs#L1332 + // Header names are ASCII so operating on char or u8 is equivalent + let mut prev = '-'; + for mut c in name.chars() { + if prev == '-' { + c.make_ascii_uppercase(); + } + buffer.push(c); + prev = c; + } + buffer +} + +/// Escape control characters. Firefox uses Unicode replacement characters, +/// that seems like a good choice. +/// +/// Header values can't contain ASCII control characters (like newlines) +/// but if misencoded they frequently contain latin1 control characters. +/// What we do here might not make sense for other strings. +fn sanitize_header_value(value: &str) -> String { + const REPLACEMENT_CHARACTER: &str = "\u{FFFD}"; + value.replace(char::is_control, REPLACEMENT_CHARACTER) +} + +#[cfg(test)] +mod tests { + use indoc::indoc; + + use super::*; + + #[test] + fn test_header_casing() { + let mut headers = HeaderMap::new(); + headers.insert("ab-cd", "0".parse().unwrap()); + headers.insert("-cd", "0".parse().unwrap()); + headers.insert("-", "0".parse().unwrap()); + headers.insert("ab-%c", "0".parse().unwrap()); + headers.insert("A-b--C", "0".parse().unwrap()); + + let mut buf = termcolor::Ansi::new(Vec::new()); + let mut formatter = HeaderFormatter::new(&mut buf, None, false, false); + formatter.print_headers(&headers, Version::HTTP_11).unwrap(); + let buf = buf.into_inner(); + assert_eq!( + buf, + indoc! {b" + Ab-Cd: 0 + -Cd: 0 + -: 0 + Ab-%c: 0 + A-B--C: 0 + " + } + ); + + let mut buf = termcolor::Ansi::new(Vec::new()); + let mut formatter = HeaderFormatter::new(&mut buf, None, false, false); + formatter.print_headers(&headers, Version::HTTP_2).unwrap(); + let buf = buf.into_inner(); + assert_eq!( + buf, + indoc! {b" + ab-cd: 0 + -cd: 0 + -: 0 + ab-%c: 0 + a-b--c: 0 + " + } + ); + } +} diff --git a/src/formatting/palette.rs b/src/formatting/palette.rs new file mode 100644 index 00000000..50f62d73 --- /dev/null +++ b/src/formatting/palette.rs @@ -0,0 +1,68 @@ +//! We used to use syntect for all of our coloring and we still use syntect-compatible +//! files to store themes. +//! +//! But we've started coloring some things manually for better control (and potentially +//! for better efficiency). This macro loads colors from themes and exposes them as +//! fields on a struct. See [`super::headers`] for an example. + +macro_rules! palette { + { + $vis:vis struct $name:ident { + $($color:ident: $scopes:expr,)* + } + } => { + $vis struct $name { + $(pub $color: ::termcolor::ColorSpec,)* + #[allow(unused)] + pub default: ::termcolor::ColorSpec, + } + + impl From<&::syntect::highlighting::Theme> for $name { + fn from(theme: &::syntect::highlighting::Theme) -> Self { + let highlighter = ::syntect::highlighting::Highlighter::new(theme); + let mut parsed_scopes = ::std::vec::Vec::new(); + Self { + $($color: $crate::formatting::palette::util::extract_color( + &highlighter, + &$scopes, + &mut parsed_scopes, + ),)* + default: $crate::formatting::palette::util::extract_default(theme), + } + } + } + } +} + +pub(crate) use palette; + +pub(crate) mod util { + use syntect::{ + highlighting::{Highlighter, Theme}, + parsing::Scope, + }; + use termcolor::ColorSpec; + + use crate::formatting::{convert_color, convert_style}; + + #[inline(never)] + pub(crate) fn extract_color( + highlighter: &Highlighter, + scopes: &[&str], + parsebuf: &mut Vec, + ) -> ColorSpec { + parsebuf.clear(); + parsebuf.extend(scopes.iter().map(|s| s.parse::().unwrap())); + let style = highlighter.style_for_stack(parsebuf); + convert_style(style) + } + + #[inline(never)] + pub(crate) fn extract_default(theme: &Theme) -> ColorSpec { + let mut color = ColorSpec::new(); + if let Some(foreground) = theme.settings.foreground { + color.set_fg(convert_color(foreground)); + } + color + } +} diff --git a/src/main.rs b/src/main.rs index ac1600f0..6061e122 100644 --- a/src/main.rs +++ b/src/main.rs @@ -35,6 +35,7 @@ use reqwest::header::{ }; use reqwest::tls; use url::Host; +use utils::reason_phrase; use crate::auth::{Auth, DigestAuthMiddleware}; use crate::buffer::Buffer; @@ -586,7 +587,7 @@ fn run(args: Cli) -> Result { // HTTPie looks at --quiet, since --quiet always suppresses the response // headers even if you pass --print=h. But --print takes precedence for us. if exit_code != 0 && (is_output_redirected || !print.response_headers) { - log::warn!("HTTP {status}"); + log::warn!("HTTP {} {}", status.as_u16(), reason_phrase(&response)); } } diff --git a/src/printer.rs b/src/printer.rs index 515ef950..9ac1769b 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -7,12 +7,11 @@ use encoding_rs_io::DecodeReaderBytesBuilder; use mime::Mime; use reqwest::blocking::{Body, Request, Response}; use reqwest::cookie::CookieStore; -use reqwest::header::{ - HeaderMap, HeaderName, HeaderValue, ACCEPT, CONTENT_LENGTH, CONTENT_TYPE, COOKIE, HOST, -}; -use reqwest::Version; +use reqwest::header::{HeaderMap, HeaderValue, ACCEPT, CONTENT_LENGTH, CONTENT_TYPE, COOKIE, HOST}; use url::Url; +use crate::formatting::headers::HeaderFormatter; +use crate::utils::reason_phrase; use crate::{ buffer::Buffer, cli::FormatOptions, @@ -142,6 +141,16 @@ impl Printer { Highlighter::new(syntax, self.theme, &mut self.buffer) } + fn get_header_formatter(&mut self) -> HeaderFormatter<'_, Buffer> { + let is_terminal = self.buffer.is_terminal(); + HeaderFormatter::new( + &mut self.buffer, + self.color.then(|| self.theme.as_syntect_theme()), + is_terminal, + self.sort_headers, + ) + } + fn print_colorized_text(&mut self, text: &str, syntax: &'static str) -> io::Result<()> { self.get_highlighter(syntax).highlight(text) } @@ -299,55 +308,6 @@ impl Printer { } } - fn print_headers(&mut self, text: &str) -> io::Result<()> { - if self.color { - self.print_colorized_text(text, "http") - } else { - self.buffer.print(text) - } - } - - fn headers_to_string(&self, headers: &HeaderMap, version: Version) -> String { - let as_titlecase = match version { - Version::HTTP_09 | Version::HTTP_10 | Version::HTTP_11 => true, - Version::HTTP_2 | Version::HTTP_3 => false, - _ => false, - }; - let mut headers: Vec<(&HeaderName, &HeaderValue)> = headers.iter().collect(); - if self.sort_headers { - headers.sort_by_key(|(name, _)| name.as_str()); - } - - let mut header_string = String::new(); - for (key, value) in headers { - if as_titlecase { - // Ought to be equivalent to how hyper does it - // https://github.com/hyperium/hyper/blob/f46b175bf71b202fbb907c4970b5743881b891e1/src/proto/h1/role.rs#L1332 - // Header names are ASCII so it's ok to operate on char instead of u8 - let mut prev = '-'; - for mut c in key.as_str().chars() { - if prev == '-' { - c.make_ascii_uppercase(); - } - header_string.push(c); - prev = c; - } - } else { - header_string.push_str(key.as_str()); - } - header_string.push_str(": "); - match value.to_str() { - Ok(value) => header_string.push_str(value), - #[allow(clippy::format_push_string)] - Err(_) => header_string.push_str(&format!("{:?}", value)), - } - header_string.push('\n'); - } - header_string.pop(); - - header_string - } - pub fn print_separator(&mut self) -> io::Result<()> { self.buffer.print("\n")?; self.buffer.flush()?; @@ -358,9 +318,7 @@ impl Printer { where T: CookieStore, { - let method = request.method(); let url = request.url(); - let query_string = url.query().map_or(String::from(""), |q| ["?", q].concat()); let version = request.version(); let mut headers = request.headers().clone(); @@ -376,16 +334,17 @@ impl Printer { // reqwest and hyper add certain headers, but only in the process of // sending the request, which we haven't done yet if let Some(body) = request.body().and_then(Body::as_bytes) { - // Added at https://github.com/seanmonstar/reqwest/blob/e56bd160ba/src/blocking/request.rs#L132 + // Added at https://github.com/seanmonstar/reqwest/blob/c4ebb07343/src/blocking/request.rs#L144 headers .entry(CONTENT_LENGTH) .or_insert_with(|| body.len().into()); } if let Some(host) = request.url().host_str() { - // This is incorrect in case of HTTP/2, but we're already assuming - // HTTP/1.1 anyway + // FIXME: in case of HTTP/2 we probably don't send this. But we probably + // do send the :authority pseudo-header, and without --http-version we don't + // even know if we're going to use HTTP/2 yet. headers.entry(HOST).or_insert_with(|| { - // Added at https://github.com/hyperium/hyper/blob/dfa1bb291d/src/client/client.rs#L237 + // Added at https://github.com/hyperium/hyper-util/blob/53aadac50d/src/client/legacy/client.rs#L278 if test_mode() { HeaderValue::from_str("http.mock") } else if let Some(port) = request.url().port() { @@ -397,25 +356,27 @@ impl Printer { }); } - let request_line = format!("{} {}{} {:?}\n", method, url.path(), query_string, version); - let headers = self.headers_to_string(&headers, version); + self.get_header_formatter().print_request_headers( + request.method(), + request.url(), + version, + &headers, + )?; - self.print_headers(&(request_line + &headers))?; - self.buffer.print("\n\n")?; + self.buffer.print("\n")?; self.buffer.flush()?; Ok(()) } pub fn print_response_headers(&mut self, response: &Response) -> io::Result<()> { - let version = response.version(); - let status = response.status(); - let headers = response.headers(); - - let status_line = format!("{:?} {}\n", version, status); - let headers = self.headers_to_string(headers, version); + self.get_header_formatter().print_response_headers( + response.version(), + response.status(), + &reason_phrase(response), + response.headers(), + )?; - self.print_headers(&(status_line + &headers))?; - self.buffer.print("\n\n")?; + self.buffer.print("\n")?; self.buffer.flush()?; Ok(()) } @@ -484,7 +445,7 @@ impl Printer { } else { let mut buf = Vec::new(); body.read_to_end(&mut buf)?; - self.buffer.print(&buf)?; + self.buffer.write_all(&buf)?; } } else if stream { match self @@ -524,11 +485,11 @@ impl Printer { total_elapsed_time += content_download_duration.as_secs_f64(); } self.buffer - .print(format!("Elapsed time: {:.5}s\n", total_elapsed_time))?; + .print(&format!("Elapsed time: {:.5}s\n", total_elapsed_time))?; if let Some(remote_addr) = response.remote_addr() { self.buffer - .print(format!("Remote address: {:?}\n", remote_addr))?; + .print(&format!("Remote address: {:?}\n", remote_addr))?; } self.buffer.print("\n")?; @@ -751,8 +712,6 @@ fn get_charset(response: &Response) -> Option<&'static Encoding> { #[cfg(test)] mod tests { - use indoc::indoc; - use crate::utils::random_string; use crate::{buffer::Buffer, cli::Cli, vec_of_strings}; @@ -841,46 +800,4 @@ mod tests { assert_eq!(p.color, true); assert!(p.buffer.is_stderr()); } - - #[test] - fn test_header_casing() { - let p = Printer { - json_indent_level: 4, - format_json: false, - sort_headers: false, - color: false, - theme: Theme::Auto, - stream: false.into(), - buffer: Buffer::new(false, None, false).unwrap(), - }; - - let mut headers = HeaderMap::new(); - headers.insert("ab-cd", "0".parse().unwrap()); - headers.insert("-cd", "0".parse().unwrap()); - headers.insert("-", "0".parse().unwrap()); - headers.insert("ab-%c", "0".parse().unwrap()); - headers.insert("A-b--C", "0".parse().unwrap()); - - assert_eq!( - p.headers_to_string(&headers, reqwest::Version::HTTP_11), - indoc! {" - Ab-Cd: 0 - -Cd: 0 - -: 0 - Ab-%c: 0 - A-B--C: 0" - } - ); - - assert_eq!( - p.headers_to_string(&headers, reqwest::Version::HTTP_2), - indoc! {" - ab-cd: 0 - -cd: 0 - -: 0 - ab-%c: 0 - a-b--c: 0" - } - ); - } } diff --git a/src/utils.rs b/src/utils.rs index 2916d73a..05dd84af 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -5,7 +5,7 @@ use std::path::{Path, PathBuf}; use std::str::Utf8Error; use anyhow::Result; -use reqwest::blocking::Request; +use reqwest::blocking::{Request, Response}; use reqwest::header::HeaderValue; use url::Url; @@ -182,10 +182,94 @@ pub fn copy_largebuf( pub(crate) trait HeaderValueExt { fn to_utf8_str(&self) -> Result<&str, Utf8Error>; + + fn to_ascii_or_latin1(&self) -> Result<&str, BadHeaderValue<'_>>; } impl HeaderValueExt for HeaderValue { fn to_utf8_str(&self) -> Result<&str, Utf8Error> { std::str::from_utf8(self.as_bytes()) } + + /// If the value is pure ASCII, return Ok(). If not, return Err() with methods for + /// further handling. + /// + /// The Ok() version cannot contain control characters (not even ASCII ones). + fn to_ascii_or_latin1(&self) -> Result<&str, BadHeaderValue<'_>> { + self.to_str().map_err(|_| BadHeaderValue { value: self }) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(crate) struct BadHeaderValue<'a> { + value: &'a HeaderValue, +} + +impl<'a> BadHeaderValue<'a> { + /// Return the header value's latin1 decoding, AKA isomorphic decode, + /// AKA ISO-8859-1 decode. This is how browsers tend to handle it. + /// + /// Not to be confused with ISO 8859-1 (which leaves 0x8X and 0x9X unmapped) + /// or with Windows-1252 (which is how HTTP bodies are decoded if they + /// declare `Content-Encoding: iso-8859-1`). + /// + /// Is likely to contain control characters. Consider replacing these. + pub(crate) fn latin1(self) -> String { + // https://infra.spec.whatwg.org/#isomorphic-decode + self.value.as_bytes().iter().map(|&b| b as char).collect() + } + + /// Return the header value's UTF-8 decoding. This is most likely what the + /// user expects, but when browsers prefer another encoding we should give + /// that one precedence. + pub(crate) fn utf8(self) -> Option<&'a str> { + self.value.to_utf8_str().ok() + } +} + +pub(crate) fn reason_phrase(response: &Response) -> Cow<'_, str> { + if let Some(reason) = response.extensions().get::() { + // The server sent a non-standard reason phrase. + // Seems like some browsers interpret this as latin1 and others as UTF-8? + // Rare case and clients aren't supposed to pay attention to the reason + // phrase so let's just do UTF-8 for convenience. + // We could send the bytes straight to stdout/stderr in case they're some + // other encoding but that's probably not worth the effort. + String::from_utf8_lossy(reason.as_bytes()) + } else if let Some(reason) = response.status().canonical_reason() { + // On HTTP/2+ no reason phrase is sent so we're just explaining the code + // to the user. + // On HTTP/1.1 and below this matches the reason the server actually sent + // or else hyper would have added a ReasonPhrase. + Cow::Borrowed(reason) + } else { + // Only reachable in case of an unknown status code over HTTP/2+. + // curl prints nothing in this case. + Cow::Borrowed("") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_latin1() { + let good = HeaderValue::from_static("Rhodes"); + let good = good.to_ascii_or_latin1(); + + assert_eq!(good, Ok("Rhodes")); + + let bad = HeaderValue::from_bytes("Ῥόδος".as_bytes()).unwrap(); + let bad = bad.to_ascii_or_latin1().unwrap_err(); + + assert_eq!(bad.latin1(), "ῬÏ\u{8c}δοÏ\u{82}"); + assert_eq!(bad.utf8(), Some("Ῥόδος")); + + let worse = HeaderValue::from_bytes(b"R\xF3dos").unwrap(); + let worse = worse.to_ascii_or_latin1().unwrap_err(); + + assert_eq!(worse.latin1(), "Ródos"); + assert_eq!(worse.utf8(), None); + } } diff --git a/tests/cli.rs b/tests/cli.rs index 6c54d5a3..f9c05624 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -18,6 +18,7 @@ use http_body_util::BodyExt; use indoc::indoc; use predicates::function::function; use predicates::str::contains; +use reqwest::header::HeaderValue; use tempfile::{tempdir, NamedTempFile, TempDir}; pub trait RequestExt { @@ -1709,7 +1710,31 @@ fn support_utf8_header_value() { HTTP/1.1 200 OK Content-Length: 0 Date: N/A - Hello: "\xe4\xbd\xa0\xe5\xa5\xbd\xe5\x91\x80" + Hello: 你好å�� (UTF-8: 你好呀) + + + "#}) + .success(); +} + +#[test] +fn support_latin1_header_value() { + let server = server::http(|_req| async move { + hyper::Response::builder() + .header("hello", HeaderValue::from_bytes(b"R\xF3dos").unwrap()) + .header("Date", "N/A") + .body("".into()) + .unwrap() + }); + + get_command() + .arg(server.base_url()) + .assert() + .stdout(indoc! {r#" + HTTP/1.1 200 OK + Content-Length: 0 + Date: N/A + Hello: Ródos "#}) @@ -1748,7 +1773,7 @@ fn redirect_support_utf8_location() { HTTP/1.1 302 Found Content-Length: 14 Date: N/A - Location: "/page\xe4\xba\x8c" + Location: /pageäº� (UTF-8: /page二) redirecting... @@ -3747,3 +3772,25 @@ fn multiple_format_options_are_merged() { "#}); } + +#[test] +fn reason_phrase_is_preserved() { + let server = server::http(|_req| async move { + let mut response = hyper::Response::builder(); + response + .extensions_mut() + .unwrap() + .insert(hyper::ext::ReasonPhrase::from_static(b"Wonderful")); + response.header("Date", "N/A").body("".into()).unwrap() + }); + get_command() + .arg(server.base_url()) + .assert() + .stdout(indoc! {r#" + HTTP/1.1 200 Wonderful + Content-Length: 0 + Date: N/A + + + "#}); +}