From 7e5d578f06f9f768ab42c6a3ed842c6501b6bc06 Mon Sep 17 00:00:00 2001 From: Martin Connell Date: Mon, 28 Jan 2019 12:18:24 +0000 Subject: [PATCH] FIX - LangTag extraction logic broken by URL with query string immediately after langtag #383 --- src/i18n.Domain/Concrete/LanguageTag.cs | 8 ++-- src/i18n.Tests/Tests/LanguageTagTests.cs | 48 ++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/src/i18n.Domain/Concrete/LanguageTag.cs b/src/i18n.Domain/Concrete/LanguageTag.cs index 96f0318..9404dbb 100644 --- a/src/i18n.Domain/Concrete/LanguageTag.cs +++ b/src/i18n.Domain/Concrete/LanguageTag.cs @@ -92,7 +92,7 @@ public enum MatchGrade // Matches private use subtag // eg en-ABCD-GB-x-AAAA public static Regex s_regex_parseUrl = new System.Text.RegularExpressions.Regex( - @"^/([a-zA-Z]{2,3}(?:-[a-zA-Z]{4,5})?(?:-(?:[a-zA-Z]{2}|[0-9]{3}))?(?:\-x-([a-zA-Z0-9]{4,}))?)(?:$|/)", + @"^/([a-zA-Z]{2,3}(?:-[a-zA-Z]{4,5})?(?:-(?:[a-zA-Z]{2}|[0-9]{3}))?(?:\-x-([a-zA-Z0-9]{4,}))?)(?:$|/|\?|#)", System.Text.RegularExpressions.RegexOptions.CultureInvariant); // ^/ // ( # begin 1st and only capture group @@ -101,7 +101,7 @@ public enum MatchGrade // (?:-(?:[a-zA-Z]{2}|[0-9]{3}))? # optional region code (2-letter or 3-digit) - not a capture group itself // (?:\-x-([a-zA-Z0-9]{4,}))? # optional private use tag (-x- followed by 4+ alphanumericcharacters) - not a capture group itself // ) # end 1st and only capture group - // (?:$|/) # match end of string or fwd-slash char - not a capture group itself + // (?:$|/|\?|#) # match end of string or fwd-slash char or question-mark char or hash char - not a capture group itself private static ConcurrentDictionary s_cache = new ConcurrentDictionary(); // Facilitates fast and efficient re-use of languag tag instances. // Key = langtag string. @@ -533,8 +533,8 @@ public static string ExtractLangTagFromUrl(string url, UriKind uriKind, out stri string langtag = match.Groups[1].Value; // Patch the url. 
urlPatched = url.Substring(langtag.Length +1); - if (urlPatched.Length == 0) { - urlPatched = "/"; } + if (!urlPatched.StartsWith("/")) { + urlPatched = "/" + urlPatched; } // Success. return langtag; } diff --git a/src/i18n.Tests/Tests/LanguageTagTests.cs b/src/i18n.Tests/Tests/LanguageTagTests.cs index 1b97c76..f678bb6 100644 --- a/src/i18n.Tests/Tests/LanguageTagTests.cs +++ b/src/i18n.Tests/Tests/LanguageTagTests.cs @@ -39,6 +39,36 @@ public void ExtractLangTagFromUrl() ExtractLangTagFromUrlHelper("/zh-Hans-123-x-ABCD" , "zh-Hans-123-x-ABCD" , "/"); ExtractLangTagFromUrlHelper("/zh-Hans-123-x-ABCDEFG123", "zh-Hans-123-x-ABCDEFG123", "/"); + // #383 + ExtractLangTagFromUrlHelper("/zh?" , "zh" , "/?"); + ExtractLangTagFromUrlHelper("/zh?qs" , "zh" , "/?qs"); + ExtractLangTagFromUrlHelper("/zh-HK?qs" , "zh-HK" , "/?qs"); + ExtractLangTagFromUrlHelper("/zh-123?qs" , "zh-123" , "/?qs"); + ExtractLangTagFromUrlHelper("/zh-Hans?qs" , "zh-Hans" , "/?qs"); + ExtractLangTagFromUrlHelper("/zh-Hans-HK?qs" , "zh-Hans-HK" , "/?qs"); + ExtractLangTagFromUrlHelper("/zh-Hans-123?qs" , "zh-Hans-123" , "/?qs"); + ExtractLangTagFromUrlHelper("/zh-Hans-123-x-ABCD?qs" , "zh-Hans-123-x-ABCD" , "/?qs"); + ExtractLangTagFromUrlHelper("/zh-Hans-123-x-ABCDEFG123?qs", "zh-Hans-123-x-ABCDEFG123", "/?qs"); + + ExtractLangTagFromUrlHelper("/zh#" , "zh" , "/#"); + ExtractLangTagFromUrlHelper("/zh#bm" , "zh" , "/#bm"); + ExtractLangTagFromUrlHelper("/zh-HK#bm" , "zh-HK" , "/#bm"); + ExtractLangTagFromUrlHelper("/zh-123#bm" , "zh-123" , "/#bm"); + ExtractLangTagFromUrlHelper("/zh-Hans#bm" , "zh-Hans" , "/#bm"); + ExtractLangTagFromUrlHelper("/zh-Hans-HK#bm" , "zh-Hans-HK" , "/#bm"); + ExtractLangTagFromUrlHelper("/zh-Hans-123#bm" , "zh-Hans-123" , "/#bm"); + ExtractLangTagFromUrlHelper("/zh-Hans-123-x-ABCD#bm" , "zh-Hans-123-x-ABCD" , "/#bm"); + ExtractLangTagFromUrlHelper("/zh-Hans-123-x-ABCDEFG123#bm", "zh-Hans-123-x-ABCDEFG123", "/#bm"); + + ExtractLangTagFromUrlHelper("/zh/?qs" , "zh" , 
"/?qs"); + ExtractLangTagFromUrlHelper("/zh-HK/?qs" , "zh-HK" , "/?qs"); + ExtractLangTagFromUrlHelper("/zh-123/?qs" , "zh-123" , "/?qs"); + ExtractLangTagFromUrlHelper("/zh-Hans/?qs" , "zh-Hans" , "/?qs"); + ExtractLangTagFromUrlHelper("/zh-Hans-HK/?qs" , "zh-Hans-HK" , "/?qs"); + ExtractLangTagFromUrlHelper("/zh-Hans-123/?qs" , "zh-Hans-123" , "/?qs"); + ExtractLangTagFromUrlHelper("/zh-Hans-123-x-ABCD/?qs" , "zh-Hans-123-x-ABCD" , "/?qs"); + ExtractLangTagFromUrlHelper("/zh-Hans-123-x-ABCDEFG123/?qs", "zh-Hans-123-x-ABCDEFG123", "/?qs"); + ExtractLangTagFromUrlHelper("/zh/account" , "zh" , "/account"); ExtractLangTagFromUrlHelper("/zh-HK/account" , "zh-HK" , "/account"); ExtractLangTagFromUrlHelper("/zh-123/account" , "zh-123" , "/account"); @@ -48,6 +78,15 @@ public void ExtractLangTagFromUrl() ExtractLangTagFromUrlHelper("/zh-Hans-123-x-ABCD/account" , "zh-Hans-123-x-ABCD" , "/account"); ExtractLangTagFromUrlHelper("/zh-Hans-123-x-ABCDEFG123/account", "zh-Hans-123-x-ABCDEFG123", "/account"); + ExtractLangTagFromUrlHelper("/zh/account?qs" , "zh" , "/account?qs"); + ExtractLangTagFromUrlHelper("/zh-HK/account?qs" , "zh-HK" , "/account?qs"); + ExtractLangTagFromUrlHelper("/zh-123/account?qs" , "zh-123" , "/account?qs"); + ExtractLangTagFromUrlHelper("/zh-Hans/account?qs" , "zh-Hans" , "/account?qs"); + ExtractLangTagFromUrlHelper("/zh-Hans-HK/account?qs" , "zh-Hans-HK" , "/account?qs"); + ExtractLangTagFromUrlHelper("/zh-Hans-123/account?qs" , "zh-Hans-123" , "/account?qs"); + ExtractLangTagFromUrlHelper("/zh-Hans-123-x-ABCD/account?qs" , "zh-Hans-123-x-ABCD" , "/account?qs"); + ExtractLangTagFromUrlHelper("/zh-Hans-123-x-ABCDEFG123/account?qs", "zh-Hans-123-x-ABCDEFG123", "/account?qs"); + ExtractLangTagFromUrlHelper("/zh/account/" , "zh" , "/account/"); ExtractLangTagFromUrlHelper("/zh-HK/account/" , "zh-HK" , "/account/"); ExtractLangTagFromUrlHelper("/zh-123/account/" , "zh-123" , "/account/"); @@ -57,6 +96,15 @@ public void ExtractLangTagFromUrl() 
ExtractLangTagFromUrlHelper("/zh-Hans-123-x-ABCD/account/" , "zh-Hans-123-x-ABCD" , "/account/"); ExtractLangTagFromUrlHelper("/zh-Hans-123-x-ABCDEFG123/account/", "zh-Hans-123-x-ABCDEFG123", "/account/"); + ExtractLangTagFromUrlHelper("/zh/account/?qs" , "zh" , "/account/?qs"); + ExtractLangTagFromUrlHelper("/zh-HK/account/?qs" , "zh-HK" , "/account/?qs"); + ExtractLangTagFromUrlHelper("/zh-123/account/?qs" , "zh-123" , "/account/?qs"); + ExtractLangTagFromUrlHelper("/zh-Hans/account/?qs" , "zh-Hans" , "/account/?qs"); + ExtractLangTagFromUrlHelper("/zh-Hans-HK/account/?qs" , "zh-Hans-HK" , "/account/?qs"); + ExtractLangTagFromUrlHelper("/zh-Hans-123/account/?qs" , "zh-Hans-123" , "/account/?qs"); + ExtractLangTagFromUrlHelper("/zh-Hans-123-x-ABCD/account/?qs" , "zh-Hans-123-x-ABCD" , "/account/?qs"); + ExtractLangTagFromUrlHelper("/zh-Hans-123-x-ABCDEFG123/account/?qs", "zh-Hans-123-x-ABCDEFG123", "/account/?qs"); + ExtractLangTagFromUrlHelper("/zh/account/x" , "zh" , "/account/x"); ExtractLangTagFromUrlHelper("/zh-HK/account/x" , "zh-HK" , "/account/x"); ExtractLangTagFromUrlHelper("/zh-123/account/x" , "zh-123" , "/account/x");