From 69371ebeb8974ae3995f18f632a40d99b0f5090d Mon Sep 17 00:00:00 2001 From: ngala Date: Thu, 30 Nov 2023 12:30:59 +0530 Subject: [PATCH 1/6] Add support for domain level redirects Related to: https://gitlab.com/gitlab-org/gitlab-pages/-/issues/601 Changelog: added --- internal/feature/feature.go | 28 +- internal/redirects/matching.go | 117 +++++++-- internal/redirects/matching_test.go | 342 ++++++++++++++++++++++++- internal/redirects/redirects.go | 6 +- internal/redirects/utils.go | 25 ++ internal/redirects/utils_test.go | 81 ++++++ internal/redirects/validations.go | 68 +++-- internal/redirects/validations_test.go | 190 +++++++++++++- 8 files changed, 798 insertions(+), 59 deletions(-) create mode 100644 internal/redirects/utils.go create mode 100644 internal/redirects/utils_test.go diff --git a/internal/feature/feature.go b/internal/feature/feature.go index d5c340ff4..bad58441b 100644 --- a/internal/feature/feature.go +++ b/internal/feature/feature.go @@ -7,6 +7,19 @@ type Feature struct { defaultEnabled bool } +// Enabled reads the environment variable responsible for the feature flag +// if FF is disabled by default, the environment variable needs to be "true" to explicitly enable it +// if FF is enabled by default, variable needs to be "false" to explicitly disable it +func (f Feature) Enabled() bool { + env := os.Getenv(f.EnvVariable) + + if f.defaultEnabled { + return env != "false" + } + + return env == "true" +} + // RedirectsPlaceholders enables support for placeholders in redirects file // TODO: remove https://gitlab.com/gitlab-org/gitlab-pages/-/issues/620 var RedirectsPlaceholders = Feature{ @@ -19,15 +32,8 @@ var HandleReadErrors = Feature{ EnvVariable: "FF_HANDLE_READ_ERRORS", } -// Enabled reads the environment variable responsible for the feature flag -// if FF is disabled by default, the environment variable needs to be "true" to explicitly enable it -// if FF is enabled by default, variable needs to be "false" to explicitly disable it -func (f Feature) Enabled() bool { - env := os.Getenv(f.EnvVariable) - - if f.defaultEnabled { - return env != "false" - } - - return env == "true" +// DomainRedirects enables support for domain level redirects +var DomainRedirects = Feature{ + EnvVariable: "FF_ENABLE_DOMAIN_REDIRECT", + defaultEnabled: false, } diff --git a/internal/redirects/matching.go b/internal/redirects/matching.go index d52863ee6..68d53b616 100644 --- a/internal/redirects/matching.go +++ b/internal/redirects/matching.go @@ -2,6 +2,7 @@ package redirects import ( "fmt" + "net/url" "regexp" "strings" @@ -12,11 +13,12 @@ import ( ) var ( - regexMultipleSlashes = regexp.MustCompile(`//+`) + regexMultipleSlashes = regexp.MustCompile(`([^:])//+`) regexPlaceholderOrSplats = regexp.MustCompile(`(?i)\*|:[a-z]+`) ) // matchesRule returns `true` if the rule's "from" pattern matches the requested URL. +// This internally calls matchesRuleWithPlaceholderOrSplats to match rules. // // For example, given a "from" URL like this: // @@ -30,37 +32,58 @@ var ( // If the first return value is `true`, the second return value is the path that this // rule should redirect/rewrite to. This path is effectively the rule's "to" path that // has been templated with all the placeholders (if any) from the originally requested URL. -// -// TODO: Likely these should include host comparison once we have domain-level redirects -// https://gitlab.com/gitlab-org/gitlab-pages/-/issues/601 -func matchesRule(rule *netlifyRedirects.Rule, path string) (bool, string) { +func matchesRule(rule *netlifyRedirects.Rule, originalURL *url.URL) (bool, string) { + path := originalURL.Path + if feature.DomainRedirects.Enabled() { + isFromDomainURL := isDomainURL(rule.From) + isDomainRule := isFromDomainURL || isDomainURL(rule.To) + + if isDomainRule { + if isFromDomainURL && !isHostMatching(originalURL, rule.From) { + return false, "" + } + + path = originalURL.String() + } + } + // If the requested URL exactly matches this rule's "from" path, // exit early and return the rule's "to" path to avoid building // and compiling the regex below. // However, only do this if there's nothing to template in the "to" path, - // to avoid redirect/rewriting to a url with a literal `:placeholder` in it. + // to avoid redirect/rewriting to a originalURL with a literal `:placeholder` in it. if normalizePath(rule.From) == normalizePath(path) && !regexPlaceholderOrSplats.MatchString(rule.To) { return true, rule.To } + return matchesRuleWithPlaceholderOrSplats(rule, path) +} + +// matchesRuleWithPlaceholderOrSplats returns `true` if the rule's "from" pattern matches the requested URL. +// This is specifically for Placeholders and Splats matching +// +// For example, given a "from" URL like this: +// +// /a/*/originalURL/with/:placeholders +// +// this function would match URLs like this: +// +// /a/nice/originalURL/with/text +// /a/super/extra/nice/originalURL/with/matches +// +// If the first return value is `true`, the second return value is the path that this +// rule should redirect/rewrite to. This path is effectively the rule's "to" path that +// has been templated with all the placeholders (if any) from the originally requested URL. +func matchesRuleWithPlaceholderOrSplats(rule *netlifyRedirects.Rule, path string) (bool, string) { // Any logic beyond this point handles placeholders and splats. // If the FF_ENABLE_PLACEHOLDERS feature flag isn't enabled, exit now. if !feature.RedirectsPlaceholders.Enabled() { return false, "" } - var regexSegments []string - for _, segment := range strings.Split(rule.From, "/") { - if segment == "" { - continue - } else if regexSplat.MatchString(segment) { - regexSegments = append(regexSegments, `/*(?P.*)/*`) - } else if regexPlaceholder.MatchString(segment) { - segmentName := strings.Replace(segment, ":", "", 1) - regexSegments = append(regexSegments, fmt.Sprintf(`/+(?P<%s>[^/]+)`, segmentName)) - } else { - regexSegments = append(regexSegments, "/+"+regexp.QuoteMeta(segment)) - } + regexSegments := convertToRegexSegments(rule.From) + if len(regexSegments) == 0 { + return false, "" } fromRegexString := `(?i)^` + strings.Join(regexSegments, "") + `/*$` @@ -84,24 +107,74 @@ func matchesRule(rule *netlifyRedirects.Rule, path string) (bool, string) { return false, "" } - templatedToPath := []byte{} + var templatedToPath []byte templatedToPath = fromRegex.ExpandString(templatedToPath, template, path, submatchIndex) // Some replacements result in subsequent slashes. For example, a rule with a "to" // like `foo/:splat/bar` will result in a path like `foo//bar` if the splat // character matches nothing. To avoid this, replace all instances // of multiple subsequent forward slashes with a single forward slash. - templatedToPath = regexMultipleSlashes.ReplaceAll(templatedToPath, []byte("/")) + // The regex captures any character except a colon ([^:]) before the double slashes + // and includes it in the replacement. + templatedToPath = regexMultipleSlashes.ReplaceAll(templatedToPath, []byte("$1/")) return true, string(templatedToPath) } +// convertToRegexSegments returns array of regex segments +func convertToRegexSegments(path string) []string { + var regexSegments []string + + // For domain host + if strings.HasPrefix(path, "http") { + for _, segment := range strings.Split(path, "://") { + if segment == "" { + continue + } else if segment == "http" || segment == "https" { + regexSegments = append(regexSegments, regexp.QuoteMeta(segment+"://")) + } else { + parts := strings.SplitN(segment, "/", 2) + + if len(parts) != 2 { + return nil + } + + // adding host here + regexSegments = append(regexSegments, regexp.QuoteMeta(parts[0])) + path = "/" + parts[1] + } + } + } + + return append(regexSegments, processPathSegments(path)...) +} + +// Process path segments +func processPathSegments(path string) []string { + var regexSegments []string + + for _, segment := range strings.Split(path, "/") { + if segment == "" { + continue + } else if regexSplat.MatchString(segment) { + regexSegments = append(regexSegments, `/*(?P.*)/*`) + } else if regexPlaceholder.MatchString(segment) { + segmentName := strings.Replace(segment, ":", "", 1) + regexSegments = append(regexSegments, fmt.Sprintf(`/+(?P<%s>[^/]+)`, segmentName)) + } else { + regexSegments = append(regexSegments, "/+"+regexp.QuoteMeta(segment)) + } + } + + return regexSegments +} + // `match` returns: // 1. The first valid redirect or rewrite rule that matches the requested URL // 2. The URL to redirect/rewrite to // // If no rule matches, this function returns `nil` and an empty string -func (r *Redirects) match(path string) (*netlifyRedirects.Rule, string) { +func (r *Redirects) match(originalURL *url.URL) (*netlifyRedirects.Rule, string) { for i := range r.rules { if i >= cfg.MaxRuleCount { // do not process any more rules @@ -116,7 +189,7 @@ func (r *Redirects) match(path string) (*netlifyRedirects.Rule, string) { continue } - if isMatch, path := matchesRule(&rule, path); isMatch { + if isMatch, path := matchesRule(&rule, originalURL); isMatch { return &rule, path } } diff --git a/internal/redirects/matching_test.go b/internal/redirects/matching_test.go index 23815b976..433de0401 100644 --- a/internal/redirects/matching_test.go +++ b/internal/redirects/matching_test.go @@ -1,6 +1,7 @@ package redirects import ( + "net/url" "testing" "github.com/stretchr/testify/require" @@ -56,7 +57,7 @@ var testsWithoutPlaceholders = map[string]testCaseData{ }, } -func Test_matchesRule(t *testing.T) { +func testMatchesRule(t *testing.T) { t.Setenv(feature.RedirectsPlaceholders.EnvVariable, "true") tests := mergeTestSuites(testsWithoutPlaceholders, map[string]testCaseData{ @@ -68,11 +69,12 @@ func Test_matchesRule(t *testing.T) { expectMatch: true, expectedPath: "/bar/", }, + // Since we are treating path as an entire path, so // is considered domain name "multiple_leading_slashes": { rule: "/foo/ /bar/", path: "//foo", - expectMatch: true, - expectedPath: "/bar/", + expectMatch: false, + expectedPath: "", }, "multiple_slashes_in_middle": { rule: "/foo/bar /baz/", @@ -301,13 +303,25 @@ func Test_matchesRule(t *testing.T) { rules, err := netlifyRedirects.ParseString(tt.rule) require.NoError(t, err) - isMatch, path := matchesRule(&rules[0], tt.path) + parsedURL, err := url.Parse(tt.path) + require.NoError(t, err) + + isMatch, path := matchesRule(&rules[0], parsedURL) require.Equal(t, tt.expectMatch, isMatch) require.Equal(t, tt.expectedPath, path) }) } } +func Test_matchesRule(t *testing.T) { + testMatchesRule(t) +} + +func Test_matchesNonDomainRule_DomainRedirects_enabled(t *testing.T) { + t.Setenv(feature.DomainRedirects.EnvVariable, "true") + testMatchesRule(t) +} + // Tests matching behavior when the `FF_ENABLE_PLACEHOLDERS` // feature flag is not enabled. These tests can be removed when the // `FF_ENABLE_PLACEHOLDERS` flag is removed. @@ -343,7 +357,325 @@ func Test_matchesRule_NoPlaceholders(t *testing.T) { rules, err := netlifyRedirects.ParseString(tt.rule) require.NoError(t, err) - isMatch, path := matchesRule(&rules[0], tt.path) + parsedURL, err := url.Parse(tt.path) + require.NoError(t, err) + + isMatch, path := matchesRule(&rules[0], parsedURL) + require.Equal(t, tt.expectMatch, isMatch) + require.Equal(t, tt.expectedPath, path) + }) + } +} + +// Tests matching behavior when the `FF_ENABLE_DOMAIN_REDIRECT` +// feature flag is enabled. +func Test_matchesDomainRule(t *testing.T) { + t.Setenv(feature.RedirectsPlaceholders.EnvVariable, "true") + t.Setenv(feature.DomainRedirects.EnvVariable, "true") + + tests := map[string]testCaseData{ + "exact_match": { + rule: "http://pages.example.io/foo/ http://test.example.io/bar/", + path: "http://pages.example.io/foo/", + expectMatch: true, + expectedPath: "http://test.example.io/bar/", + }, + "single_trailing_slash": { + rule: "http://pages.example.io/foo/ http://test.example.io/bar/", + path: "http://pages.example.io/foo", + expectMatch: true, + expectedPath: "http://test.example.io/bar/", + }, + "ignore_missing_slash": { + rule: "http://pages.example.io/foo http://test.example.io/bar/", + path: "http://pages.example.io/foo/", + expectMatch: true, + expectedPath: "http://test.example.io/bar/", + }, + "no_match": { + rule: "http://pages.example.io/foo http://test.example.io/bar/", + path: "http://pages.example.io/foo/bar", + expectMatch: false, + expectedPath: "", + }, + + // Note: the following 3 cases behave differently when + // placeholders are disabled. See the similar test cases below. + "multiple_trailing_slashes": { + rule: "http://pages.example.io/foo/ http://test.example.io/bar/", + path: "http://pages.example.io/foo//", + expectMatch: true, + expectedPath: "http://test.example.io/bar/", + }, + "multiple_slashes_in_middle": { + rule: "http://pages.example.io/foo/bar http://test.example.io/baz/", + path: "http://pages.example.io/foo//bar", + expectMatch: true, + expectedPath: "http://test.example.io/baz/", + }, + + "domain_redirect_different_protocol": { + rule: "http://pages.example.io/foo/bar https://test.example.io/baz/", + path: "http://pages.example.io/foo//bar", + expectMatch: true, + expectedPath: "https://test.example.io/baz/", + }, + + "splat_match": { + rule: "http://pages.example.io/foo/*/bar http://test.example.io/foo/:splat/qux", + path: "http://pages.example.io/foo/baz/bar", + expectMatch: true, + expectedPath: "http://test.example.io/foo/baz/qux", + }, + "splat_match_multiple_segments": { + rule: "http://pages.example.io/foo/*/bar http://test.example.io/foo/:splat/qux", + path: "http://pages.example.io/foo/hello/world/bar", + expectMatch: true, + expectedPath: "http://test.example.io/foo/hello/world/qux", + }, + "splat_match_ignore_trailing_slash": { + rule: "http://pages.example.io/foo/*/bar http://test.example.io/foo/:splat/qux", + path: "http://pages.example.io/foo/baz/bar/", + expectMatch: true, + expectedPath: "http://test.example.io/foo/baz/qux", + }, + "splat_match_end": { + rule: "http://pages.example.io/foo/* http://test.example.io/qux/:splat", + path: "http://pages.example.io/foo/baz/bar", + expectMatch: true, + expectedPath: "http://test.example.io/qux/baz/bar", + }, + "splat_match_end_with_slash": { + rule: "http://pages.example.io/foo/* http://test.example.io/qux/:splat", + path: "http://pages.example.io/foo/baz/bar/", + expectMatch: true, + expectedPath: "http://test.example.io/qux/baz/bar/", + }, + "splat_match_beginning": { + rule: "http://pages.example.io/*/baz/bar http://test.example.io/qux/:splat", + path: "http://pages.example.io/foo/baz/bar", + expectMatch: true, + expectedPath: "http://test.example.io/qux/foo", + }, + "splat_match_empty_suffix": { + rule: "http://pages.example.io/foo/* http://test.example.io/qux/:splat", + path: "http://pages.example.io/foo/", + expectMatch: true, + expectedPath: "http://test.example.io/qux/", + }, + "splat_consumes_trailing_slash": { + rule: "http://pages.example.io/foo/* http://test.example.io/qux/:splat", + path: "http://pages.example.io/foo", + expectMatch: true, + expectedPath: "http://test.example.io/qux/", + }, + "splat_match_empty_prefix": { + rule: "http://pages.example.io/*/foo http://test.example.io/qux/:splat", + path: "http://pages.example.io/foo", + expectMatch: true, + expectedPath: "http://test.example.io/qux/", + }, + "splat_mid_segment": { + rule: "http://pages.example.io/foo*bar http://test.example.io/qux/:splat", + path: "http://pages.example.io/foobazbar", + expectMatch: false, + expectedPath: "", + }, + "splat_mid_segment_no_content": { + rule: "http://pages.example.io/foo*bar http://test.example.io/qux/:splat", + path: "http://pages.example.io/foobar", + expectMatch: false, + expectedPath: "", + }, + "lone_splat": { + rule: "http://pages.example.io/* http://test.example.io/qux/:splat", + path: "http://pages.example.io/foo/bar", + expectMatch: true, + expectedPath: "http://test.example.io/qux/foo/bar", + }, + "multiple_splats": { + rule: "http://pages.example.io/foo/*/bar/*/baz http://test.example.io/qux/:splat", + path: "http://pages.example.io/foo/hello/bar/world/baz", + expectMatch: true, + expectedPath: "http://test.example.io/qux/hello", + }, + "duplicate_splat_replacements": { + rule: "http://pages.example.io/foo/* http://test.example.io/qux/:splat/:splat", + path: "http://pages.example.io/foo/hello", + expectMatch: true, + expectedPath: "http://test.example.io/qux/hello/hello", + }, + "splat_missing_path_segment_behavior": { + rule: "http://pages.example.io/foo/*/bar http://test.example.io/foo/:splat/qux", + path: "http://pages.example.io/foo/bar", + expectMatch: true, + expectedPath: "http://test.example.io/foo/qux", + }, + "missing_splat_placeholder": { + rule: "http://pages.example.io/foo/ http://test.example.io/qux/:splat", + path: "http://pages.example.io/foo/", + expectMatch: true, + expectedPath: "http://test.example.io/qux/", + }, + "placeholder_match": { + rule: "http://pages.example.io/foo/:year/:month/:day/bar http://test.example.io/qux/:year-:month-:day", + path: "http://pages.example.io/foo/2021/08/16/bar", + expectMatch: true, + expectedPath: "http://test.example.io/qux/2021-08-16", + }, + "placeholder_match_end": { + rule: "http://pages.example.io/foo/:placeholder http://test.example.io/qux/:placeholder", + path: "http://pages.example.io/foo/bar", + expectMatch: true, + expectedPath: "http://test.example.io/qux/bar", + }, + "placeholder_match_beginning": { + rule: "http://pages.example.io/:placeholder/foo http://test.example.io/qux/:placeholder", + path: "http://pages.example.io/baz/foo", + expectMatch: true, + expectedPath: "http://test.example.io/qux/baz", + }, + "placeholder_no_multiple_segments": { + rule: "http://pages.example.io/foo/:placeholder/bar http://test.example.io/foo/:placeholder/qux", + path: "http://pages.example.io/foo/hello/world/bar", + expectMatch: false, + expectedPath: "", + }, + "placeholder_at_beginning_no_content": { + rule: "http://pages.example.io/:placeholder/foo http://test.example.io/qux/:placeholder", + path: "http://pages.example.io/foo", + expectMatch: false, + expectedPath: "", + }, + "placeholder_at_end_no_content": { + rule: "http://pages.example.io/foo/:placeholder http://test.example.io/qux/:placeholder", + path: "http://pages.example.io/foo/", + expectMatch: false, + expectedPath: "", + }, + "placeholder_mid_segment_in_from": { + rule: "http://pages.example.io/foo:placeholder http://test.example.io/qux/:placeholder", + path: "http://pages.example.io/foorbar", + expectMatch: false, + expectedPath: "", + }, + "placeholder_mid_segment_in_to": { + rule: "http://pages.example.io/foo/:placeholder http://test.example.io/qux/bar:placeholder", + path: "http://pages.example.io/foo/baz", + expectMatch: true, + expectedPath: "http://test.example.io/qux/barbaz", + }, + "placeholder_missing_replacement_with_substring": { + rule: "http://pages.example.io/:foo http://test.example.io/:foobar", + path: "http://pages.example.io/baz", + expectMatch: true, + expectedPath: "http://test.example.io/", + }, + "placeholder_mid_segment_no_content": { + rule: "http://pages.example.io/foo:placeholder http://test.example.io/qux/:splat", + path: "http://pages.example.io/foo", + expectMatch: false, + expectedPath: "", + }, + "placeholder_name_substring": { + rule: "http://pages.example.io/foo/:foo/:foobar http://test.example.io/qux/:foo/:foobar", + path: "http://pages.example.io/foo/baz/quux", + expectMatch: true, + expectedPath: "http://test.example.io/qux/baz/quux", + }, + "lone_placeholder": { + rule: "http://pages.example.io/:placeholder http://test.example.io/qux/:placeholder", + path: "http://pages.example.io/foo", + expectMatch: true, + expectedPath: "http://test.example.io/qux/foo", + }, + "duplicate_placeholders": { + rule: "http://pages.example.io/foo/:placeholder/bar/:placeholder/baz http://test.example.io/qux/:placeholder", + path: "http://pages.example.io/foo/hello/bar/world/baz", + expectMatch: true, + expectedPath: "http://test.example.io/qux/hello", + }, + "duplicate_placeholder_replacements": { + rule: "http://pages.example.io/foo/:placeholder http://test.example.io/qux/:placeholder/:placeholder", + path: "http://pages.example.io/foo/hello", + expectMatch: true, + expectedPath: "http://test.example.io/qux/hello/hello", + }, + "splat_and_placeholder_named_splat": { + rule: "http://pages.example.io/foo/*/bar/:splat http://test.example.io/qux/:splat", + path: "http://pages.example.io/foo/hello/bar/world", + expectMatch: true, + expectedPath: "http://test.example.io/qux/hello", + }, + "placeholder_named_splat_and_splat": { + rule: "http://pages.example.io/foo/:splat/bar/* http://test.example.io/qux/:splat", + path: "http://pages.example.io/foo/hello/bar/world", + expectMatch: true, + expectedPath: "http://test.example.io/qux/hello", + }, + + // Note: These differ slightly from Netlify's matching behavior. + // GitLab replaces _all_ placeholders in the "to" path, even if + // the placeholder doesn't have corresponding match in the "from". + // Netlify only replaces placeholders that appear in the "from". + "missing_placeholder_exact_match": { + rule: "http://pages.example.io/foo/ http://test.example.io/qux/:placeholder", + path: "http://pages.example.io/foo/", + expectMatch: true, + + // Netlify would instead redirect to "/qux/:placeholder" + expectedPath: "http://test.example.io/qux/", + }, + "missing_placeholder_nonexact_match": { + rule: "http://pages.example.io/foo/:placeholderA http://test.example.io/qux/:placeholderB", + path: "http://pages.example.io/foo/bar", + expectMatch: true, + + // Netlify would instead redirect to "/qux/:placeholderB" + expectedPath: "http://test.example.io/qux/", + }, + } + + for name, tt := range tests { + t.Run(name, func(t *testing.T) { + rules, err := netlifyRedirects.ParseString(tt.rule) + require.NoError(t, err) + + parsedURL, err := url.Parse(tt.path) + require.NoError(t, err) + + isMatch, path := matchesRule(&rules[0], parsedURL) + require.Equal(t, tt.expectMatch, isMatch) + require.Equal(t, tt.expectedPath, path) + }) + } +} + +// Tests matching behavior when the `FF_ENABLE_DOMAIN_REDIRECT` +// feature flag is not enabled. These tests can be removed when the +// `FF_ENABLE_DOMAIN_REDIRECT` flag is removed. +func Test_matchesDomainRule_DomainRedirect_Disabled(t *testing.T) { + t.Setenv(feature.RedirectsPlaceholders.EnvVariable, "true") + t.Setenv(feature.DomainRedirects.EnvVariable, "false") + + tests := map[string]testCaseData{ + "exact_match": { + rule: "http://pages.example.io/foo/ http://test.example.io/bar/", + path: "http://pages.example.io/foo/", + expectMatch: false, + expectedPath: "", + }, + } + + for name, tt := range tests { + t.Run(name, func(t *testing.T) { + rules, err := netlifyRedirects.ParseString(tt.rule) + require.NoError(t, err) + + parsedURL, err := url.Parse(tt.path) + require.NoError(t, err) + + isMatch, path := matchesRule(&rules[0], parsedURL) require.Equal(t, tt.expectMatch, isMatch) require.Equal(t, tt.expectedPath, path) }) diff --git a/internal/redirects/redirects.go b/internal/redirects/redirects.go index 4903518a3..b1d553bbf 100644 --- a/internal/redirects/redirects.go +++ b/internal/redirects/redirects.go @@ -52,7 +52,9 @@ var ( errFailedToParseConfig = errors.New("failed to parse _redirects file") errFailedToParseURL = errors.New("unable to parse URL") errNoDomainLevelRedirects = errors.New("no domain-level redirects to outside sites") - errNoStartingForwardSlashInURLPath = errors.New("url path must start with forward slash /") + errNoDomainLevelRewrite = errors.New("no domain-level rewrite to outside sites") + errNoStartingForwardSlashInURLPath = errors.New("originalURL path must start with forward slash /") + errNoValidStartingInURLPath = errors.New("originalURL path must start with forward slash / or http:// or https://") errNoSplats = errors.New("splats are not enabled. See https://docs.gitlab.com/ee/user/project/pages/redirects.html#feature-flag-for-rewrites") errNoPlaceholders = errors.New("placeholders are not enabled. See https://docs.gitlab.com/ee/user/project/pages/redirects.html#feature-flag-for-rewrites") errNoParams = errors.New("params not supported") @@ -111,7 +113,7 @@ func (r *Redirects) Rewrite(originalURL *url.URL) (*url.URL, int, error) { return nil, 0, ErrNoRedirect } - rule, newPath := r.match(originalURL.Path) + rule, newPath := r.match(originalURL) if rule == nil { return nil, 0, ErrNoRedirect } diff --git a/internal/redirects/utils.go b/internal/redirects/utils.go new file mode 100644 index 000000000..e9c6f5890 --- /dev/null +++ b/internal/redirects/utils.go @@ -0,0 +1,25 @@ +package redirects + +import "net/url" + +func isDomainURL(path string) bool { + parsedURL, err := url.Parse(path) + if err != nil { + return false + } + if len(parsedURL.Scheme) > 0 { + return true + } + return false +} + +func isHostMatching(originalURL *url.URL, path string) bool { + parsedURL, err := url.Parse(path) + if err != nil { + return false + } + if originalURL.Scheme == parsedURL.Scheme && originalURL.Host == parsedURL.Host { + return true + } + return false +} diff --git a/internal/redirects/utils_test.go b/internal/redirects/utils_test.go new file mode 100644 index 000000000..0bb46e65d --- /dev/null +++ b/internal/redirects/utils_test.go @@ -0,0 +1,81 @@ +package redirects + +import ( + "net/url" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestIsDomainURL(t *testing.T) { + tests := map[string]struct { + url string + expectedBool bool + }{ + "only_path": { + url: "/goto.html", + expectedBool: false, + }, + "valid_domain_url": { + url: "https://GitLab.com", + expectedBool: true, + }, + "schemaless_domain_url_with_special_char": { + url: "/\\GitLab.com", + expectedBool: false, + }, + "schemaless_domain_url": { + url: "//GitLab.com/pages.html", + expectedBool: false, + }, + } + + for name, tt := range tests { + t.Run(name, func(t *testing.T) { + require.EqualValues(t, tt.expectedBool, isDomainURL(tt.url)) + }) + } +} + +func TestIsHostMatching(t *testing.T) { + tests := map[string]struct { + originalURL string + path string + expectedBool bool + }{ + "path_without_domain": { + originalURL: "https://GitLab.com/goto.html", + path: "/goto.html", + expectedBool: false, + }, + "path_without_schema": { + originalURL: "https://GitLab.com/goto.html", + path: "GitLab.com/goto.html", + expectedBool: false, + }, + "valid_matching_host": { + originalURL: "https://GitLab.com/goto.html", + path: "https://GitLab.com/goto.html", + expectedBool: true, + }, + "different_schema_path": { + originalURL: "http://GitLab.com/goto.html", + path: "https://GitLab.com/goto.html", + expectedBool: false, + }, + "different_host_path": { + originalURL: "https://GitLab.com/goto.html", + path: "https://GitLab-test.com/goto.html", + expectedBool: false, + }, + } + + for name, tt := range tests { + t.Run(name, func(t *testing.T) { + parsedURL, err := url.Parse(tt.originalURL) + require.NoError(t, err) + + require.EqualValues(t, tt.expectedBool, isHostMatching(parsedURL, tt.path)) + }) + } +} diff --git a/internal/redirects/validations.go b/internal/redirects/validations.go index 86fb02128..942aeb310 100644 --- a/internal/redirects/validations.go +++ b/internal/redirects/validations.go @@ -20,32 +20,59 @@ var ( // validateURL runs validations against a rule URL. // Returns `nil` if the URL is valid. -func validateURL(urlText string) error { +// nolint: gocyclo +func validateURL(urlText string, status int) error { url, err := url.Parse(urlText) if err != nil { return errFailedToParseURL } - // No support for domain-level redirects to outside sites: - // - `https://google.com` - // - `//google.com` - // - `/\google.com` - if url.Host != "" || url.Scheme != "" || strings.HasPrefix(url.Path, "/\\") { - return errNoDomainLevelRedirects - } + allowedPrefix := []string{"/"} + maxPathSegments := cfg.MaxPathSegments + if feature.DomainRedirects.Enabled() { + // No support for domain level redirects starting with // or special characters: + // - `//google.com` + // - `/\google.com` + if strings.HasPrefix(urlText, "//") || strings.HasPrefix(url.Path, "/\\") { + return errNoValidStartingInURLPath + } + + // No support for domain level rewrite + if isDomainURL(urlText) { + if status == http.StatusOK { + return errNoDomainLevelRewrite + } + maxPathSegments += 2 + allowedPrefix = append(allowedPrefix, "http://", "https://") + } - // No parent traversing relative URL's with `./` or `../` - // No ambiguous URLs like bare domains `GitLab.com` - if !strings.HasPrefix(url.Path, "/") { - return errNoStartingForwardSlashInURLPath + // No parent traversing relative URL's with `./` or `../` + // No ambiguous URLs like bare domains `GitLab.com` + if !startsWithAnyPrefix(urlText, allowedPrefix...) { + return errNoValidStartingInURLPath + } + } else { + // No support for domain-level redirects to outside sites: + // - `https://google.com` + // - `//google.com` + // - `/\google.com` + if url.Host != "" || url.Scheme != "" || strings.HasPrefix(url.Path, "/\\") { + return errNoDomainLevelRedirects + } + + // No parent traversing relative URL's with `./` or `../` + // No ambiguous URLs like bare domains `GitLab.com` + if !startsWithAnyPrefix(urlText, allowedPrefix...) { + return errNoStartingForwardSlashInURLPath + } } if feature.RedirectsPlaceholders.Enabled() { // Limit the number of path segments a rule can contain. // This prevents the matching logic from generating regular // expressions that are too large/complex. - if strings.Count(url.Path, "/") > cfg.MaxPathSegments { - return fmt.Errorf("url path cannot contain more than %d forward slashes", cfg.MaxPathSegments) + if strings.Count(url.Path, "/") > maxPathSegments { + return fmt.Errorf("originalURL path cannot contain more than %d forward slashes", cfg.MaxPathSegments) } } else { // No support for splats, https://docs.netlify.com/routing/redirects/redirect-options/#splats @@ -65,11 +92,11 @@ func validateURL(urlText string) error { // validateRule runs all validation rules on the provided rule. // Returns `nil` if the rule is valid func validateRule(r netlifyRedirects.Rule) error { - if err := validateURL(r.From); err != nil { + if err := validateURL(r.From, r.Status); err != nil { return err } - if err := validateURL(r.To); err != nil { + if err := validateURL(r.To, r.Status); err != nil { return err } @@ -93,3 +120,12 @@ func validateRule(r netlifyRedirects.Rule) error { return nil } + +func startsWithAnyPrefix(s string, prefixes ...string) bool { + for _, prefix := range prefixes { + if strings.HasPrefix(s, prefix) { + return true + } + } + return false +} diff --git a/internal/redirects/validations_test.go b/internal/redirects/validations_test.go index 2b8ac1b13..7bb75c58f 100644 --- a/internal/redirects/validations_test.go +++ b/internal/redirects/validations_test.go @@ -2,6 +2,7 @@ package redirects import ( "fmt" + "net/http" "strings" "testing" @@ -43,7 +44,7 @@ func TestRedirectsValidateUrl(t *testing.T) { }, "too_many_slashes": { url: strings.Repeat("/a", 26), - expectedErr: fmt.Errorf("url path cannot contain more than %d forward slashes", defaultMaxPathSegments), + expectedErr: fmt.Errorf("originalURL path cannot contain more than %d forward slashes", defaultMaxPathSegments), }, "placeholders": { url: "/news/:year/:month/:date/:slug", @@ -58,7 +59,7 @@ func TestRedirectsValidateUrl(t *testing.T) { for name, tt := range tests { t.Run(name, func(t *testing.T) { - err := validateURL(tt.url) + err := validateURL(tt.url, http.StatusMovedPermanently) if tt.expectedErr != nil { require.EqualError(t, err, tt.expectedErr.Error()) return @@ -92,7 +93,7 @@ func TestRedirectsValidateUrlNoPlaceholders(t *testing.T) { for name, tt := range tests { t.Run(name, func(t *testing.T) { - err := validateURL(tt.url) + err := validateURL(tt.url, http.StatusMovedPermanently) require.ErrorIs(t, err, tt.expectedErr) }) } @@ -140,3 +141,186 @@ func TestRedirectsValidateRule(t *testing.T) { }) } } + +func TestRedirectsValidateUrl_DomainRedirect_Enabled(t *testing.T) { + t.Setenv(feature.RedirectsPlaceholders.EnvVariable, "true") + t.Setenv(feature.DomainRedirects.EnvVariable, "true") + + tests := map[string]struct { + url string + status int + expectedErr error + }{ + "valid_url": { + url: "/goto.html", + status: http.StatusOK, + }, + "domain_level_redirects": { + url: "https://GitLab.com", + status: http.StatusMovedPermanently, + }, + "domain_level_rewrite": { + url: "https://GitLab.com", + status: http.StatusOK, + expectedErr: errNoDomainLevelRewrite, + }, + "no_special_characters_escape_domain_level_redirects": { + url: "/\\GitLab.com", + status: http.StatusMovedPermanently, + expectedErr: errNoValidStartingInURLPath, + }, + "no_schemaless_url_domain_level_redirects": { + url: "//GitLab.com/pages.html", + status: http.StatusMovedPermanently, + expectedErr: errNoValidStartingInURLPath, + }, + "no_bare_domain_level_redirects": { + url: "GitLab.com", + status: http.StatusMovedPermanently, + expectedErr: errNoValidStartingInURLPath, + }, + "no_parent_traversing_relative_url": { + url: "../target.html", + status: http.StatusMovedPermanently, + expectedErr: errNoValidStartingInURLPath, + }, + "too_many_slashes": { + url: strings.Repeat("/a", 26), + status: http.StatusMovedPermanently, + expectedErr: fmt.Errorf("originalURL path cannot contain more than %d forward slashes", defaultMaxPathSegments), + }, + "placeholders": { + url: "/news/:year/:month/:date/:slug", + status: http.StatusMovedPermanently, + }, + "domain_redirect_placeholders": { + url: "https://GitLab.com/news/:year/:month/:date/:slug", + status: http.StatusMovedPermanently, + }, + "splats": { + url: "/blog/*", + status: http.StatusMovedPermanently, + }, + "lone_splats": { + url: "https://GitLab.com/*", + status: http.StatusMovedPermanently, + }, + "domain_redirect_splats": { + url: "https://GitLab.com/blog/*", + status: http.StatusMovedPermanently, + }, + "splat_placeholders": { + url: "/new/path/:splat", + status: http.StatusMovedPermanently, + }, + "domain_redirect_splat_placeholders": { + url: "https://GitLab.com/new/path/:splat", + status: http.StatusMovedPermanently, + }, + } + + for name, tt := range tests { + t.Run(name, func(t *testing.T) { + err := validateURL(tt.url, tt.status) + if tt.expectedErr != nil { + require.EqualError(t, err, tt.expectedErr.Error()) + return + } + + require.NoError(t, err) + }) + } +} + +// Tests validation rules that only apply when the `FF_ENABLE_DOMAIN_REDIRECT` +// feature flag is not enabled. These tests can be removed when the +// `FF_ENABLE_DOMAIN_REDIRECT` flag is removed. +func TestRedirectsValidateUrl_DomainRedirect_Disabled(t *testing.T) { + t.Setenv(feature.RedirectsPlaceholders.EnvVariable, "true") + t.Setenv(feature.DomainRedirects.EnvVariable, "true") + + tests := map[string]struct { + url string + status int + expectedErr error + }{ + "valid_url": { + url: "/goto.html", + status: http.StatusOK, + }, + "domain_level_redirects": { + url: "https://GitLab.com", + status: http.StatusMovedPermanently, + }, + "domain_level_rewrite": { + url: "https://GitLab.com", + status: http.StatusOK, + expectedErr: errNoDomainLevelRewrite, + }, + "no_special_characters_escape_domain_level_redirects": { + url: "/\\GitLab.com", + status: http.StatusMovedPermanently, + expectedErr: errNoValidStartingInURLPath, + }, + "no_schemaless_url_domain_level_redirects": { + url: "//GitLab.com/pages.html", + status: http.StatusMovedPermanently, + expectedErr: errNoValidStartingInURLPath, + }, + "no_bare_domain_level_redirects": { + url: "GitLab.com", + status: http.StatusMovedPermanently, + expectedErr: errNoValidStartingInURLPath, + }, + "no_parent_traversing_relative_url": { + url: "../target.html", + status: http.StatusMovedPermanently, + expectedErr: errNoValidStartingInURLPath, + }, + "too_many_slashes": { + url: strings.Repeat("/a", 26), + status: http.StatusMovedPermanently, + expectedErr: fmt.Errorf("originalURL path cannot contain more than %d forward slashes", defaultMaxPathSegments), + }, + "placeholders": { + url: "/news/:year/:month/:date/:slug", + status: http.StatusMovedPermanently, + }, + "domain_redirect_placeholders": { + url: "https://GitLab.com/news/:year/:month/:date/:slug", + status: http.StatusMovedPermanently, + }, + "splats": { + url: "/blog/*", + status: http.StatusMovedPermanently, + }, + "lone_splats": { + url: "https://GitLab.com/*", + status: http.StatusMovedPermanently, + }, + "domain_redirect_splats": { + url: "https://GitLab.com/blog/*", + status: http.StatusMovedPermanently, + }, + "splat_placeholders": { + url: "/new/path/:splat", + status: http.StatusMovedPermanently, + }, + "domain_redirect_splat_placeholders": { + url: "https://GitLab.com/new/path/:splat", + status: http.StatusMovedPermanently, + }, + } + + for name, tt := range tests { + t.Run(name, func(t *testing.T) { + err := validateURL(tt.url, tt.status) + if tt.expectedErr != nil { + require.EqualError(t, err, tt.expectedErr.Error()) + return + } + + require.NoError(t, err) + }) + } +} -- GitLab From 6649c6848ceb83eaf1b2794309cca49fcff2ed66 Mon Sep 17 00:00:00 2001 From: ngala Date: Fri, 1 Dec 2023 15:10:27 +0530 Subject: [PATCH 2/6] Add acceptance tests for domain level redirects --- internal/redirects/matching.go | 7 +- internal/redirects/matching_test.go | 8 +- internal/redirects/redirects.go | 4 +- internal/redirects/validations.go | 2 +- internal/redirects/validations_test.go | 6 +- internal/serving/disk/reader.go | 2 +- .../project-redirects/public.zip | Bin 2452 -> 6034 bytes .../project-redirects/public/_redirects | 1 + test/acceptance/redirects_test.go | 104 +++++++++++++++--- 9 files changed, 104 insertions(+), 30 deletions(-) diff --git a/internal/redirects/matching.go b/internal/redirects/matching.go index 68d53b616..a18d41b7d 100644 --- a/internal/redirects/matching.go +++ b/internal/redirects/matching.go @@ -35,11 +35,8 @@ var ( func matchesRule(rule *netlifyRedirects.Rule, originalURL *url.URL) (bool, string) { path := originalURL.Path if feature.DomainRedirects.Enabled() { - isFromDomainURL := isDomainURL(rule.From) - isDomainRule := isFromDomainURL || isDomainURL(rule.To) - - if isDomainRule { - if isFromDomainURL && !isHostMatching(originalURL, rule.From) { + if isDomainURL(rule.From) { + if !isHostMatching(originalURL, rule.From) { return false, "" } diff --git a/internal/redirects/matching_test.go b/internal/redirects/matching_test.go index 433de0401..8dbf75c49 100644 --- a/internal/redirects/matching_test.go +++ b/internal/redirects/matching_test.go @@ -374,7 +374,13 @@ func Test_matchesDomainRule(t *testing.T) { t.Setenv(feature.DomainRedirects.EnvVariable, "true") tests := map[string]testCaseData{ - "exact_match": { + "exact_path_match": { + rule: "/foo/ http://test.example.io/bar/", + path: "http://pages.example.io/foo/", + expectMatch: true, + expectedPath: "http://test.example.io/bar/", + }, + "exact_url_match": { rule: "http://pages.example.io/foo/ http://test.example.io/bar/", path: "http://pages.example.io/foo/", expectMatch: true, diff --git a/internal/redirects/redirects.go b/internal/redirects/redirects.go index b1d553bbf..3fb2525c3 100644 --- a/internal/redirects/redirects.go +++ b/internal/redirects/redirects.go @@ -53,8 +53,8 @@ var ( errFailedToParseURL = errors.New("unable to parse URL") errNoDomainLevelRedirects = errors.New("no domain-level redirects to outside sites") errNoDomainLevelRewrite = errors.New("no domain-level rewrite to outside sites") - errNoStartingForwardSlashInURLPath = errors.New("originalURL path must start with forward slash /") - errNoValidStartingInURLPath = errors.New("originalURL path must start with forward slash / or http:// or https://") + errNoStartingForwardSlashInURLPath = errors.New("url path must start with forward slash /") + errNoValidStartingInURLPath = errors.New("url path must start with forward slash / or http:// or https://") errNoSplats = errors.New("splats are not enabled. See https://docs.gitlab.com/ee/user/project/pages/redirects.html#feature-flag-for-rewrites") errNoPlaceholders = errors.New("placeholders are not enabled. See https://docs.gitlab.com/ee/user/project/pages/redirects.html#feature-flag-for-rewrites") errNoParams = errors.New("params not supported") diff --git a/internal/redirects/validations.go b/internal/redirects/validations.go index 942aeb310..288b544ba 100644 --- a/internal/redirects/validations.go +++ b/internal/redirects/validations.go @@ -72,7 +72,7 @@ func validateURL(urlText string, status int) error { // This prevents the matching logic from generating regular // expressions that are too large/complex. if strings.Count(url.Path, "/") > maxPathSegments { - return fmt.Errorf("originalURL path cannot contain more than %d forward slashes", cfg.MaxPathSegments) + return fmt.Errorf("url path cannot contain more than %d forward slashes", cfg.MaxPathSegments) } } else { // No support for splats, https://docs.netlify.com/routing/redirects/redirect-options/#splats diff --git a/internal/redirects/validations_test.go b/internal/redirects/validations_test.go index 7bb75c58f..aff316035 100644 --- a/internal/redirects/validations_test.go +++ b/internal/redirects/validations_test.go @@ -44,7 +44,7 @@ func TestRedirectsValidateUrl(t *testing.T) { }, "too_many_slashes": { url: strings.Repeat("/a", 26), - expectedErr: fmt.Errorf("originalURL path cannot contain more than %d forward slashes", defaultMaxPathSegments), + expectedErr: fmt.Errorf("url path cannot contain more than %d forward slashes", defaultMaxPathSegments), }, "placeholders": { url: "/news/:year/:month/:date/:slug", @@ -187,7 +187,7 @@ func TestRedirectsValidateUrl_DomainRedirect_Enabled(t *testing.T) { "too_many_slashes": { url: strings.Repeat("/a", 26), status: http.StatusMovedPermanently, - expectedErr: fmt.Errorf("originalURL path cannot contain more than %d forward slashes", defaultMaxPathSegments), + expectedErr: fmt.Errorf("url path cannot contain more than %d forward slashes", defaultMaxPathSegments), }, "placeholders": { url: "/news/:year/:month/:date/:slug", @@ -280,7 +280,7 @@ func TestRedirectsValidateUrl_DomainRedirect_Disabled(t *testing.T) { "too_many_slashes": { url: strings.Repeat("/a", 26), status: http.StatusMovedPermanently, - expectedErr: fmt.Errorf("originalURL path cannot contain more than %d forward slashes", defaultMaxPathSegments), + expectedErr: fmt.Errorf("url path cannot contain more than %d forward slashes", defaultMaxPathSegments), }, "placeholders": { url: "/news/:year/:month/:date/:slug", diff --git a/internal/serving/disk/reader.go b/internal/serving/disk/reader.go index a483a8fee..d84b3cbf1 100644 --- a/internal/serving/disk/reader.go +++ b/internal/serving/disk/reader.go @@ -65,7 +65,7 @@ func (reader *Reader) tryRedirects(h serving.Handler) bool { return reader.tryFile(h) } - http.Redirect(h.Writer, h.Request, rewrittenURL.Path, status) + http.Redirect(h.Writer, h.Request, rewrittenURL.String(), status) return true } diff --git a/shared/pages/group.redirects/project-redirects/public.zip b/shared/pages/group.redirects/project-redirects/public.zip index 88c2cbb0864e25e5dcf07e25ae3e74ffccf54e0f..70d9499b15b59612193ecfe2db2cc9f7d2da8038 100644 GIT binary patch literal 6034 zcmWIWW@Zs#0D(<~jo}~~hS?bu7z#?0ax#`eZ6>yc__wRnUk4{W?XWDXmWZ&Qo;vc zpRf=7VI2VtOcNxS)di#(3l#;Le{@TBF!d=aItHkD9OqGZ<`#UYLqOd3-1)2CIvOWD zuWDt!Z=$~qe^loxmCvb6c^bB|Mxta`d} z_PgnaoOecD(fFMaEjR7m3CT>xDpmnA3nrNZUd!dBJ)>gpX;;PkSX_I3z8%P4fBc@Z zRWdLzfLxE=UmzdLAV&q%gL?5eyg1;of}D`J*id36z9=;%vnVyWq!=74p!5jC=&>^8 zVBTQ`9*6h8yXL9~u6j33{zj_(Ll*r)ZwATjvH$B%F7gXbx}eo7%sy{^u7O#hO_F-s zMM3}BIq$6}D{=q0tFXBKUt}3~&xPQ08)$!|pAD(=uw7@AgNFY`5J#WRX1$kN> zeH+hyOPw7ac!g29Xq&H}oC`PSnSw<@A*>O7`i!B^Hr&r%q`l$s4Kc2;1_l`pF7v?5 zpaZX`+~;k~H9n@$oGZiOr}?{heP@*XZ>0s2LY}I+Z}$n$HL#EKl2?@ycVDpJK(oiG z-mUp6?=QUDWpRt8_G++bYetdz1CH#c%1!$_ZoLd$mHKykhUW3)IQObGnV}cgKinR@ z!tM`OfLQGWcVYb#K6%!zvobo;-!ID+y}U?r!u1fxDZ*3iCI417FMaS~^OYlqrZ3>T zw^nIZay4=L{%w*l1#Jm)uvi)aYpYuLpCp^!12R>l}moZ_C8K!&;3=AN15#fbwBPhHS zvF1g5mJev4fvN;#FM#;mh(IeYNQ6`ou)+~Uqh&?hVIhh&EQ$*f!MPh`6bKKf|3Nhy zvj3$R7#I`~{!hxuPuDHTFD}tFGB7gKHL%b%G$JbbaC_CnJ2bP%J#ge3C7#J8p_TdgdZL9%EoTCO+u!=G;Fi0YTH7zqIRX4vZwWugFC6%b~ z)IRAE>T&vvXQ+n1PMC(Du9mkiEGVVo&VCnXU|;~*fjcNwum&XohYV;Kg0doV(gg8& z5n))An4MWnLFN?48X#a(2XFS|2gftK^$IGUa5Qs3B@0?X#eMRGP8h?YBQ~uM7~J*1 zW%|oa)&3weK^S-10Tl@FrY^L71+seZwy}`YjU)pDgDfK5fE=D$ky%`lnU}5$O8-RV zKSbDF={oah(wRLaty|8_34w%S{l&X(AX`8fcPMILNpK*?=*1Io&Vc4Z^a?@{wSoZW z7yYu#;>?o#qGH|jQeq1**k?83W_99!xgz32yb;EOaujlJ4B7Z4jXxL|U_EN$%OiB-kzEh!(t%tK!b=)2Fe14g8aAX9 zKFBtK!U?(QkLr;=Cb&m%wDCdFfmX1h8;{!~IxN^cg2N^(DHYi!P>dkA$w6&>P;i0p zlEzmoaF5_?d}FC5(Ct9>4XAYwG9QGOG{&%E^9?@h@OTMiB*;r38iqkFaTxopqdXhj zOQ3QSoH;=5hhaRi0x}rcAE1^w+~CFxHf;VNqMeQ&RLHGbP^Ag8Vo9SsJKXKWHD5uV zBEdtT5)onBN_K1>BFZ3ExW^zB4JD}$)bxZK-N?#?%`;$IP%OdYeptjH zH|LO(_>#tbTyXb;@(y(hY1AZcz>Vn-kX_^!)Sw7KZdrlaaIjEY()fuR#X|&|P$cFi zP*V+IRT&R1j}fqyh!{g|@1O>!125cz;Iu$aBMV=SLM<^4@?!cCVl7!MssL|RHc$YH RF^Dm|<6vOu0}UK8008$7p@ske literal 2452 zcmWIWW@h1H0DL$tKn8$l4u(lNeL;Wxp0ZUkFff!cGB5}t49Lt&Nv+V!D9OzM8>OOAo}j8xo`7c5 z+Ww7vhYWaJ%J&_5HE)M<$kO=-^GuI%E>S5u^FO%F$ROSNrQiE^c`H?$XDVpjU+-+P zT6j@LW%zoTNuRRLh70Az9l9)SKKtC`)FZ2&Zk+vY`XT3?QCBp6XGF_Qdv`)Ild+0b zz|4Y4=7867d1=q6*n8SlF+Ud9UY~D=@B=6iRvE6kXu`3XkBOxbkjSbX@o zxi?q%xN=#NgzU$fliv!BH8(Zf=Qa9y{eJTG11z?s$D9lN^W~q{)>j1X;B&j8${Hy4 z>0ZNf<5$a?UP#=|xqq!v)K%6%DgE}7CyrYt9Nc=+*JP%+XMgfIa%O9?N#;CuDDb;EzsmQ@uN&fwXD4`QWoG`Ln{i~%-YIro zCVGAjj*aXSr$}epd)O_!TUxk@^Vh27T3OE%19PKU{tG_MJHTo4?77G7td(8=-YkiA z{yI0LVbhCln^_fZX+^IeUUPA_sZ4hgxx7en!u*v@8tNMF_t}*uXj<%u|O-#>B*3C)GOCgw! zd_hVTKuQ(4P|cQ#JNsRffq?;pB@t$)W#**n=9i@w6=kNR5;VRfBUJ&UL!mseSfMzz zs4O)F*&a|5g5@C)&5f|9xF8Xcmp};!EdwAN3o_*8rfPqF1_lNY7DY7#EkA*b0%0`c zLNYRo6*7wz67v*-100cp0%Usq#k+2j3=9k)EQ@M-YDH#oNoHQUF36XN-~ibG!nkdL zX#@uknth(dn#INp3=ANwfUqwqCqG@cAiuao*T}%gP}jgh*U$*nN-SlLO@^VJZ>0iA zuZ@0&p&b{OO@UodYI#v+Nouh|Vo|C>K~ZW+Nu@$wYGR3^jedb0nqxrm0>iur#}p-I zXBLwXLu<=VTohygl@bgLNPdDCQPkVa?Fr?0g5n~dqFfig9M0TU|L#YLS3!_5+03pp1ZCKoB+_6e6IwfnjcxSo2_pI~`Ik5Dp8FfvBYdvU__`4MdAQ zEJ+TMC?G)rG8(l=KsNd{s?o4~j$A+x=X%sq0NILY7I<)g@->b!fjC=W0S=;hQ3Cx1 u!WKv_4GkgANYt#3>~MWnxWgevV$JxtY++>sIg*=!o8bvF1H&v<5Dx$!F?s3$ diff --git a/shared/pages/group.redirects/project-redirects/public/_redirects b/shared/pages/group.redirects/project-redirects/public/_redirects index 0adee383b..63996c073 100644 --- a/shared/pages/group.redirects/project-redirects/public/_redirects +++ b/shared/pages/group.redirects/project-redirects/public/_redirects @@ -6,6 +6,7 @@ /project-redirects/news/:year/:month/:date/:slug /project-redirects/blog/:year/:month/:date/:slug /project-redirects/pit.html /project-redirects/spikes.html 302 /project-redirects/goto-domain.html https://GitLab.com/pages.html 302 +/project-redirects/goto-no-schema-domain.html GitLab.com/pages.html 302 /project-redirects/goto-bare-domain.html GitLab.com/pages.html 302 /project-redirects/goto-schemaless.html //GitLab.com/pages.html 302 /project-redirects/cake-portal/ /project-redirects/still-alive/ 302 diff --git a/test/acceptance/redirects_test.go b/test/acceptance/redirects_test.go index a2bdde53d..5c7f765ae 100644 --- a/test/acceptance/redirects_test.go +++ b/test/acceptance/redirects_test.go @@ -14,6 +14,7 @@ import ( func TestRedirectStatusPage(t *testing.T) { t.Setenv(feature.RedirectsPlaceholders.EnvVariable, "true") + t.Setenv(feature.DomainRedirects.EnvVariable, "true") RunPagesProcess(t, withListeners([]ListenSpec{httpListener}), @@ -26,11 +27,18 @@ func TestRedirectStatusPage(t *testing.T) { require.NoError(t, err) testhelpers.Close(t, rsp.Body) - require.Contains(t, string(body), "14 rules") + require.Contains(t, string(body), "15 rules") require.Equal(t, http.StatusOK, rsp.StatusCode) } -func TestRedirect(t *testing.T) { +type rewrite struct { + host string + path string + expectedStatus int + expectedLocation string +} + +func testRedirect(t *testing.T, tests []rewrite) { t.Setenv(feature.RedirectsPlaceholders.EnvVariable, "true") RunPagesProcess(t, @@ -44,12 +52,20 @@ func TestRedirect(t *testing.T) { require.Equal(t, http.StatusOK, rsp.StatusCode) - tests := []struct { - host string - path string - expectedStatus int - expectedLocation string - }{ + for _, tt := range tests { + t.Run(fmt.Sprintf("%s%s -> %s (%d)", tt.host, tt.path, tt.expectedLocation, tt.expectedStatus), func(t *testing.T) { + rsp, err := GetRedirectPage(t, httpListener, tt.host, tt.path) + require.NoError(t, err) + testhelpers.Close(t, rsp.Body) + + require.Equal(t, tt.expectedLocation, rsp.Header.Get("Location")) + require.Equal(t, tt.expectedStatus, rsp.StatusCode) + }) + } +} + +func TestRedirect(t *testing.T) { + tests := []rewrite{ // Project domain { host: "group.redirects.gitlab-example.com", @@ -60,7 +76,7 @@ func TestRedirect(t *testing.T) { // Make sure invalid rule does not redirect { host: "group.redirects.gitlab-example.com", - path: "/project-redirects/goto-domain.html", + path: "/project-redirects/goto-no-schema-domain.html", expectedStatus: http.StatusNotFound, expectedLocation: "", }, @@ -92,16 +108,70 @@ func TestRedirect(t *testing.T) { expectedStatus: http.StatusMovedPermanently, expectedLocation: "/project-redirects/careers/assistant-to-the-regional-manager.html", }, + // Domain redirect + { + host: "group.redirects.gitlab-example.com", + path: "/project-redirects/goto-domain.html", + expectedStatus: http.StatusNotFound, + expectedLocation: "", + }, } + testRedirect(t, tests) +} - for _, tt := range tests { - t.Run(fmt.Sprintf("%s%s -> %s (%d)", tt.host, tt.path, tt.expectedLocation, tt.expectedStatus), func(t *testing.T) { - rsp, err := GetRedirectPage(t, httpListener, tt.host, tt.path) - require.NoError(t, err) - testhelpers.Close(t, rsp.Body) +func TestRedirect_DomainRedirects_Enabled(t *testing.T) { + t.Setenv(feature.DomainRedirects.EnvVariable, "true") - require.Equal(t, tt.expectedLocation, rsp.Header.Get("Location")) - require.Equal(t, tt.expectedStatus, rsp.StatusCode) - }) + tests := []rewrite{ + // Project domain + { + host: "group.redirects.gitlab-example.com", + path: "/project-redirects/redirect-portal.html", + expectedStatus: http.StatusFound, + expectedLocation: "/project-redirects/magic-land.html", + }, + // Make sure invalid rule does not redirect + { + host: "group.redirects.gitlab-example.com", + path: "/project-redirects/goto-no-schema-domain.html", + expectedStatus: http.StatusNotFound, + expectedLocation: "", + }, + // Actual file on disk should override any redirects that match + { + host: "group.redirects.gitlab-example.com", + path: "/project-redirects/file-override.html", + expectedStatus: http.StatusOK, + expectedLocation: "", + }, + // Group-level domain + { + host: "group.redirects.gitlab-example.com", + path: "/redirect-portal.html", + expectedStatus: http.StatusFound, + expectedLocation: "/magic-land.html", + }, + // Custom domain + { + host: "redirects.custom-domain.com", + path: "/redirect-portal.html", + expectedStatus: http.StatusFound, + expectedLocation: "/magic-land.html", + }, + // Permanent redirect for splat (*) with replacement (:splat) + { + host: "group.redirects.gitlab-example.com", + path: "/project-redirects/jobs/assistant-to-the-regional-manager.html", + expectedStatus: http.StatusMovedPermanently, + expectedLocation: "/project-redirects/careers/assistant-to-the-regional-manager.html", + }, + // Domain redirect + { + host: "group.redirects.gitlab-example.com", + path: "/project-redirects/goto-domain.html", + expectedStatus: http.StatusFound, + expectedLocation: "https://GitLab.com/pages.html", + }, } + testRedirect(t, tests) } -- GitLab From 8eea6c8a9a0f7b02e3cc45ecacacdc71f083cd97 Mon Sep 17 00:00:00 2001 From: ngala Date: Fri, 1 Dec 2023 17:38:29 +0530 Subject: [PATCH 3/6] Add url host check --- internal/serving/disk/reader.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/serving/disk/reader.go b/internal/serving/disk/reader.go index d84b3cbf1..3ecb14a28 100644 --- a/internal/serving/disk/reader.go +++ b/internal/serving/disk/reader.go @@ -49,6 +49,10 @@ func (reader *Reader) tryRedirects(h serving.Handler) bool { r := redirects.ParseRedirects(ctx, root) + // In local, URL Host is coming blank + if len(h.Request.URL.Host) == 0 { + h.Request.URL.Host = h.Request.Host + } rewrittenURL, status, err := r.Rewrite(h.Request.URL) if err != nil { if !errors.Is(err, redirects.ErrNoRedirect) { -- GitLab From 85d347ef35af3ff7e3ce51ca54c4f8cc57ab3a4f Mon Sep 17 00:00:00 2001 From: ngala Date: Thu, 14 Dec 2023 13:44:09 +0530 Subject: [PATCH 4/6] Support schema url in from path without FF --- internal/redirects/matching.go | 16 +-- internal/redirects/matching_test.go | 7 ++ internal/redirects/utils.go | 10 +- internal/redirects/validations.go | 43 +++++++- internal/redirects/validations_test.go | 145 +++++++++++++++++++++++-- internal/serving/disk/helpers.go | 10 ++ internal/serving/disk/reader.go | 9 +- 7 files changed, 212 insertions(+), 28 deletions(-) diff --git a/internal/redirects/matching.go b/internal/redirects/matching.go index a18d41b7d..ab0f41cc8 100644 --- a/internal/redirects/matching.go +++ b/internal/redirects/matching.go @@ -34,14 +34,16 @@ var ( // has been templated with all the placeholders (if any) from the originally requested URL. func matchesRule(rule *netlifyRedirects.Rule, originalURL *url.URL) (bool, string) { path := originalURL.Path - if feature.DomainRedirects.Enabled() { - if isDomainURL(rule.From) { - if !isHostMatching(originalURL, rule.From) { - return false, "" - } - - path = originalURL.String() + if isDomainURL(rule.From) { + if !isHostMatching(originalURL, rule.From) { + return false, "" } + + path = originalURL.String() + } + + if !feature.DomainRedirects.Enabled() && isDomainURL(rule.To) { + return false, "" } // If the requested URL exactly matches this rule's "from" path, diff --git a/internal/redirects/matching_test.go b/internal/redirects/matching_test.go index 8dbf75c49..956436bc7 100644 --- a/internal/redirects/matching_test.go +++ b/internal/redirects/matching_test.go @@ -83,6 +83,13 @@ func testMatchesRule(t *testing.T) { expectedPath: "/baz/", }, + "schema_host_from_url": { + rule: "http://pages.example.io/foo /baz/", + path: "http://pages.example.io/foo", + expectMatch: true, + expectedPath: "/baz/", + }, + "splat_match": { rule: "/foo/*/bar /foo/:splat/qux", path: "/foo/baz/bar", diff --git a/internal/redirects/utils.go b/internal/redirects/utils.go index e9c6f5890..172ae82ba 100644 --- a/internal/redirects/utils.go +++ b/internal/redirects/utils.go @@ -2,17 +2,21 @@ package redirects import "net/url" -func isDomainURL(path string) bool { - parsedURL, err := url.Parse(path) +// isDomainURL checks if the given urlString is a valid domain URL with scheme and host parts. +// Returns true if urlString is a valid domain URL, false otherwise. +func isDomainURL(urlString string) bool { + parsedURL, err := url.Parse(urlString) if err != nil { return false } - if len(parsedURL.Scheme) > 0 { + if len(parsedURL.Scheme) > 0 && len(parsedURL.Host) > 0 { return true } return false } +// isHostMatching checks if the host and scheme of the originalURL matches the parsed host and scheme of the input path URL. +// It returns true if they match, false otherwise. func isHostMatching(originalURL *url.URL, path string) bool { parsedURL, err := url.Parse(path) if err != nil { diff --git a/internal/redirects/validations.go b/internal/redirects/validations.go index 288b544ba..8a6889570 100644 --- a/internal/redirects/validations.go +++ b/internal/redirects/validations.go @@ -18,17 +18,43 @@ var ( regexPlaceholderReplacement = regexp.MustCompile(`(?i):(?P[a-z]+)`) ) +func validateFromURL(urlText string) error { + url, err := url.Parse(urlText) + if err != nil { + return errFailedToParseURL + } + // No support for domain level redirects starting with // or special characters: + // - `//google.com` + // - `/\google.com` + if strings.HasPrefix(urlText, "//") || strings.HasPrefix(url.Path, "/\\") { + return errNoValidStartingInURLPath + } + + if url.Scheme != "" && url.Scheme != "http" && url.Scheme != "https" { + return errNoValidStartingInURLPath + } + + if url.Scheme == "" && url.Host == "" { + // No parent traversing relative URL's with `./` or `../` + // No ambiguous URLs like bare domains `GitLab.com` + if !strings.HasPrefix(urlText, "/") { + return errNoValidStartingInURLPath + } + } + + return validateSplatAndPlaceholders(url) +} + // validateURL runs validations against a rule URL. // Returns `nil` if the URL is valid. // nolint: gocyclo -func validateURL(urlText string, status int) error { +func validateToURL(urlText string, status int) error { url, err := url.Parse(urlText) if err != nil { return errFailedToParseURL } allowedPrefix := []string{"/"} - maxPathSegments := cfg.MaxPathSegments if feature.DomainRedirects.Enabled() { // No support for domain level redirects starting with // or special characters: // - `//google.com` @@ -42,7 +68,6 @@ func validateURL(urlText string, status int) error { if status == http.StatusOK { return errNoDomainLevelRewrite } - maxPathSegments += 2 allowedPrefix = append(allowedPrefix, "http://", "https://") } @@ -67,7 +92,15 @@ func validateURL(urlText string, status int) error { } } + return validateSplatAndPlaceholders(url) +} + +func validateSplatAndPlaceholders(url *url.URL) error { if feature.RedirectsPlaceholders.Enabled() { + maxPathSegments := cfg.MaxPathSegments + if url.Scheme != "" { + maxPathSegments += 2 + } // Limit the number of path segments a rule can contain. // This prevents the matching logic from generating regular // expressions that are too large/complex. @@ -92,11 +125,11 @@ func validateURL(urlText string, status int) error { // validateRule runs all validation rules on the provided rule. // Returns `nil` if the rule is valid func validateRule(r netlifyRedirects.Rule) error { - if err := validateURL(r.From, r.Status); err != nil { + if err := validateFromURL(r.From); err != nil { return err } - if err := validateURL(r.To, r.Status); err != nil { + if err := validateToURL(r.To, r.Status); err != nil { return err } diff --git a/internal/redirects/validations_test.go b/internal/redirects/validations_test.go index aff316035..44bbcbfd6 100644 --- a/internal/redirects/validations_test.go +++ b/internal/redirects/validations_test.go @@ -12,7 +12,64 @@ import ( "gitlab.com/gitlab-org/gitlab-pages/internal/feature" ) -func TestRedirectsValidateUrl(t *testing.T) { +func TestRedirectsValidateFromUrl(t *testing.T) { + t.Setenv(feature.RedirectsPlaceholders.EnvVariable, "true") + + tests := map[string]struct { + url string + expectedErr error + }{ + "valid_url": { + url: "/goto.html", + }, + "no_domain_level_redirects": { + url: "https://GitLab.com", + }, + "no_special_characters_escape_domain_level_redirects": { + url: "/\\GitLab.com", + expectedErr: errNoValidStartingInURLPath, + }, + "no_schemaless_url_domain_level_redirects": { + url: "//GitLab.com/pages.html", + expectedErr: errNoValidStartingInURLPath, + }, + "no_bare_domain_level_redirects": { + url: "GitLab.com", + expectedErr: errNoValidStartingInURLPath, + }, + "no_parent_traversing_relative_url": { + url: "../target.html", + expectedErr: errNoValidStartingInURLPath, + }, + "too_many_slashes": { + url: strings.Repeat("/a", 26), + expectedErr: fmt.Errorf("url path cannot contain more than %d forward slashes", defaultMaxPathSegments), + }, + "placeholders": { + url: "/news/:year/:month/:date/:slug", + }, + "splats": { + url: "/blog/*", + }, + "splat_placeholders": { + url: "/new/path/:splat", + }, + } + + for name, tt := range tests { + t.Run(name, func(t *testing.T) { + err := validateFromURL(tt.url) + if tt.expectedErr != nil { + require.EqualError(t, err, tt.expectedErr.Error()) + return + } + + require.NoError(t, err) + }) + } +} + +func TestRedirectsValidateToUrl(t *testing.T) { t.Setenv(feature.RedirectsPlaceholders.EnvVariable, "true") tests := map[string]struct { @@ -59,7 +116,7 @@ func TestRedirectsValidateUrl(t *testing.T) { for name, tt := range tests { t.Run(name, func(t *testing.T) { - err := validateURL(tt.url, http.StatusMovedPermanently) + err := validateToURL(tt.url, http.StatusMovedPermanently) if tt.expectedErr != nil { require.EqualError(t, err, tt.expectedErr.Error()) return @@ -93,7 +150,7 @@ func TestRedirectsValidateUrlNoPlaceholders(t *testing.T) { for name, tt := range tests { t.Run(name, func(t *testing.T) { - err := validateURL(tt.url, http.StatusMovedPermanently) + err := validateToURL(tt.url, http.StatusMovedPermanently) require.ErrorIs(t, err, tt.expectedErr) }) } @@ -109,9 +166,12 @@ func TestRedirectsValidateRule(t *testing.T) { "valid_rule": { rule: "/goto.html /target.html 301", }, + "valid_from_host_url": { + rule: "http://valid.com/ /teapot.html 302", + }, "invalid_from_url": { rule: "invalid.com /teapot.html 302", - expectedErr: errNoStartingForwardSlashInURLPath, + expectedErr: errNoValidStartingInURLPath, }, "invalid_to_url": { rule: "/goto.html invalid.com", @@ -142,7 +202,76 @@ func TestRedirectsValidateRule(t *testing.T) { } } -func TestRedirectsValidateUrl_DomainRedirect_Enabled(t *testing.T) { +func TestRedirectsValidateFromUrl_DomainRedirect_Enabled(t *testing.T) { + t.Setenv(feature.RedirectsPlaceholders.EnvVariable, "true") + + tests := map[string]struct { + url string + expectedErr error + }{ + "valid_url": { + url: "/goto.html", + }, + "domain_level": { + url: "https://GitLab.com", + }, + "no_special_characters_escape_domain_level": { + url: "/\\GitLab.com", + expectedErr: errNoValidStartingInURLPath, + }, + "no_schemaless_url_domain_level": { + url: "//GitLab.com/pages.html", + expectedErr: errNoValidStartingInURLPath, + }, + "no_bare_domain_level": { + url: "GitLab.com", + expectedErr: errNoValidStartingInURLPath, + }, + "no_parent_traversing_relative_url": { + url: "../target.html", + expectedErr: errNoValidStartingInURLPath, + }, + "too_many_slashes": { + url: strings.Repeat("/a", 26), + expectedErr: fmt.Errorf("url path cannot contain more than %d forward slashes", defaultMaxPathSegments), + }, + "placeholders": { + url: "/news/:year/:month/:date/:slug", + }, + "domain_redirect_placeholders": { + url: "https://GitLab.com/news/:year/:month/:date/:slug", + }, + "splats": { + url: "/blog/*", + }, + "lone_splats": { + url: "https://GitLab.com/*", + }, + "domain_redirect_splats": { + url: "https://GitLab.com/blog/*", + }, + "splat_placeholders": { + url: "/new/path/:splat", + }, + "domain_redirect_splat_placeholders": { + url: "https://GitLab.com/new/path/:splat", + }, + } + + for name, tt := range tests { + t.Run(name, func(t *testing.T) { + err := validateFromURL(tt.url) + if tt.expectedErr != nil { + require.EqualError(t, err, tt.expectedErr.Error()) + return + } + + require.NoError(t, err) + }) + } +} + +func TestRedirectsValidateToUrl_DomainRedirect_Enabled(t *testing.T) { t.Setenv(feature.RedirectsPlaceholders.EnvVariable, "true") t.Setenv(feature.DomainRedirects.EnvVariable, "true") @@ -221,7 +350,7 @@ func TestRedirectsValidateUrl_DomainRedirect_Enabled(t *testing.T) { for name, tt := range tests { t.Run(name, func(t *testing.T) { - err := validateURL(tt.url, tt.status) + err := validateToURL(tt.url, tt.status) if tt.expectedErr != nil { require.EqualError(t, err, tt.expectedErr.Error()) return @@ -235,7 +364,7 @@ func TestRedirectsValidateUrl_DomainRedirect_Enabled(t *testing.T) { // Tests validation rules that only apply when the `FF_ENABLE_DOMAIN_REDIRECT` // feature flag is not enabled. These tests can be removed when the // `FF_ENABLE_DOMAIN_REDIRECT` flag is removed. -func TestRedirectsValidateUrl_DomainRedirect_Disabled(t *testing.T) { +func TestRedirectsValidateToUrl_DomainRedirect_Disabled(t *testing.T) { t.Setenv(feature.RedirectsPlaceholders.EnvVariable, "true") t.Setenv(feature.DomainRedirects.EnvVariable, "true") @@ -314,7 +443,7 @@ func TestRedirectsValidateUrl_DomainRedirect_Disabled(t *testing.T) { for name, tt := range tests { t.Run(name, func(t *testing.T) { - err := validateURL(tt.url, tt.status) + err := validateToURL(tt.url, tt.status) if tt.expectedErr != nil { require.EqualError(t, err, tt.expectedErr.Error()) return diff --git a/internal/serving/disk/helpers.go b/internal/serving/disk/helpers.go index 96360d494..c919f1887 100644 --- a/internal/serving/disk/helpers.go +++ b/internal/serving/disk/helpers.go @@ -5,6 +5,7 @@ import ( "io" "mime" "net/http" + "net/url" "path/filepath" "strconv" "strings" @@ -36,6 +37,15 @@ func endsWithoutHTMLExtension(path string) bool { return !strings.HasSuffix(path, ".html") } +func cloneURL(originalURL *url.URL) *url.URL { + newURL := new(url.URL) + + // Copy relevant fields + *newURL = *originalURL + + return newURL +} + // Detect file's content-type either by extension or mime-sniffing. // Implementation is adapted from Golang's `http.serveContent()` // See https://github.com/golang/go/blob/902fc114272978a40d2e65c2510a18e870077559/src/net/http/fs.go#L194 diff --git a/internal/serving/disk/reader.go b/internal/serving/disk/reader.go index 3ecb14a28..a7d1bb298 100644 --- a/internal/serving/disk/reader.go +++ b/internal/serving/disk/reader.go @@ -49,11 +49,10 @@ func (reader *Reader) tryRedirects(h serving.Handler) bool { r := redirects.ParseRedirects(ctx, root) - // In local, URL Host is coming blank - if len(h.Request.URL.Host) == 0 { - h.Request.URL.Host = h.Request.Host - } - rewrittenURL, status, err := r.Rewrite(h.Request.URL) + requestURL := cloneURL(h.Request.URL) + // Taking value from h.Request.Host as h.Request.URL.Host is not populated + requestURL.Host = h.Request.Host + rewrittenURL, status, err := r.Rewrite(requestURL) if err != nil { if !errors.Is(err, redirects.ErrNoRedirect) { // We assume that rewrite failure is not fatal -- GitLab From cc346c7e48aa6c6f6d18e379ad66c245ce13c477 Mon Sep 17 00:00:00 2001 From: ngala Date: Tue, 19 Dec 2023 16:39:08 +0530 Subject: [PATCH 5/6] Address review comment --- internal/redirects/matching.go | 67 ++++++++------------------ internal/redirects/matching_test.go | 12 ----- internal/redirects/redirects.go | 1 + internal/redirects/utils.go | 19 +++++--- internal/redirects/utils_test.go | 18 ++++--- internal/redirects/validations.go | 40 ++++++++------- internal/redirects/validations_test.go | 4 ++ 7 files changed, 71 insertions(+), 90 deletions(-) diff --git a/internal/redirects/matching.go b/internal/redirects/matching.go index ab0f41cc8..e5cb8a487 100644 --- a/internal/redirects/matching.go +++ b/internal/redirects/matching.go @@ -33,15 +33,14 @@ var ( // rule should redirect/rewrite to. This path is effectively the rule's "to" path that // has been templated with all the placeholders (if any) from the originally requested URL. func matchesRule(rule *netlifyRedirects.Rule, originalURL *url.URL) (bool, string) { - path := originalURL.Path - if isDomainURL(rule.From) { - if !isHostMatching(originalURL, rule.From) { - return false, "" - } + hostMatches, fromPath := matchHost(originalURL, rule.From) - path = originalURL.String() + if !hostMatches { + return false, "" } + path := originalURL.Path + if !feature.DomainRedirects.Enabled() && isDomainURL(rule.To) { return false, "" } @@ -51,11 +50,11 @@ func matchesRule(rule *netlifyRedirects.Rule, originalURL *url.URL) (bool, strin // and compiling the regex below. // However, only do this if there's nothing to template in the "to" path, // to avoid redirect/rewriting to a originalURL with a literal `:placeholder` in it. - if normalizePath(rule.From) == normalizePath(path) && !regexPlaceholderOrSplats.MatchString(rule.To) { + if normalizePath(fromPath) == normalizePath(path) && !regexPlaceholderOrSplats.MatchString(rule.To) { return true, rule.To } - return matchesRuleWithPlaceholderOrSplats(rule, path) + return matchesRuleWithPlaceholderOrSplats(path, fromPath, rule.To, rule.Status) } // matchesRuleWithPlaceholderOrSplats returns `true` if the rule's "from" pattern matches the requested URL. @@ -73,14 +72,14 @@ func matchesRule(rule *netlifyRedirects.Rule, originalURL *url.URL) (bool, strin // If the first return value is `true`, the second return value is the path that this // rule should redirect/rewrite to. This path is effectively the rule's "to" path that // has been templated with all the placeholders (if any) from the originally requested URL. -func matchesRuleWithPlaceholderOrSplats(rule *netlifyRedirects.Rule, path string) (bool, string) { +func matchesRuleWithPlaceholderOrSplats(requestPath string, fromPath string, toPath string, status int) (bool, string) { // Any logic beyond this point handles placeholders and splats. // If the FF_ENABLE_PLACEHOLDERS feature flag isn't enabled, exit now. if !feature.RedirectsPlaceholders.Enabled() { return false, "" } - regexSegments := convertToRegexSegments(rule.From) + regexSegments := convertToRegexSegments(fromPath) if len(regexSegments) == 0 { return false, "" } @@ -90,24 +89,24 @@ func matchesRuleWithPlaceholderOrSplats(rule *netlifyRedirects.Rule, path string if err != nil { log.WithFields(log.Fields{ "fromRegexString": fromRegexString, - "rule.From": rule.From, - "rule.To": rule.To, - "rule.Status": rule.Status, - "path": path, + "rule.From": fromPath, + "rule.To": toPath, + "rule.Status": status, + "path": requestPath, }).WithError(err).Warnf("matchesRule generated an invalid regex: %q", fromRegexString) return false, "" } - template := regexPlaceholderReplacement.ReplaceAllString(rule.To, `${$placeholder}`) - submatchIndex := fromRegex.FindStringSubmatchIndex(path) + template := regexPlaceholderReplacement.ReplaceAllString(toPath, `${$placeholder}`) + subMatchIndex := fromRegex.FindStringSubmatchIndex(requestPath) - if submatchIndex == nil { + if subMatchIndex == nil { return false, "" } var templatedToPath []byte - templatedToPath = fromRegex.ExpandString(templatedToPath, template, path, submatchIndex) + templatedToPath = fromRegex.ExpandString(templatedToPath, template, requestPath, subMatchIndex) // Some replacements result in subsequent slashes. For example, a rule with a "to" // like `foo/:splat/bar` will result in a path like `foo//bar` if the splat @@ -120,38 +119,12 @@ func matchesRuleWithPlaceholderOrSplats(rule *netlifyRedirects.Rule, path string return true, string(templatedToPath) } -// convertToRegexSegments returns array of regex segments +// convertToRegexSegments converts the path string to an array of regex segments +// It replaces placeholders with named capture groups and splat characters with a wildcard regex +// This allows matching the path segments to the request path and extracting matched placeholder values func convertToRegexSegments(path string) []string { var regexSegments []string - // For domain host - if strings.HasPrefix(path, "http") { - for _, segment := range strings.Split(path, "://") { - if segment == "" { - continue - } else if segment == "http" || segment == "https" { - regexSegments = append(regexSegments, regexp.QuoteMeta(segment+"://")) - } else { - parts := strings.SplitN(segment, "/", 2) - - if len(parts) != 2 { - return nil - } - - // adding host here - regexSegments = append(regexSegments, regexp.QuoteMeta(parts[0])) - path = "/" + parts[1] - } - } - } - - return append(regexSegments, processPathSegments(path)...) -} - -// Process path segments -func processPathSegments(path string) []string { - var regexSegments []string - for _, segment := range strings.Split(path, "/") { if segment == "" { continue diff --git a/internal/redirects/matching_test.go b/internal/redirects/matching_test.go index 956436bc7..3938b6803 100644 --- a/internal/redirects/matching_test.go +++ b/internal/redirects/matching_test.go @@ -162,12 +162,6 @@ func testMatchesRule(t *testing.T) { expectMatch: true, expectedPath: "/qux/foo/bar", }, - "multiple_splats": { - rule: "/foo/*/bar/*/baz /qux/:splat", - path: "/foo/hello/bar/world/baz", - expectMatch: true, - expectedPath: "/qux/hello", - }, "duplicate_splat_replacements": { rule: "/foo/* /qux/:splat/:splat", path: "/foo/hello", @@ -506,12 +500,6 @@ func Test_matchesDomainRule(t *testing.T) { expectMatch: true, expectedPath: "http://test.example.io/qux/foo/bar", }, - "multiple_splats": { - rule: "http://pages.example.io/foo/*/bar/*/baz http://test.example.io/qux/:splat", - path: "http://pages.example.io/foo/hello/bar/world/baz", - expectMatch: true, - expectedPath: "http://test.example.io/qux/hello", - }, "duplicate_splat_replacements": { rule: "http://pages.example.io/foo/* http://test.example.io/qux/:splat/:splat", path: "http://pages.example.io/foo/hello", diff --git a/internal/redirects/redirects.go b/internal/redirects/redirects.go index 3fb2525c3..2c01f396a 100644 --- a/internal/redirects/redirects.go +++ b/internal/redirects/redirects.go @@ -56,6 +56,7 @@ var ( errNoStartingForwardSlashInURLPath = errors.New("url path must start with forward slash /") errNoValidStartingInURLPath = errors.New("url path must start with forward slash / or http:// or https://") errNoSplats = errors.New("splats are not enabled. See https://docs.gitlab.com/ee/user/project/pages/redirects.html#feature-flag-for-rewrites") + errMoreThanOneSplats = errors.New("rule cannot contain more than 1 asterisk (*) in its from path") errNoPlaceholders = errors.New("placeholders are not enabled. See https://docs.gitlab.com/ee/user/project/pages/redirects.html#feature-flag-for-rewrites") errNoParams = errors.New("params not supported") errUnsupportedStatus = errors.New("status not supported") diff --git a/internal/redirects/utils.go b/internal/redirects/utils.go index 172ae82ba..b9aa819f8 100644 --- a/internal/redirects/utils.go +++ b/internal/redirects/utils.go @@ -15,15 +15,22 @@ func isDomainURL(urlString string) bool { return false } -// isHostMatching checks if the host and scheme of the originalURL matches the parsed host and scheme of the input path URL. -// It returns true if they match, false otherwise. -func isHostMatching(originalURL *url.URL, path string) bool { +// Write documentation for below method +// matchHost checks if the originalURL matches the domain and path provided in path argument. +// It returns a bool indicating if there is a host match and the matched path. +// It returns true and path when path does not contain scheme and host +func matchHost(originalURL *url.URL, path string) (bool, string) { + if !isDomainURL(path) { + return true, path + } + parsedURL, err := url.Parse(path) if err != nil { - return false + return false, "" } + if originalURL.Scheme == parsedURL.Scheme && originalURL.Host == parsedURL.Host { - return true + return true, parsedURL.Path } - return false + return false, "" } diff --git a/internal/redirects/utils_test.go b/internal/redirects/utils_test.go index 0bb46e65d..cda24e305 100644 --- a/internal/redirects/utils_test.go +++ b/internal/redirects/utils_test.go @@ -37,36 +37,36 @@ func TestIsDomainURL(t *testing.T) { } } -func TestIsHostMatching(t *testing.T) { +func TestMatchHost(t *testing.T) { tests := map[string]struct { originalURL string path string expectedBool bool + expectedPath string }{ "path_without_domain": { originalURL: "https://GitLab.com/goto.html", path: "/goto.html", - expectedBool: false, - }, - "path_without_schema": { - originalURL: "https://GitLab.com/goto.html", - path: "GitLab.com/goto.html", - expectedBool: false, + expectedBool: true, + expectedPath: "/goto.html", }, "valid_matching_host": { originalURL: "https://GitLab.com/goto.html", path: "https://GitLab.com/goto.html", expectedBool: true, + expectedPath: "/goto.html", }, "different_schema_path": { originalURL: "http://GitLab.com/goto.html", path: "https://GitLab.com/goto.html", expectedBool: false, + expectedPath: "", }, "different_host_path": { originalURL: "https://GitLab.com/goto.html", path: "https://GitLab-test.com/goto.html", expectedBool: false, + expectedPath: "", }, } @@ -75,7 +75,9 @@ func TestIsHostMatching(t *testing.T) { parsedURL, err := url.Parse(tt.originalURL) require.NoError(t, err) - require.EqualValues(t, tt.expectedBool, isHostMatching(parsedURL, tt.path)) + hostMatches, path := matchHost(parsedURL, tt.path) + require.EqualValues(t, tt.expectedBool, hostMatches) + require.EqualValues(t, tt.expectedPath, path) }) } } diff --git a/internal/redirects/validations.go b/internal/redirects/validations.go index 8a6889570..d28e9aba8 100644 --- a/internal/redirects/validations.go +++ b/internal/redirects/validations.go @@ -18,23 +18,28 @@ var ( regexPlaceholderReplacement = regexp.MustCompile(`(?i):(?P[a-z]+)`) ) +// validateFromURL validates the from URL in a redirect rule. +// It checks for various invalid cases like unsupported schemes, +// relative URLs, domain redirects without scheme, etc. +// Returns `nil` if the URL is valid. func validateFromURL(urlText string) error { - url, err := url.Parse(urlText) + fromURL, err := url.Parse(urlText) if err != nil { return errFailedToParseURL } - // No support for domain level redirects starting with // or special characters: + + // No support for domain level redirects starting with special characters without scheme: // - `//google.com` // - `/\google.com` - if strings.HasPrefix(urlText, "//") || strings.HasPrefix(url.Path, "/\\") { + if (fromURL.Host == "") != (fromURL.Scheme == "") || strings.HasPrefix(fromURL.Path, "/\\") { return errNoValidStartingInURLPath } - if url.Scheme != "" && url.Scheme != "http" && url.Scheme != "https" { + if fromURL.Scheme != "" && fromURL.Scheme != "http" && fromURL.Scheme != "https" { return errNoValidStartingInURLPath } - if url.Scheme == "" && url.Host == "" { + if fromURL.Scheme == "" && fromURL.Host == "" { // No parent traversing relative URL's with `./` or `../` // No ambiguous URLs like bare domains `GitLab.com` if !strings.HasPrefix(urlText, "/") { @@ -42,14 +47,18 @@ func validateFromURL(urlText string) error { } } - return validateSplatAndPlaceholders(url) + if feature.RedirectsPlaceholders.Enabled() && strings.Count(fromURL.Path, "/*") > 1 { + return errMoreThanOneSplats + } + + return validateSplatAndPlaceholders(fromURL.Path) } // validateURL runs validations against a rule URL. // Returns `nil` if the URL is valid. // nolint: gocyclo func validateToURL(urlText string, status int) error { - url, err := url.Parse(urlText) + toURL, err := url.Parse(urlText) if err != nil { return errFailedToParseURL } @@ -59,7 +68,7 @@ func validateToURL(urlText string, status int) error { // No support for domain level redirects starting with // or special characters: // - `//google.com` // - `/\google.com` - if strings.HasPrefix(urlText, "//") || strings.HasPrefix(url.Path, "/\\") { + if (toURL.Host == "") != (toURL.Scheme == "") || strings.HasPrefix(toURL.Path, "/\\") { return errNoValidStartingInURLPath } @@ -81,7 +90,7 @@ func validateToURL(urlText string, status int) error { // - `https://google.com` // - `//google.com` // - `/\google.com` - if url.Host != "" || url.Scheme != "" || strings.HasPrefix(url.Path, "/\\") { + if toURL.Host != "" || toURL.Scheme != "" || strings.HasPrefix(toURL.Path, "/\\") { return errNoDomainLevelRedirects } @@ -92,29 +101,26 @@ func validateToURL(urlText string, status int) error { } } - return validateSplatAndPlaceholders(url) + return validateSplatAndPlaceholders(toURL.Path) } -func validateSplatAndPlaceholders(url *url.URL) error { +func validateSplatAndPlaceholders(path string) error { if feature.RedirectsPlaceholders.Enabled() { maxPathSegments := cfg.MaxPathSegments - if url.Scheme != "" { - maxPathSegments += 2 - } // Limit the number of path segments a rule can contain. // This prevents the matching logic from generating regular // expressions that are too large/complex. - if strings.Count(url.Path, "/") > maxPathSegments { + if strings.Count(path, "/") > maxPathSegments { return fmt.Errorf("url path cannot contain more than %d forward slashes", cfg.MaxPathSegments) } } else { // No support for splats, https://docs.netlify.com/routing/redirects/redirect-options/#splats - if strings.Contains(url.Path, "*") { + if strings.Contains(path, "*") { return errNoSplats } // No support for placeholders, https://docs.netlify.com/routing/redirects/redirect-options/#placeholders - if regexpPlaceholder.MatchString(url.Path) { + if regexpPlaceholder.MatchString(path) { return errNoPlaceholders } } diff --git a/internal/redirects/validations_test.go b/internal/redirects/validations_test.go index 44bbcbfd6..13f115cd3 100644 --- a/internal/redirects/validations_test.go +++ b/internal/redirects/validations_test.go @@ -51,6 +51,10 @@ func TestRedirectsValidateFromUrl(t *testing.T) { "splats": { url: "/blog/*", }, + "no_multiple_splats_redirects": { + url: "/foo/*/bar/*/baz", + expectedErr: errMoreThanOneSplats, + }, "splat_placeholders": { url: "/new/path/:splat", }, -- GitLab From b6a87a019bf6b487cb6de5ebd405cdc78322296d Mon Sep 17 00:00:00 2001 From: ngala Date: Tue, 19 Dec 2023 17:26:25 +0530 Subject: [PATCH 6/6] Address review comment --- internal/redirects/validations.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/redirects/validations.go b/internal/redirects/validations.go index d28e9aba8..c469ecfc6 100644 --- a/internal/redirects/validations.go +++ b/internal/redirects/validations.go @@ -22,6 +22,7 @@ var ( // It checks for various invalid cases like unsupported schemes, // relative URLs, domain redirects without scheme, etc. // Returns `nil` if the URL is valid. +// nolint: gocyclo func validateFromURL(urlText string) error { fromURL, err := url.Parse(urlText) if err != nil { -- GitLab