From 7c8d6b0eeee5503702db23ae48637e904446d7d1 Mon Sep 17 00:00:00 2001 From: Kevin R Date: Sun, 7 Dec 2025 21:36:15 +0100 Subject: [PATCH 1/5] Pre-compiled Regular Expressions Previously, regular expressions were compiled inside loops for every URL processed. Now RegExp objects are created once when rules are loaded and stored in the provider instance. --- clearurls.js | 126 ++++++++++++++++++++++----------------------------- 1 file changed, 54 insertions(+), 72 deletions(-) diff --git a/clearurls.js b/clearurls.js index 66a4aee..7d8a205 100644 --- a/clearurls.js +++ b/clearurls.js @@ -89,14 +89,14 @@ function removeFieldsFormURL(provider, pureUrl, quiet = false, request = null) { /* * Apply raw rules to the URL. */ - rawRules.forEach(function (rawRule) { + rawRules.forEach(function ({ rule: rawRuleStr, regex: rawRuleRegex }) { let beforeReplace = url; - url = url.replace(new RegExp(rawRule, "gi"), ""); + url = url.replace(rawRuleRegex, ""); if (beforeReplace !== url) { //Log the action if (storage.loggingStatus && !quiet) { - pushToLog(beforeReplace, url, rawRule); + pushToLog(beforeReplace, url, rawRuleStr); } increaseBadged(quiet, request); @@ -113,13 +113,16 @@ function removeFieldsFormURL(provider, pureUrl, quiet = false, request = null) { * Only test for matches, if there are fields or fragments that can be cleaned. */ if (fields.toString() !== "" || fragments.toString() !== "") { - rules.forEach(rule => { + rules.forEach(({ rule, regex }) => { const beforeFields = fields.toString(); const beforeFragments = fragments.toString(); let localChange = false; + // Use the pre-compiled regex. 
+ // Note: reusing a regex without 'g' flag is safe for .test() + for (const field of fields.keys()) { - if (new RegExp("^"+rule+"$", "gi").test(field)) { + if (regex.test(field)) { fields.delete(field); changes = true; localChange = true; @@ -127,7 +130,7 @@ function removeFieldsFormURL(provider, pureUrl, quiet = false, request = null) { } for (const fragment of fragments.keys()) { - if (new RegExp("^"+rule+"$", "gi").test(fragment)) { + if (regex.test(fragment)) { fragments.delete(fragment); changes = true; localChange = true; @@ -360,7 +363,8 @@ function start() { let methods = []; if (_completeProvider) { - enabled_rules[".*"] = true; + // enabled_rules[".*"] = true; // Original + enabled_rules[".*"] = new RegExp("^.*$", "i"); } /** @@ -402,29 +406,26 @@ function start() { * @return {boolean} ProviderURL as RegExp */ this.matchURL = function (url) { + // Use matchException internal logic return urlPattern.test(url) && !(this.matchException(url)); }; /** - * Apply a rule to a given tuple of rule array. - * @param enabledRuleArray array for enabled rules - * @param disabledRulesArray array for disabled rules - * @param {String} rule RegExp as string - * @param {boolean} isActive Is this rule active? + * Helper to update rule maps with compiled regexes. 
*/ - this.applyRule = (enabledRuleArray, disabledRulesArray, rule, isActive = true) => { + const updateRule = (enabledMap, disabledMap, rule, isActive, compileFn) => { if (isActive) { - enabledRuleArray[rule] = true; - - if (disabledRulesArray[rule] !== undefined) { - delete disabledRulesArray[rule]; + if (!enabledMap[rule]) { + try { + enabledMap[rule] = compileFn(rule); + } catch (e) { + console.error("Invalid regex", rule, e); + } } + if (disabledMap[rule]) delete disabledMap[rule]; } else { - disabledRulesArray[rule] = true; - - if (enabledRuleArray[rule] !== undefined) { - delete enabledRuleArray[rule]; - } + disabledMap[rule] = true; + if (enabledMap[rule]) delete enabledMap[rule]; } }; @@ -436,20 +437,22 @@ function start() { * @param {boolean} isActive Is this rule active? */ this.addRule = function (rule, isActive = true) { - this.applyRule(enabled_rules, disabled_rules, rule, isActive); + updateRule(enabled_rules, disabled_rules, rule, isActive, r => new RegExp("^" + r + "$", "i")); }; /** - * Return all active rules as an array. + * Return all active rules as an array of {rule, regex}. * - * @return Array RegExp strings + * @return Array Objects */ this.getRules = function () { - if (!storage.referralMarketing) { - return Object.keys(Object.assign(enabled_rules, enabled_referralMarketing)); + let source = enabled_rules; + if (storage.referralMarketing) { + // Determine if we need to merge referral marketing rules + // We use a new object to avoid mutating enabled_rules via Object.assign if that was happening + source = Object.assign({}, enabled_rules, enabled_referralMarketing); } - - return Object.keys(enabled_rules); + return Object.entries(source).map(([rule, regex]) => ({ rule, regex })); }; /** @@ -460,16 +463,17 @@ function start() { * @param {boolean} isActive Is this rule active? 
*/ this.addRawRule = function (rule, isActive = true) { - this.applyRule(enabled_rawRules, disabled_rawRules, rule, isActive); + updateRule(enabled_rawRules, disabled_rawRules, rule, isActive, r => new RegExp(r, "gi")); }; /** * Return all active raw rules as an array. * - * @return Array RegExp strings + * @return Array Objects {rule, regex} */ this.getRawRules = function () { - return Object.keys(enabled_rawRules); + // return Object.keys(enabled_rawRules); + return Object.entries(enabled_rawRules).map(([rule, regex]) => ({ rule, regex })); }; /** @@ -480,7 +484,7 @@ function start() { * @param {boolean} isActive Is this rule active? */ this.addReferralMarketing = function (rule, isActive = true) { - this.applyRule(enabled_referralMarketing, disabled_referralMarketing, rule, isActive); + updateRule(enabled_referralMarketing, disabled_referralMarketing, rule, isActive, r => new RegExp("^" + r + "$", "i")); }; /** @@ -491,19 +495,7 @@ function start() { * @param {Boolean} isActive Is this exception active? 
*/ this.addException = function (exception, isActive = true) { - if (isActive) { - enabled_exceptions[exception] = true; - - if (disabled_exceptions[exception] !== undefined) { - delete disabled_exceptions[exception]; - } - } else { - disabled_exceptions[exception] = true; - - if (enabled_exceptions[exception] !== undefined) { - delete enabled_exceptions[exception]; - } - } + updateRule(enabled_exceptions, disabled_exceptions, exception, isActive, r => new RegExp(r, "i")); }; /** @@ -541,11 +533,10 @@ function start() { //Add the site blocked alert to every exception if (url === siteBlockedAlert) return true; - for (const exception in enabled_exceptions) { + for (const [exception, regex] of Object.entries(enabled_exceptions)) { if (result) break; - - let exception_regex = new RegExp(exception, "i"); - result = exception_regex.test(url); + // let exception_regex = new RegExp(exception, "i"); // Old + result = regex.test(url); } return result; @@ -559,19 +550,7 @@ function start() { * @param {Boolean} isActive Is this redirection active? 
*/ this.addRedirection = function (redirection, isActive = true) { - if (isActive) { - enabled_redirections[redirection] = true; - - if (disabled_redirections[redirection] !== undefined) { - delete disabled_redirections[redirection]; - } - } else { - disabled_redirections[redirection] = true; - - if (enabled_redirections[redirection] !== undefined) { - delete enabled_redirections[redirection]; - } - } + updateRule(enabled_redirections, disabled_redirections, redirection, isActive, r => new RegExp(r, "i")); }; /** @@ -582,11 +561,14 @@ function start() { this.getRedirection = function (url) { let re = null; - for (const redirection in enabled_redirections) { - let result = (url.match(new RegExp(redirection, "i"))); + for (const [redirection, regex] of Object.entries(enabled_redirections)) { + // let result = (url.match(new RegExp(redirection, "i"))); // Old + let result = url.match(regex); if (result && result.length > 0 && redirection) { - re = (new RegExp(redirection, "i")).exec(url)[1]; + // re = (new RegExp(redirection, "i")).exec(url)[1]; + // Reuse match result if possible, but exec is same as match for non-g. 
+ re = result[1]; // Capture group 1 break; } @@ -623,7 +605,7 @@ function start() { pushToLog(request.url, request.url, translate('log_ping_blocked')); increaseBadged(false, request); increaseTotalCounter(1); - return {cancel: true}; + return { cancel: true }; } /* @@ -642,8 +624,8 @@ function start() { if (result.redirect) { if (providers[i].shouldForceRedirect() && request.type === 'main_frame') { - browser.tabs.update(request.tabId, {url: result.url}).catch(handleError); - return {cancel: true}; + browser.tabs.update(request.tabId, { url: result.url }).catch(handleError); + return { cancel: true }; } return { @@ -658,9 +640,9 @@ function start() { if (result.cancel) { if (request.type === 'main_frame') { const blockingPage = browser.runtime.getURL("html/siteBlockedAlert.html?source=" + encodeURIComponent(request.url)); - browser.tabs.update(request.tabId, {url: blockingPage}).catch(handleError); + browser.tabs.update(request.tabId, { url: blockingPage }).catch(handleError); - return {cancel: true}; + return { cancel: true }; } else { return { redirectUrl: siteBlockedAlert @@ -723,7 +705,7 @@ function start() { */ browser.webRequest.onBeforeRequest.addListener( promise, - {urls: [""], types: getData("types").concat(getData("pingRequestTypes"))}, + { urls: [""], types: getData("types").concat(getData("pingRequestTypes")) }, ["blocking"] ); } -- GitLab From dc2325763269b97919ab551666766480176bc961 Mon Sep 17 00:00:00 2001 From: Kevin R Date: Sun, 7 Dec 2025 21:37:03 +0100 Subject: [PATCH 2/5] Bump version --- manifest.json | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/manifest.json b/manifest.json index 458f130..eebe47f 100644 --- a/manifest.json +++ b/manifest.json @@ -1,7 +1,7 @@ { "manifest_version": 2, "name": "ClearURLs", - "version": "1.27.3", + "version": "1.28.0", "author": "Kevin Roebert", "description": "__MSG_extension_description__", "homepage_url": "https://docs.clearurls.xyz", @@ -270,18 +270,18 @@ 
"*://*.google.co.zw/*", "*://*.google.cat/*" ], - "include_globs": [ - "http?://www.google.*/", + "include_globs": [ + "http?://www.google.*/", "http?://www.google.*/#hl=*", - "http?://www.google.*/search*", + "http?://www.google.*/search*", "http?://www.google.*/webhp?hl=*", - "https://encrypted.google.*/", + "https://encrypted.google.*/", "https://encrypted.google.*/#hl=*", - "https://encrypted.google.*/search*", + "https://encrypted.google.*/search*", "https://encrypted.google.*/webhp?hl=*", - "http?://ipv6.google.com/", + "http?://ipv6.google.com/", "http?://ipv6.google.com/search*" - ], + ], "js": [ "core_js/google_link_fix.js" ], @@ -303,4 +303,4 @@ "options_ui": { "page": "html/settings.html" } -} +} \ No newline at end of file -- GitLab From 9cf1704100cce31be2c8d9c8b92dd4d88110438a Mon Sep 17 00:00:00 2001 From: Kevin R Date: Sun, 7 Dec 2025 21:47:41 +0100 Subject: [PATCH 3/5] Fix --- clearurls.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clearurls.js b/clearurls.js index 7d8a205..0989d3f 100644 --- a/clearurls.js +++ b/clearurls.js @@ -447,7 +447,7 @@ function start() { */ this.getRules = function () { let source = enabled_rules; - if (storage.referralMarketing) { + if (!storage.referralMarketing) { // Determine if we need to merge referral marketing rules // We use a new object to avoid mutating enabled_rules via Object.assign if that was happening source = Object.assign({}, enabled_rules, enabled_referralMarketing); -- GitLab From 2f8403f1ce5934fa9a7a9ca979e396411220088d Mon Sep 17 00:00:00 2001 From: Kevin R Date: Sun, 7 Dec 2025 21:50:02 +0100 Subject: [PATCH 4/5] Removed comments --- clearurls.js | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/clearurls.js b/clearurls.js index 0989d3f..ad3b069 100644 --- a/clearurls.js +++ b/clearurls.js @@ -118,9 +118,6 @@ function removeFieldsFormURL(provider, pureUrl, quiet = false, request = null) { const beforeFragments = fragments.toString(); let localChange 
= false; - // Use the pre-compiled regex. - // Note: reusing a regex without 'g' flag is safe for .test() - for (const field of fields.keys()) { if (regex.test(field)) { fields.delete(field); @@ -363,7 +360,6 @@ function start() { let methods = []; if (_completeProvider) { - // enabled_rules[".*"] = true; // Original enabled_rules[".*"] = new RegExp("^.*$", "i"); } @@ -406,7 +402,6 @@ function start() { * @return {boolean} ProviderURL as RegExp */ this.matchURL = function (url) { - // Use matchException internal logic return urlPattern.test(url) && !(this.matchException(url)); }; @@ -535,7 +530,6 @@ function start() { for (const [exception, regex] of Object.entries(enabled_exceptions)) { if (result) break; - // let exception_regex = new RegExp(exception, "i"); // Old result = regex.test(url); } @@ -562,13 +556,10 @@ function start() { let re = null; for (const [redirection, regex] of Object.entries(enabled_redirections)) { - // let result = (url.match(new RegExp(redirection, "i"))); // Old let result = url.match(regex); if (result && result.length > 0 && redirection) { - // re = (new RegExp(redirection, "i")).exec(url)[1]; - // Reuse match result if possible, but exec is same as match for non-g. - re = result[1]; // Capture group 1 + re = result[1]; break; } -- GitLab From 81d5b35fe202c2c22ae62b79d26019e0bc2f62f6 Mon Sep 17 00:00:00 2001 From: Kevin R Date: Sun, 7 Dec 2025 22:10:16 +0100 Subject: [PATCH 5/5] Added tokenized provider lookup Reduce lookup overhead by using map for provider matching --- clearurls.js | 81 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 76 insertions(+), 5 deletions(-) diff --git a/clearurls.js b/clearurls.js index ad3b069..ea1730b 100644 --- a/clearurls.js +++ b/clearurls.js @@ -21,6 +21,8 @@ * This script is responsible for the core functionalities. 
*/ var providers = []; +var providersByToken = {}; // Map +var globalProviders = []; // Provider[] var prvKeys = []; var siteBlockedAlert = 'javascript:void(0)'; var dataHash; @@ -228,6 +230,17 @@ function start() { for (let re = 0; re < methods.length; re++) { providers[p].addMethod(methods[re]); } + + // Indexing logic + const token = providers[p].getLookupToken(); + if (token) { + if (!providersByToken[token]) { + providersByToken[token] = []; + } + providersByToken[token].push(providers[p]); + } else { + globalProviders.push(providers[p]); + } } } @@ -379,6 +392,38 @@ function start() { return name; }; + /** + * Returns the lookup token for this provider, or null if global. + * Extracts "domain" from patterns like ^https?://(?:[a-z0-9-]+\.)*?domain... + * @return {String|null} + */ + this.getLookupToken = function () { + if (!urlPattern) return null; + const source = urlPattern.source; + + // Case 1: Wildcard prefix pattern (e.g. ...*?amazon...) + const wildcardMatch = source.match(/\*\?([a-z0-9-]+)/i); + if (wildcardMatch && wildcardMatch[1]) { + return wildcardMatch[1].toLowerCase(); + } + + // Case 2: Explicit start pattern (e.g. ^https?://vk.com...) + // Matches ^https?://(optional www.)token + // We strip standard regex start structure to find the first meaningful domain token. + // This regex handles: + // - ^https?:// (start) + // - \/\/ or // (slashes, potentially escaped) + // - www. (optional www prefix, potentially escaped) + // - [a-z0-9-]+ (the token) + // It deliberately fails on patterns with groups (?:...) at the start, falling back to global. + const explicitMatch = source.match(/^(\^?https?:\\?\/\\?\/)(?:www(?:\\?\.))?([a-z0-9-]+)/i); + if (explicitMatch && explicitMatch[2]) { + return explicitMatch[2].toLowerCase(); + } + + return null; + }; + /** * Add URL pattern. 
* @@ -599,13 +644,39 @@ function start() { return { cancel: true }; } + let host = ""; + try { + host = extractHost(new URL(request.url)); + } catch (e) { + // If URL parsing fails, we fall back to an empty host, relying on global providers or skipping + } + + const hostTokens = host.split('.').map(t => t.toLowerCase()); + + // Collect candidate providers: Global + Key Matches + // Use a Set to avoid duplicates if multiple tokens map to the same provider (unlikely but safe) + let candidateProviders = new Set(globalProviders); + + for (const token of hostTokens) { + if (providersByToken[token]) { + for (const p of providersByToken[token]) { + candidateProviders.add(p); + } + } + } + + // "providers" global var is still used for legacy, but here we iterate candidates + // Converting Set to Array for iteration + const candidates = Array.from(candidateProviders); + /* * Call for every provider the removeFieldsFormURL method. */ - for (let i = 0; i < providers.length; i++) { - if (!providers[i].matchMethod(request)) continue; - if (providers[i].matchURL(request.url)) { - result = removeFieldsFormURL(providers[i], request.url, false, request); + for (let i = 0; i < candidates.length; i++) { + const provider = candidates[i]; + if (!provider.matchMethod(request)) continue; + if (provider.matchURL(request.url)) { + result = removeFieldsFormURL(provider, request.url, false, request); } /* @@ -613,7 +684,7 @@ function start() { * Cancel the active request. */ if (result.redirect) { - if (providers[i].shouldForceRedirect() && + if (provider.shouldForceRedirect() && request.type === 'main_frame') { browser.tabs.update(request.tabId, { url: result.url }).catch(handleError); return { cancel: true }; -- GitLab