Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(regexfilters): Better Structure and Readability #1261

Merged
merged 11 commits into from
Aug 25, 2023
Merged
104 changes: 104 additions & 0 deletions add-on/src/lib/redirect-handler/baseRegexFilter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/**
 * Constructor input for a RegexFilter: the pair of URLs describing one redirect.
 * Both values are parsed with `new URL(...)` by the RegexFilter constructor, so
 * they have to be absolute URLs.
 */
export interface IRegexFilter {
// URL the redirect starts from, e.g. https://ipfs.io/ipfs/QmZMxU
originUrl: string
// URL the redirect points to, e.g. http://localhost:8080/ipfs/QmZMxU
redirectUrl: string
}

/**
 * Result produced by a RegexFilter: a pattern plus its substitution.
 * NOTE(review): presumably consumed as a declarativeNetRequest redirect rule — confirm against the caller.
 */
export interface IFilter {
// Regular expression source (as a string) that matches origin URLs.
regexFilter: string
// Redirect target; capture groups of regexFilter are referenced as \1, \2, ...
regexSubstitution: string
}

/**
* Base class for all regex filters.
*/
export class RegexFilter {
readonly _redirectUrl!: string
readonly _originUrl!: string
readonly originURL: URL
readonly redirectURL: URL
readonly originNS: string
readonly redirectNS: string
// by default we cannot handle the request.
private _canHandle = false
regexFilter!: string
regexSubstitution!: string

/**
 * Stores and parses both URLs, derives their namespaces and then asks the
 * (sub)class to compute its filter.
 *
 * @param originUrl absolute URL the redirect starts from
 * @param redirectUrl absolute URL the redirect points to
 * @throws whatever `new URL` throws when either string is not a valid absolute URL
 */
constructor ({ originUrl, redirectUrl }: IRegexFilter) {
  // Raw strings, exposed through the originUrl / redirectUrl getters.
  this._originUrl = originUrl
  this._redirectUrl = redirectUrl

  // Parsed counterparts, shared with subclasses.
  this.originURL = new URL(originUrl)
  this.redirectURL = new URL(redirectUrl)

  // First path segment of each URL ('' when there is none).
  this.redirectNS = this.computeNamespaceFromUrl(this.redirectURL)
  this.originNS = this.computeNamespaceFromUrl(this.originURL)

  // Subclass hook first, protocol normalization second; the order matters.
  this.computeFilter()
  this.normalizeRegexFilter()
}
Comment on lines +26 to +35
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we have a method that filters have to implement called setCanHandle or should we expect _canHandle to be set by child classes?

I've seen a few different patterns in the child classes and I think we could normalize them so things don't get out of hand in the future.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had already implemented the `canHandle` setter but wasn't using it correctly. It's refactored now, so it should make more sense.


/**
 * The origin URL string exactly as it was passed to the constructor.
 *
 * @returns the raw, unparsed origin URL (the parsed form is `originURL`)
 */
get originUrl (): string {
return this._originUrl
}

/**
 * The redirect URL string exactly as it was passed to the constructor.
 *
 * @returns the raw, unparsed redirect URL (the parsed form is `redirectURL`)
 */
get redirectUrl (): string {
return this._redirectUrl
}

/**
 * Whether this filter is able to handle the redirect it was constructed with.
 * Starts out as `false` and only changes through the `canHandle` setter.
 *
 * @returns the current value of the flag
 */
get canHandle (): boolean {
return this._canHandle
}

/**
 * Marks whether this filter can handle the redirect. The `filter` getter
 * refuses to return a result while this is `false`.
 *
 * @param value `true` when the filter applies to the given URLs
 */
set canHandle (value: boolean) {
this._canHandle = value
}

/**
 * The computed result of this filter: the regex pattern together with its
 * substitution.
 *
 * @returns the `regexFilter` / `regexSubstitution` pair
 * @throws Error when `canHandle` is `false`
 */
get filter (): IFilter {
  if (this.canHandle) {
    const { regexFilter, regexSubstitution } = this
    return { regexFilter, regexSubstitution }
  }

  throw new Error('Cannot handle this request')
}

/**
 * Compute the regex filter and substitution.
 * This is the main method that needs to be implemented by subclasses; the base
 * implementation only throws.
 *
 * It is invoked from the constructor, immediately before normalizeRegexFilter(),
 * which reads `this.regexFilter`.
 *
 * @throws Error always, unless overridden by a subclass
 */
computeFilter (): void {
throw new Error('Method not implemented.')
}

/**
 * Normalize the regex filter so that it matches both protocols: every
 * `http`, `https` or `https?` occurrence becomes `https?`. Running it more
 * than once is harmless because `https?` maps onto itself.
 *
 * The constructor calls this right after computeFilter(). A subclass that
 * decides it cannot handle the redirect may not have assigned `regexFilter`,
 * in which case there is nothing to normalize and the method returns without
 * touching it instead of throwing a TypeError during construction.
 */
normalizeRegexFilter (): void {
  if (typeof this.regexFilter !== 'string') {
    return
  }

  this.regexFilter = this.regexFilter.replace(/https?\??/ig, 'https?')
}

/**
 * Extract the namespace, i.e. the first path segment, of a URL.
 * e.g. http://<gateway>/<namespace>/path/to/file/or/cid
 *
 * The segment has to be followed by another `/`; otherwise nothing matches.
 *
 * @param url parsed URL; only its `pathname` is inspected
 * @returns the lower-cased first path segment, or '' when there is none
 */
computeNamespaceFromUrl ({ pathname }: URL): string {
  // group 1 is the first non-empty segment enclosed by slashes.
  const firstSegment = /\/([^/]+)\//i.exec(pathname)
  const namespace = firstSegment?.[1] ?? ''
  return namespace.toLowerCase()
}
}
141 changes: 22 additions & 119 deletions add-on/src/lib/redirect-handler/blockOrObserve.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
import debug from 'debug'
import browser from 'webextension-polyfill'
import { CompanionState } from '../../types/companion.js'
import isIPFS from 'is-ipfs'
import { IFilter, IRegexFilter, RegexFilter } from './baseRegexFilter.js'
import { CommonPatternRedirectRegexFilter } from './commonPatternRedirectRegexFilter.js'
import { NamespaceRedirectRegexFilter } from './namespaceRedirectRegexFilter.js'
import { SubdomainRedirectRegexFilter } from './subdomainRedirectRegexFilter.js'

// this won't work in webworker context. Needs to be enabled manually
// https://github.com/debug-js/debug/issues/916
const log = debug('ipfs-companion:redirect-handler:blockOrObserve')
log.error = debug('ipfs-companion:redirect-handler:blockOrObserve:error')

const DEFAULT_NAMESPACES = new Set(['ipfs', 'ipns'])
export const DEFAULT_NAMESPACES = new Set(['ipfs', 'ipns'])

export const GLOBAL_STATE_CHANGE = 'GLOBAL_STATE_CHANGE'
export const GLOBAL_STATE_OPTION_CHANGE = 'GLOBAL_STATE_OPTION_CHANGE'
Expand All @@ -35,7 +38,7 @@ interface messageToSelf {
value?: string | Record<string, unknown>
}

const defaultNSRegexStr = `(${[...DEFAULT_NAMESPACES].join('|')})`
export const defaultNSRegexStr = `(${[...DEFAULT_NAMESPACES].join('|')})`

// We need to check if the browser supports the declarativeNetRequest API.
// TODO: replace with check for `Blocking` in `chrome.webRequest.OnBeforeRequestOptions`
Expand Down Expand Up @@ -116,138 +119,38 @@ export function isLocalHost (url: string): boolean {
* @param str URL string to escape
* @returns
*/
function escapeURLRegex (str: string): string {
export function escapeURLRegex (str: string): string {
  // Characters that may appear in a URL but carry meaning inside a regex
  // (or are escaped defensively); each one gets a backslash in front of it.
  // eslint-disable-next-line no-useless-escape
  const urlCharsToEscape = /([:\/\?#\[\]@!$&'\(\ )\*\+,;=\-_\.~])/g
  return str.replace(urlCharsToEscape, (char: string): string => `\\${char}`)
}

/**
 * Extract the namespace, i.e. the first path segment, from a URL string.
 * e.g. http://<gateway>/<namespace>/path/to/file/or/cid
 *
 * The segment has to be followed by another `/`; otherwise nothing matches.
 *
 * @param url absolute URL string (parsed with `new URL`)
 * @returns the lower-cased first path segment, or '' when there is none
 */
function computeNamespaceFromUrl (url: string): string {
  // group 1 is the first non-empty segment enclosed by slashes.
  const firstSegment = /\/([^/]+)\//i.exec(new URL(url).pathname)
  const namespace = firstSegment?.[1] ?? ''
  return namespace.toLowerCase()
}

/**
* Construct a regex filter and substitution for a redirect.
*
* @param originUrl
* @param redirectUrl
* @returns
*/
function constructRegexFilter ({ originUrl, redirectUrl }: redirectHandlerInput): {
regexSubstitution: string
regexFilter: string
} {
let regexSubstitution = redirectUrl
let regexFilter = originUrl
const originURL = new URL(originUrl)
const redirectNS = computeNamespaceFromUrl(redirectUrl)
const originNS = computeNamespaceFromUrl(originUrl)
if (!DEFAULT_NAMESPACES.has(originNS) && DEFAULT_NAMESPACES.has(redirectNS)) {
// A redirect like https://github.com/ipfs/ipfs-companion/issues/1255
regexFilter = `^${escapeURLRegex(regexFilter)}`.replace(/https?/ig, 'https?')
const origRegexFilter = regexFilter

const [tld, root, ...subdomain] = originURL.hostname.split('.').reverse()
const staticUrl = [root, tld]
while (subdomain.length > 0) {
const subdomainPart = subdomain.shift()
const commonStaticUrlStart = `^${originURL.protocol}\\:\\/\\/`
const commonStaticUrlEnd = `\\.${escapeURLRegex(staticUrl.join('.'))}\\/${RULE_REGEX_ENDING}`
if (isIPFS.cid(subdomainPart as string)) {
// We didn't find a namespace, but we found a CID
// e.g. https://bafybeib3bzis4mejzsnzsb65od3rnv5ffit7vsllratddjkgfgq4wiamqu.on.fleek.co
regexFilter = `${commonStaticUrlStart}(.*?)${commonStaticUrlEnd}`
regexSubstitution = redirectUrl
.replace(subdomainPart as string, '\\1') // replace CID
.replace(new RegExp(`${originURL.pathname}?$`), '\\2') // replace path

break
}
if (DEFAULT_NAMESPACES.has(subdomainPart as string)) {
// We found a namespace, this is going to match group 2, i.e. namespace.
// e.g https://bafybeib3bzis4mejzsnzsb65od3rnv5ffit7vsllratddjkgfgq4wiamqu.ipfs.dweb.link
regexFilter = `${commonStaticUrlStart}(.*?)\\.${defaultNSRegexStr}${commonStaticUrlEnd}`

regexSubstitution = redirectUrl
.replace(subdomain.reverse().join('.'), '\\1') // replace subdomain or CID.
.replace(`/${subdomainPart as string}/`, '/\\2/') // replace namespace dynamically.

const pathWithSearch = originURL.pathname + originURL.search
if (pathWithSearch !== '/') {
regexSubstitution = regexSubstitution.replace(pathWithSearch, '/\\3') // replace path
} else {
regexSubstitution += '\\3'
}

break
}
// till we find a namespace or CID, we keep adding subdomains to the staticUrl.
staticUrl.unshift(subdomainPart as string)
}

if (regexFilter !== origRegexFilter) {
// we found a valid regexFilter, so we can return.
return { regexSubstitution, regexFilter }
} else {
// we didn't find a valid regexFilter, so we can return the default.
regexFilter = originUrl
}
}

// if the namespaces are the same, we can generate simpler regex.
// The only value that needs special handling is the `uri` param.
if (
DEFAULT_NAMESPACES.has(originNS) &&
DEFAULT_NAMESPACES.has(redirectNS) &&
originNS === redirectNS &&
originURL.searchParams.get('uri') == null
) {
// A redirect like
// https://ipfs.io/ipfs/QmZMxU -> http://localhost:8080/ipfs/QmZMxU
const [originFirst, originLast] = originUrl.split(`/${originNS}/`)
regexFilter = `^${escapeURLRegex(originFirst)}\\/${defaultNSRegexStr}\\/${RULE_REGEX_ENDING}`
.replace(/https?/ig, 'https?')
regexSubstitution = redirectUrl
.replace(`/${redirectNS}/`, '/\\1/')
.replace(originLast, '\\2')
return { regexSubstitution, regexFilter }
}

// We can traverse the URL from the end, and find the first character that is different.
let commonIdx = 1
while (commonIdx < Math.min(originUrl.length, redirectUrl.length)) {
if (originUrl[originUrl.length - commonIdx] !== redirectUrl[redirectUrl.length - commonIdx]) {
break
function constructRegexFilter ({ originUrl, redirectUrl }: IRegexFilter): IFilter {
// the order is very important here, because we want to match the best possible filter.
const filtersToTryInOrder: Array<typeof RegexFilter> = [
SubdomainRedirectRegexFilter,
NamespaceRedirectRegexFilter,
CommonPatternRedirectRegexFilter
]

for (const Filter of filtersToTryInOrder) {
const filter = new Filter({ originUrl, redirectUrl })
if (filter.canHandle) {
return filter.filter
Comment on lines +144 to +147
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes sure we initialize the filter and, if the filter can handle the redirect, return its values.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is so much cleaner and easier to follow. With the regex filters abstracted into classes, I think it'll be much easier for all of us. Huge improvement!

}
commonIdx += 1
}

// We can now construct the regex filter and substitution.
regexSubstitution = redirectUrl.slice(0, redirectUrl.length - commonIdx + 1) + '\\1'
// We need to escape the characters that are allowed in the URL, but not in the regex.
const regexFilterFirst = escapeURLRegex(originUrl.slice(0, originUrl.length - commonIdx + 1))
regexFilter = `^${regexFilterFirst}${RULE_REGEX_ENDING}`.replace(/https?/ig, 'https?')

// This method does not parse:
// originUrl: "https://awesome.ipfs.io/"
// redirectUrl: "http://localhost:8081/ipns/awesome.ipfs.io/"
// that ends up with capturing all urls which we do not want.
if (regexFilter === `^https?\\:\\/${RULE_REGEX_ENDING}`) {
const subdomain = new URL(originUrl).hostname
regexFilter = `^https?\\:\\/\\/${escapeURLRegex(subdomain)}${RULE_REGEX_ENDING}`
regexSubstitution = regexSubstitution.replace('\\1', `/${subdomain}\\1`)
}

return { regexSubstitution, regexFilter }
// this is just to satisfy the compiler, this should never happen. Because CommonPatternRedirectRegexFilter can always
// handle.
return new CommonPatternRedirectRegexFilter({ originUrl, redirectUrl }).filter
}

// If the browser supports the declarativeNetRequest API, we can block the request.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import { RegexFilter } from './baseRegexFilter.js'
import { RULE_REGEX_ENDING, escapeURLRegex } from './blockOrObserve.js'

/**
 * Fallback filter that works from the common suffix of the two URLs. Handles
 * redirects like:
 * origin: '^https?\\:\\/\\/awesome\\.ipfs\\.io\\/(.*)'
 * destination: 'http://localhost:8081/ipns/awesome.ipfs.io/$1'
 */
export class CommonPatternRedirectRegexFilter extends RegexFilter {
  /**
   * Builds the filter from whatever precedes the shared suffix of the origin
   * and redirect URLs. This filter never declines a redirect.
   */
  computeFilter (): void {
    // last resort: every redirect is accepted here.
    this.canHandle = true

    const { originUrl, redirectUrl } = this

    // Walk both strings backwards until the characters stop matching.
    const shortest = Math.min(originUrl.length, redirectUrl.length)
    let suffixIdx = 1
    for (; suffixIdx < shortest; suffixIdx += 1) {
      if (originUrl[originUrl.length - suffixIdx] !== redirectUrl[redirectUrl.length - suffixIdx]) {
        break
      }
    }

    // Everything in front of the shared suffix stays literal; the suffix itself
    // becomes capture group 1.
    const redirectHead = redirectUrl.slice(0, redirectUrl.length - suffixIdx + 1)
    this.regexSubstitution = `${redirectHead}\\1`

    // URL characters have to be escaped before they can be used in the regex.
    const originHead = escapeURLRegex(originUrl.slice(0, originUrl.length - suffixIdx + 1))
    this.regexFilter = `^${originHead}${RULE_REGEX_ENDING}`
    // normalize now so the comparison below sees the `https?` form.
    this.normalizeRegexFilter()

    // A pattern reduced to just the protocol would capture every URL, e.g. for
    // originUrl: "https://awesome.ipfs.io/"
    // redirectUrl: "http://localhost:8081/ipns/awesome.ipfs.io/"
    // Anything else is already specific enough.
    if (this.regexFilter !== `^https?\\:\\/${RULE_REGEX_ENDING}`) {
      return
    }

    // Pin the pattern to the origin host and put the host back into the target.
    const subdomain = new URL(originUrl).hostname
    this.regexFilter = `^https?\\:\\/\\/${escapeURLRegex(subdomain)}${RULE_REGEX_ENDING}`
    this.regexSubstitution = this.regexSubstitution.replace('\\1', `/${subdomain}\\1`)
  }
}
25 changes: 25 additions & 0 deletions add-on/src/lib/redirect-handler/namespaceRedirectRegexFilter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import { RegexFilter } from './baseRegexFilter.js'
import { DEFAULT_NAMESPACES, RULE_REGEX_ENDING, defaultNSRegexStr, escapeURLRegex } from './blockOrObserve.js'

/**
 * Handles namespace redirects like:
 * origin: '^https?\\:\\/\\/ipfs\\.io\\/(ipfs|ipns)\\/(.*)'
 * destination: 'http://localhost:8080/\\1/\\2'
 */
export class NamespaceRedirectRegexFilter extends RegexFilter {
  /**
   * Applies when origin and redirect share the same default namespace and the
   * origin carries no `uri` search param, e.g.
   * https://ipfs.io/ipfs/QmZMxU -> http://localhost:8080/ipfs/QmZMxU
   *
   * Group 1 of the generated pattern is the namespace, group 2 is everything
   * after it.
   */
  computeFilter (): void {
    // if the namespaces are the same, we can generate simpler regex.
    // The only value that needs special handling is the `uri` param.
    this.canHandle = DEFAULT_NAMESPACES.has(this.originNS) &&
      DEFAULT_NAMESPACES.has(this.redirectNS) &&
      this.originNS === this.redirectNS &&
      this.originURL.searchParams.get('uri') == null

    // The values below are computed even when canHandle is false: the base
    // constructor runs normalizeRegexFilter() right after this method and that
    // reads this.regexFilter.
    const [originFirst, originLast] = this.originUrl.split(`/${this.originNS}/`)
    this.regexFilter = `^${escapeURLRegex(originFirst)}\\/${defaultNSRegexStr}\\/${RULE_REGEX_ENDING}`

    const nsSegment = `/${this.redirectNS}/`
    const nsStart = this.redirectUrl.indexOf(nsSegment)
    if (nsStart === -1) {
      // No literal namespace segment in the redirect URL: there is nothing to
      // anchor on, keep the plain whole-string replacement.
      this.regexSubstitution = this.redirectUrl.replace(originLast, '\\2')
      return
    }

    // Only the part AFTER the namespace segment may turn into \2. Replacing on
    // the whole URL would hit the first occurrence anywhere (possibly inside
    // the host) and, for an origin that ends right after the namespace
    // (originLast === ''), would prepend \2 to the entire redirect URL.
    const head = this.redirectUrl.slice(0, nsStart)
    const tail = this.redirectUrl.slice(nsStart + nsSegment.length)
    this.regexSubstitution = `${head}/\\1/${tail.replace(originLast, '\\2')}`
  }
}
Loading