-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feat: Optimize canSendEmailOtp #1301
Changes from 6 commits
d58a297
8687b13
978ec9e
c9cedb7
897bcbe
db15356
57e0a84
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
import { ResultAsync, errAsync, okAsync } from "neverthrow" | ||
import { Op, ModelStatic, Transaction } from "sequelize" | ||
import { Op, ModelStatic, Transaction, QueryTypes } from "sequelize" | ||
import { Sequelize } from "sequelize-typescript" | ||
import { RequireAtLeastOne } from "type-fest" | ||
|
||
|
@@ -155,26 +155,51 @@ class UsersService { | |
|
||
async canSendEmailOtp(email: string) { | ||
const normalizedEmail = email.toLowerCase() | ||
const whitelistEntries = await this.whitelist.findAll({ | ||
attributes: ["email"], | ||
where: { | ||
expiry: { | ||
[Op.or]: [{ [Op.is]: null }, { [Op.gt]: new Date() }], | ||
|
||
// Raw query for readability because it uses 2 "unusual" query patterns | ||
// - a CASE WHEN in the WHERE clause | ||
// - a where condition of the form 'input like cell' (with a concat() call thrown in), as opposed to the more common 'cell like input' | ||
// | ||
// Why? We want to leverage the DB to see if a single record exists that whitelists the input | ||
// we do not want to download the whole table locally to filter in local code | ||
// | ||
// query logic: | ||
// - if whitelist entry is a full email (something before the @), then do exact match | ||
// - if whitelist entry is a domain (no @, or starting with @), then do suffix match | ||
// | ||
// Limit 1 is added to allow the query to exit early on first match | ||
const records = (await this.sequelize.query( | ||
` | ||
SELECT email | ||
FROM whitelist | ||
WHERE | ||
(expiry is NULL OR expiry > NOW()) | ||
AND | ||
CASE WHEN email ~ '^.+@' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note: compared to the previous code, this is a simpler regexp. It detects the "full email", while the previous more complicated regexp was detecting the domains. Obviously the match strategies have been swapped accordingly. |
||
THEN email = :email | ||
ELSE :email LIKE CONCAT('%', email) | ||
END | ||
LIMIT 1 | ||
`, | ||
{ | ||
replacements: { email: normalizedEmail }, | ||
type: QueryTypes.SELECT, | ||
} | ||
)) as { email: string }[] | ||
|
||
if (records.length >= 1) { | ||
logger.info({ | ||
message: "Email valid for OTP by whitelist", | ||
meta: { | ||
email, | ||
whitelistEntry: records[0].email, | ||
}, | ||
}, | ||
}) | ||
const whitelistDomains = whitelistEntries.map((entry) => entry.email) | ||
const hasMatchDomain = | ||
whitelistDomains.filter((domain) => { | ||
// if domain is really just a domain (does not include a @ OR starts with a @), we can do a prefix match | ||
if (/^@|^[^@]+$/.test(domain)) { | ||
return normalizedEmail.endsWith(domain) | ||
} | ||
}) | ||
|
||
return true | ||
} | ||
|
||
return normalizedEmail === domain | ||
// otherwise we can ONLY do an exact match | ||
}).length > 0 | ||
return hasMatchDomain | ||
return false | ||
} | ||
|
||
async sendEmailOtp(email: string) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,7 @@ const MockRepository = { | |
|
||
const MockSequelize = { | ||
transaction: jest.fn((closure) => closure("transaction")), | ||
query: jest.fn(), | ||
} | ||
|
||
const MockWhitelist = { | ||
|
@@ -120,71 +121,31 @@ describe("User Service", () => { | |
expect(actual.githubId).toBe(mockGithubId) | ||
}) | ||
|
||
it("should allow whitelisted emails", async () => { | ||
// Arrange | ||
const expected = true | ||
const mockWhitelistEntry = { | ||
email: ".gov.sg", | ||
} | ||
MockWhitelist.findAll.mockResolvedValueOnce([mockWhitelistEntry]) | ||
|
||
// Act | ||
const actual = await UsersService.canSendEmailOtp(mockEmail) | ||
|
||
// Assert | ||
expect(actual).toBe(expected) | ||
}) | ||
it("should not allow partial match of whitelisted emails", async () => { | ||
// Arrange | ||
const expected = false | ||
// NOTE: This ends with gov.sg not .gov.sg (lacks a dot after the @) | ||
const emailWithoutDot = "[email protected]" | ||
const mockWhitelistEntry = { | ||
email: ".gov.sg", | ||
} | ||
MockWhitelist.findAll.mockResolvedValueOnce([mockWhitelistEntry]) | ||
|
||
// Act | ||
const actual = await UsersService.canSendEmailOtp(emailWithoutDot) | ||
|
||
// Assert | ||
expect(actual).toBe(expected) | ||
}) | ||
|
||
it("should allow not .gov.sg emails when whitelist does not contain .gov.sg", async () => { | ||
// Arrange | ||
const expected = false | ||
const mockWhitelistEntries = [ | ||
{ | ||
email: "@accenture.com", | ||
}, | ||
{ | ||
email: ".edu.sg", | ||
}, | ||
] | ||
MockWhitelist.findAll.mockResolvedValueOnce(mockWhitelistEntries) | ||
describe("canSendEmailOtp", () => { | ||
it("should return true when the db query returns a record", async () => { | ||
// Arrange | ||
const expected = true | ||
MockSequelize.query.mockResolvedValueOnce([{ email: ".gov.sg" }]) | ||
|
||
// Act | ||
const actual = await UsersService.canSendEmailOtp(mockEmail) | ||
// Act | ||
const actual = await UsersService.canSendEmailOtp(mockEmail) | ||
|
||
// Assert | ||
expect(actual).toBe(expected) | ||
}) | ||
// Assert | ||
expect(actual).toBe(expected) | ||
}) | ||
|
||
it("should not allow suffix match if the whitelist entry is a full email", async () => { | ||
// Arrange | ||
const expected = false | ||
const mockWhitelistEntries = [ | ||
{ | ||
email: "[email protected]", | ||
}, | ||
] | ||
MockWhitelist.findAll.mockResolvedValueOnce(mockWhitelistEntries) | ||
it("should return false when the db query returns no record", async () => { | ||
// Arrange | ||
const expected = false | ||
// NOTE: This ends with gov.sg not .gov.sg (lacks a dot after the @) | ||
const emailWithoutDot = "[email protected]" | ||
MockSequelize.query.mockResolvedValueOnce([]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should still be adding the mock for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh wait what? the tests can populate the DB? I would love indeed to run the query for real rather than arbitrarily mocking a db response! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Whoops sorry i meant the jest mocks are reset! So we should be mocking the return of query here as That said we do have population of DB for some of our tests in The There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah I see, I'll take a look at the integration tests (I could probably use some help for that 😅) But so specifically in this case you are commenting on here, the mock does what we expect. For reference, the new query no longer return the full content of the table, it returns either:
These 2 distinct responses represent a boolean-ish yes/no state of whether the input is whitelisted. And in this case, the input There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah you're right, I got the intent of the test wrong sorry! But yeah i think we can then introduce an integration test with this scenario (db contains There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've updated the comments to make it clearer why a raw query is used: db15356 |
||
|
||
// Act | ||
const actual = await UsersService.canSendEmailOtp("[email protected]") | ||
// Act | ||
const actual = await UsersService.canSendEmailOtp(emailWithoutDot) | ||
|
||
// Assert | ||
expect(actual).toBe(expected) | ||
// Assert | ||
expect(actual).toBe(expected) | ||
}) | ||
}) | ||
}) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we share the before and after trace in the pr description to see the difference and also maybe if the latency improvements
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The after traces are already there, I'll add a before trace as well.
Aye, so on this a few things:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done! 😄 I added a sample trace for the before in the PR description and I updated the performance section too.
So again, in the grand scheme of things, this is a very cheap query so this PR is not bringing visible impact to users. It's more to illustrate how to query the DB with good patterns :)
Also Just for reference, here is a screenshot on the variability of max run time of the current query in prod. We'll be comparing the performance of the new query against that (link to widget here)