Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: Refactor and migrate Web scraping to app router, INTER-911, INTER-459 #162

Merged
merged 4 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,79 +1,34 @@
'use client';

import { UseCaseWrapper } from '../../client/components/common/UseCaseWrapper/UseCaseWrapper';
import FlightCard, { Flight } from '../../client/components/web-scraping/FlightCard';
import { useVisitorData } from '@fingerprintjs/fingerprintjs-pro-react';
import { useQueryState } from 'next-usequerystate';
import { useQuery, UseQueryResult } from 'react-query';
import { GetServerSideProps, NextPage } from 'next';
import { FlightQuery } from '../api/web-scraping/flights';
import { CheckResultObject } from '../../server/checkResult';
import { USE_CASES } from '../../client/components/common/content';
import { CustomPageProps } from '../_app';
import { Select, SelectItem } from '../../client/components/common/Select/Select';
import ArrowIcon from '../../client/img/arrowRight.svg';
import Image from 'next/image';
import styles from './webScraping.module.scss';
import Button from '../../client/components/common/Button/Button';
import { Alert } from '../../client/components/common/Alert/Alert';
import { Spinner } from '../../client/components/common/Spinner/Spinner';

/**
 * Server-side props loader for the page.
 *
 * Copies the `from` and `to` URL query params into the page props so they are
 * available on first render (falling back to `null` when a param is absent),
 * together with a `disableBotDetection` flag for testing and demo purposes.
 * The flag is on only when the query param is exactly '1' or 'true'.
 */
export const getServerSideProps: GetServerSideProps<QueryAsProps> = async (context) => {
  const params = context.query;
  const botDetectionParam = params.disableBotDetection;

  const props: QueryAsProps = {
    from: (params.from as string) ?? null,
    to: (params.to as string) ?? null,
    disableBotDetection: botDetectionParam === '1' || botDetectionParam === 'true',
  };

  return { props };
};
import { FlightQuery } from './api/flights/route';
import { FunctionComponent, Suspense } from 'react';
import { useSearchParams } from 'next/navigation';
import { AIRPORTS } from './data/airports';
import { Flight, FlightCard } from './components/FlightCard';

// Result type of the flights query: a CheckResultObject whose `data` payload is a list of flights.
type FlightQueryResult = CheckResultObject<Flight[]>;

/**
 * Airports available to the flight search, as `{ city, code }` objects.
 *
 * Written as a code → city lookup and expanded in declaration order, so the
 * list order matches the order below: `AIRPORTS[0]` (SFO) and `AIRPORTS[1]` (JFK)
 * are the entries the page uses as default `from` / `to` values.
 * Keys must stay non-numeric strings, otherwise property order is not preserved.
 */
export const AIRPORTS = Object.entries({
  SFO: 'San Francisco',
  JFK: 'New York',
  LHR: 'London',
  HND: 'Tokyo',
  CDG: 'Paris',
  HKG: 'Hong Kong',
  SIN: 'Singapore',
  DXB: 'Dubai',
  PVG: 'Shanghai',
  ICN: 'Seoul',
  BKK: 'Bangkok',
  AMS: 'Amsterdam',
  PEK: 'Beijing',
  FRA: 'Frankfurt',
  CPT: 'Cape Town',
  SYD: 'Sydney',
  MEL: 'Melbourne',
  YYZ: 'Toronto',
  YVR: 'Vancouver',
  YUL: 'Montreal',
  BRU: 'Brussels',
  CPH: 'Copenhagen',
  OSL: 'Oslo',
  ARN: 'Stockholm',
  HEL: 'Helsinki',
  FCO: 'Rome',
}).map(([code, city]) => ({ city, code }));

// Page props derived from the URL query string by getServerSideProps.
type QueryAsProps = {
// Value of the `from` query param, or null when it is not present
from: string | null;
// Value of the `to` query param, or null when it is not present
to: string | null;
// True when the `disableBotDetection` query param is '1' or 'true' (testing and demo purposes)
disableBotDetection: boolean;
};

export const WebScrapingUseCase: NextPage<QueryAsProps & CustomPageProps> = ({
from,
to,
disableBotDetection,
embed,
}) => {
const [fromCode, setFromCode] = useQueryState('from', { defaultValue: from?.toUpperCase() ?? AIRPORTS[0].code });
const [toCode, setToCode] = useQueryState('to', { defaultValue: to?.toUpperCase() ?? AIRPORTS[1].code });
const WebScraping: FunctionComponent = () => {
const searchParams = useSearchParams();
const [fromCode, setFromCode] = useQueryState('from', {
defaultValue: searchParams?.get('from')?.toUpperCase() ?? AIRPORTS[0].code,
});
const [toCode, setToCode] = useQueryState('to', {
defaultValue: searchParams?.get('to')?.toUpperCase() ?? AIRPORTS[1].code,
});

/**
* We use the Fingerprint Pro React SDK hook to get visitor data (https://github.com/fingerprintjs/fingerprintjs-pro-react)
Expand All @@ -97,7 +52,7 @@ export const WebScrapingUseCase: NextPage<QueryAsProps & CustomPageProps> = ({
['getFlights'],
async () => {
const { requestId } = await getVisitorData();
const response = await fetch(`/api/web-scraping/flights`, {
const response = await fetch(`/web-scraping/api/flights`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
Expand All @@ -106,8 +61,8 @@ export const WebScrapingUseCase: NextPage<QueryAsProps & CustomPageProps> = ({
from: fromCode,
to: toCode,
requestId,
disableBotDetection,
} as FlightQuery),
disableBotDetection: Boolean(searchParams?.get('disableBotDetection')),
} satisfies FlightQuery),
});
if (response.status < 500) {
return await response.json();
Expand All @@ -125,7 +80,7 @@ export const WebScrapingUseCase: NextPage<QueryAsProps & CustomPageProps> = ({

return (
<>
<UseCaseWrapper useCase={USE_CASES.webScraping} embed={embed}>
<UseCaseWrapper useCase={USE_CASES.webScraping}>
<h2 className={styles.searchTitle}>Search for today&apos;s flights</h2>
<form
onSubmit={(event) => {
Expand Down Expand Up @@ -173,6 +128,15 @@ export const WebScrapingUseCase: NextPage<QueryAsProps & CustomPageProps> = ({
);
};

/**
 * Public entry point of the web scraping use case: renders `WebScraping`
 * inside a `Suspense` boundary.
 *
 * Suspense required due to useSearchParams() https://nextjs.org/docs/messages/missing-suspense-with-csr-bailout
 */
export const WebScrapingUseCase = () => (
  <Suspense>
    <WebScraping />
  </Suspense>
);

const Results = ({ data, isFetching, error }: UseQueryResult<FlightQueryResult, Error>) => {
const { data: flights, message, severity } = data ?? {};

Expand Down Expand Up @@ -203,5 +167,3 @@ const Results = ({ data, isFetching, error }: UseQueryResult<FlightQueryResult,
</div>
);
};

export default WebScrapingUseCase;
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import { NextApiRequest, NextApiResponse } from 'next';
import { Severity, getAndValidateFingerprintResult } from '../../../server/checks';
import { isValidPostRequest } from '../../../server/server';
import { ONE_DAY_MS, FIVE_MINUTES_MS, ONE_HOUR_MS } from '../../../shared/timeUtils';
import { AIRPORTS } from '../../web-scraping';
import { Flight } from '../../../client/components/web-scraping/FlightCard';
import { saveBotVisit } from '../../../server/botd-firewall/botVisitDatabase';
import { Severity, getAndValidateFingerprintResult } from '../../../../server/checks';
import { ONE_DAY_MS, FIVE_MINUTES_MS, ONE_HOUR_MS } from '../../../../shared/timeUtils';
import { saveBotVisit } from '../../../../server/botd-firewall/botVisitDatabase';
import { NextRequest, NextResponse } from 'next/server';
import { AIRPORTS } from '../../data/airports';
import { Flight } from '../../components/FlightCard';

/** Rounds `time` to the nearest multiple of `FIVE_MINUTES_MS`. */
const roundToFiveMinutes = (time: number) => {
  const wholeIntervals = Math.round(time / FIVE_MINUTES_MS);
  return wholeIntervals * FIVE_MINUTES_MS;
};

Expand All @@ -21,21 +20,17 @@ export type FlightsResponse = {
data?: Flight[];
};

export default async function getFlights(req: NextApiRequest, res: NextApiResponse<FlightsResponse>) {
// This API route accepts only POST requests.
const reqValidation = isValidPostRequest(req);
if (!reqValidation.okay) {
res.status(405).send({ severity: 'error', message: reqValidation.error });
return;
}

const { from, to, requestId, disableBotDetection } = req.body as FlightQuery;
export async function POST(req: NextRequest): Promise<NextResponse<FlightsResponse>> {
const { from, to, requestId, disableBotDetection } = (await req.json()) as FlightQuery;

// Get the full Identification and Bot Detection result from Fingerprint Server API and validate its authenticity
const fingerprintResult = await getAndValidateFingerprintResult({ requestId, req });
const fingerprintResult = await getAndValidateFingerprintResult({
requestId,
req,
options: { minConfidenceScore: 0.5 },
});
if (!fingerprintResult.okay) {
res.status(403).send({ severity: 'error', message: fingerprintResult.error });
return;
return NextResponse.json({ severity: 'error', message: fingerprintResult.error }, { status: 403 });
}

const identification = fingerprintResult.data.products?.identification?.data;
Expand All @@ -44,35 +39,34 @@ export default async function getFlights(req: NextApiRequest, res: NextApiRespon
// Backdoor for demo and testing purposes
// If bot detection is disabled, just send the result
if (!botData || disableBotDetection) {
res
.status(200)
.send({ severity: 'success', message: 'Bot detection is disabled.', data: getFlightResults(from, to) });
return;
return NextResponse.json({
severity: 'success',
message: 'Bot detection is disabled.',
data: getFlightResults(from, to),
});
}

// If a bot is detected, return an error
if (botData.bot?.result === 'bad') {
res.status(403).send({
severity: 'error',
message: '🤖 Malicious bot detected, access denied.',
});
// Optionally, here you could also save the bot's IP address to a blocklist in your database
// and block all requests from this IP address in the future at a web server/firewall level.
saveBotVisit(botData, identification?.visitorId ?? 'N/A');
return;
return NextResponse.json(
{ severity: 'error', message: '🤖 Malicious bot detected, access denied.' },
{ status: 403 },
);
}

// Check for unexpected bot detection value, just in case
if (!['notDetected', 'good'].includes(botData.bot?.result)) {
res.status(500).send({
severity: 'error',
message: 'Server error, unexpected bot detection value.',
});
return;
return NextResponse.json(
{ severity: 'error', message: 'Server error, unexpected bot detection value.' },
{ status: 500 },
);
}

// All checks passed, allow access
res.status(200).send({
return NextResponse.json({
severity: 'success',
message: 'No malicious bot nor spoofing detected, access allowed.',
data: getFlightResults(from, to),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import { FunctionComponent } from 'react';
import styles from './FlightCard.module.scss';
import DepartureIcon from '../../img/departure.svg';
import ArrivalIcon from '../../img/arrival.svg';
import AirCanada from '../../img/airCanada.svg';
import DepartureIcon from '../images/departure.svg';
import ArrivalIcon from '../images/arrival.svg';
import AirCanada from '../images/airCanada.svg';
import StarIcon from '../images/star.svg';
import Image from 'next/image';
import Button from '../common/Button/Button';
import StarIcon from '../../img/star.svg';
import { TEST_IDS } from '../../testIDs';
import { ONE_HOUR_MS, ONE_MINUTE_MS } from '../../../shared/timeUtils';
import { TEST_IDS } from '../../../client/testIDs';
import Button from '../../../client/components/common/Button/Button';

const TEST_ID = TEST_IDS.webScraping;

Expand Down Expand Up @@ -174,5 +174,3 @@ export const FlightCard: FunctionComponent<FlightCardProps> = ({ flight }) => {
</div>
);
};

export default FlightCard;
28 changes: 28 additions & 0 deletions src/app/web-scraping/data/airports.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/**
 * Airports used by the web scraping flight search demo, as `{ city, code }` objects.
 *
 * Maintained as a lookup keyed by airport code and expanded in declaration order,
 * so the resulting list keeps the order written here (San Francisco first, then New York).
 * Keys must remain non-numeric strings so that property order is preserved.
 */
export const AIRPORTS = Object.entries({
  SFO: 'San Francisco',
  JFK: 'New York',
  LHR: 'London',
  HND: 'Tokyo',
  CDG: 'Paris',
  HKG: 'Hong Kong',
  SIN: 'Singapore',
  DXB: 'Dubai',
  PVG: 'Shanghai',
  ICN: 'Seoul',
  BKK: 'Bangkok',
  AMS: 'Amsterdam',
  PEK: 'Beijing',
  FRA: 'Frankfurt',
  CPT: 'Cape Town',
  SYD: 'Sydney',
  MEL: 'Melbourne',
  YYZ: 'Toronto',
  YVR: 'Vancouver',
  YUL: 'Montreal',
  BRU: 'Brussels',
  CPH: 'Copenhagen',
  OSL: 'Oslo',
  ARN: 'Stockholm',
  HEL: 'Helsinki',
  FCO: 'Rome',
}).map(([code, city]) => ({ city, code }));
9 changes: 9 additions & 0 deletions src/app/web-scraping/embed/page.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import { USE_CASES } from '../../../client/components/common/content';
import { generateUseCaseMetadata } from '../../../client/components/common/seo';
import { WebScrapingUseCase } from '../WebScraping';

// Page metadata, generated from the web scraping use case definition.
export const metadata = generateUseCaseMetadata(USE_CASES.webScraping);

// Page component for the `web-scraping/embed` route: renders the web scraping use case.
export default function WebScrapingPage() {
return <WebScrapingUseCase />;
}
File renamed without changes
File renamed without changes
9 changes: 9 additions & 0 deletions src/app/web-scraping/page.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import { USE_CASES } from '../../client/components/common/content';
import { generateUseCaseMetadata } from '../../client/components/common/seo';
import { WebScrapingUseCase } from './WebScraping';

// Page metadata, generated from the web scraping use case definition.
export const metadata = generateUseCaseMetadata(USE_CASES.webScraping);

// Page component for the `web-scraping` route: renders the web scraping use case.
export default function WebScrapingPage() {
return <WebScrapingUseCase />;
}
13 changes: 0 additions & 13 deletions src/pages/web-scraping/embed.tsx

This file was deleted.

15 changes: 11 additions & 4 deletions src/server/checks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,9 @@ type GetFingerprintResultArgs = {
serverApiKey?: string;
region?: Region;
options?: {
blockTor: boolean;
blockBots: boolean;
blockTor?: boolean;
blockBots?: boolean;
minConfidenceScore?: number;
};
};

Expand Down Expand Up @@ -281,8 +282,14 @@ export const getAndValidateFingerprintResult = async ({
* This is context-sensitive and less reliable than the binary checks above, which is why it is checked last.
* More info: https://dev.fingerprint.com/docs/understanding-your-confidence-score
*/
if (identification?.confidence?.score && identification?.confidence?.score < env.MIN_CONFIDENCE_SCORE) {
return { okay: false, error: 'Identification confidence score too low, potential spoofing attack.' };
if (
identification?.confidence?.score &&
identification?.confidence?.score < (options?.minConfidenceScore ?? env.MIN_CONFIDENCE_SCORE)
) {
return {
okay: false,
error: `Identification confidence score too low (${identification?.confidence?.score}), potential spoofing attack.`,
};
}

// All checks passed, we can trust this identification event
Expand Down
Loading