Skip to content

Commit

Permalink
chore: Refactor and migrate Web scraping to app router, INTER-911, …
Browse files Browse the repository at this point in the history
…INTER-459 (#162)

* chore: move web scraping page to `app`

* chore: move web scraping api to `app`

* chore: move assets to app, fix build

* chore: add status codes
  • Loading branch information
JuroUhlar authored Sep 23, 2024
1 parent f6b89cd commit 5dfa010
Show file tree
Hide file tree
Showing 14 changed files with 119 additions and 125 deletions.
Original file line number Diff line number Diff line change
@@ -1,79 +1,34 @@
'use client';

import { UseCaseWrapper } from '../../client/components/common/UseCaseWrapper/UseCaseWrapper';
import FlightCard, { Flight } from '../../client/components/web-scraping/FlightCard';
import { useVisitorData } from '@fingerprintjs/fingerprintjs-pro-react';
import { useQueryState } from 'next-usequerystate';
import { useQuery, UseQueryResult } from 'react-query';
import { GetServerSideProps, NextPage } from 'next';
import { FlightQuery } from '../api/web-scraping/flights';
import { CheckResultObject } from '../../server/checkResult';
import { USE_CASES } from '../../client/components/common/content';
import { CustomPageProps } from '../_app';
import { Select, SelectItem } from '../../client/components/common/Select/Select';
import ArrowIcon from '../../client/img/arrowRight.svg';
import Image from 'next/image';
import styles from './webScraping.module.scss';
import Button from '../../client/components/common/Button/Button';
import { Alert } from '../../client/components/common/Alert/Alert';
import { Spinner } from '../../client/components/common/Spinner/Spinner';

// Make URL query object available as props to the page on first render
// to read `from`, `to` params and a `disableBotDetection` param for testing and demo purposes.
//
// Next.js parses a repeated query param (e.g. `?from=SFO&from=JFK`) into a string array,
// so `from` and `to` are normalized to a single string (the first occurrence) or `null`
// before being handed to the page. This keeps the props in line with `QueryAsProps`.
export const getServerSideProps: GetServerSideProps<QueryAsProps> = async ({ query }) => {
  // Collapse `string | string[] | undefined` into `string | null`.
  const firstValue = (value: string | string[] | undefined): string | null =>
    (Array.isArray(value) ? value[0] : value) ?? null;

  const { from, to, disableBotDetection } = query;
  return {
    props: {
      from: firstValue(from),
      to: firstValue(to),
      // Only the exact values '1' and 'true' switch bot detection off.
      disableBotDetection: disableBotDetection === '1' || disableBotDetection === 'true',
    },
  };
};
import { FlightQuery } from './api/flights/route';
import { FunctionComponent, Suspense } from 'react';
import { useSearchParams } from 'next/navigation';
import { AIRPORTS } from './data/airports';
import { Flight, FlightCard } from './components/FlightCard';

type FlightQueryResult = CheckResultObject<Flight[]>;

// Airports used by the web scraping demo, as `{ city, code }` pairs with three-letter codes.
// AIRPORTS[0] and AIRPORTS[1] are the fallback `from`/`to` values when the URL provides none.
export const AIRPORTS = [
{ city: 'San Francisco', code: 'SFO' },
{ city: 'New York', code: 'JFK' },
{ city: 'London', code: 'LHR' },
{ city: 'Tokyo', code: 'HND' },
{ city: 'Paris', code: 'CDG' },
{ city: 'Hong Kong', code: 'HKG' },
{ city: 'Singapore', code: 'SIN' },
{ city: 'Dubai', code: 'DXB' },
{ city: 'Shanghai', code: 'PVG' },
{ city: 'Seoul', code: 'ICN' },
{ city: 'Bangkok', code: 'BKK' },
{ city: 'Amsterdam', code: 'AMS' },
{ city: 'Beijing', code: 'PEK' },
{ city: 'Frankfurt', code: 'FRA' },
{ city: 'Cape Town', code: 'CPT' },
{ city: 'Sydney', code: 'SYD' },
{ city: 'Melbourne', code: 'MEL' },
{ city: 'Toronto', code: 'YYZ' },
{ city: 'Vancouver', code: 'YVR' },
{ city: 'Montreal', code: 'YUL' },
{ city: 'Brussels', code: 'BRU' },
{ city: 'Copenhagen', code: 'CPH' },
{ city: 'Oslo', code: 'OSL' },
{ city: 'Stockholm', code: 'ARN' },
{ city: 'Helsinki', code: 'HEL' },
{ city: 'Rome', code: 'FCO' },
];

// Page props that `getServerSideProps` derives from the URL query string.
type QueryAsProps = {
// Value of the `from` query param, or null when it is absent.
from: string | null;
// Value of the `to` query param, or null when it is absent.
to: string | null;
// True only when the `disableBotDetection` query param is exactly '1' or 'true'.
disableBotDetection: boolean;
};

export const WebScrapingUseCase: NextPage<QueryAsProps & CustomPageProps> = ({
from,
to,
disableBotDetection,
embed,
}) => {
const [fromCode, setFromCode] = useQueryState('from', { defaultValue: from?.toUpperCase() ?? AIRPORTS[0].code });
const [toCode, setToCode] = useQueryState('to', { defaultValue: to?.toUpperCase() ?? AIRPORTS[1].code });
const WebScraping: FunctionComponent = () => {
const searchParams = useSearchParams();
const [fromCode, setFromCode] = useQueryState('from', {
defaultValue: searchParams?.get('from')?.toUpperCase() ?? AIRPORTS[0].code,
});
const [toCode, setToCode] = useQueryState('to', {
defaultValue: searchParams?.get('to')?.toUpperCase() ?? AIRPORTS[1].code,
});

/**
* We use the Fingerprint Pro React SDK hook to get visitor data (https://github.com/fingerprintjs/fingerprintjs-pro-react)
Expand All @@ -97,7 +52,7 @@ export const WebScrapingUseCase: NextPage<QueryAsProps & CustomPageProps> = ({
['getFlights'],
async () => {
const { requestId } = await getVisitorData();
const response = await fetch(`/api/web-scraping/flights`, {
const response = await fetch(`/web-scraping/api/flights`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
Expand All @@ -106,8 +61,8 @@ export const WebScrapingUseCase: NextPage<QueryAsProps & CustomPageProps> = ({
from: fromCode,
to: toCode,
requestId,
disableBotDetection,
} as FlightQuery),
disableBotDetection: Boolean(searchParams?.get('disableBotDetection')),
} satisfies FlightQuery),
});
if (response.status < 500) {
return await response.json();
Expand All @@ -125,7 +80,7 @@ export const WebScrapingUseCase: NextPage<QueryAsProps & CustomPageProps> = ({

return (
<>
<UseCaseWrapper useCase={USE_CASES.webScraping} embed={embed}>
<UseCaseWrapper useCase={USE_CASES.webScraping}>
<h2 className={styles.searchTitle}>Search for today&apos;s flights</h2>
<form
onSubmit={(event) => {
Expand Down Expand Up @@ -173,6 +128,15 @@ export const WebScrapingUseCase: NextPage<QueryAsProps & CustomPageProps> = ({
);
};

// <WebScraping /> calls useSearchParams(), so it has to be rendered inside a Suspense boundary.
// See https://nextjs.org/docs/messages/missing-suspense-with-csr-bailout
export const WebScrapingUseCase = () => (
  <Suspense>
    <WebScraping />
  </Suspense>
);

const Results = ({ data, isFetching, error }: UseQueryResult<FlightQueryResult, Error>) => {
const { data: flights, message, severity } = data ?? {};

Expand Down Expand Up @@ -203,5 +167,3 @@ const Results = ({ data, isFetching, error }: UseQueryResult<FlightQueryResult,
</div>
);
};

export default WebScrapingUseCase;
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import { NextApiRequest, NextApiResponse } from 'next';
import { Severity, getAndValidateFingerprintResult } from '../../../server/checks';
import { isValidPostRequest } from '../../../server/server';
import { ONE_DAY_MS, FIVE_MINUTES_MS, ONE_HOUR_MS } from '../../../shared/timeUtils';
import { AIRPORTS } from '../../web-scraping';
import { Flight } from '../../../client/components/web-scraping/FlightCard';
import { saveBotVisit } from '../../../server/botd-firewall/botVisitDatabase';
import { Severity, getAndValidateFingerprintResult } from '../../../../server/checks';
import { ONE_DAY_MS, FIVE_MINUTES_MS, ONE_HOUR_MS } from '../../../../shared/timeUtils';
import { saveBotVisit } from '../../../../server/botd-firewall/botVisitDatabase';
import { NextRequest, NextResponse } from 'next/server';
import { AIRPORTS } from '../../data/airports';
import { Flight } from '../../components/FlightCard';

/** Rounds `time` to the nearest multiple of `FIVE_MINUTES_MS`. */
const roundToFiveMinutes = (time: number) => {
  const intervalCount = Math.round(time / FIVE_MINUTES_MS);
  return intervalCount * FIVE_MINUTES_MS;
};

Expand All @@ -21,21 +20,17 @@ export type FlightsResponse = {
data?: Flight[];
};

export default async function getFlights(req: NextApiRequest, res: NextApiResponse<FlightsResponse>) {
// This API route accepts only POST requests.
const reqValidation = isValidPostRequest(req);
if (!reqValidation.okay) {
res.status(405).send({ severity: 'error', message: reqValidation.error });
return;
}

const { from, to, requestId, disableBotDetection } = req.body as FlightQuery;
export async function POST(req: NextRequest): Promise<NextResponse<FlightsResponse>> {
const { from, to, requestId, disableBotDetection } = (await req.json()) as FlightQuery;

// Get the full Identification and Bot Detection result from Fingerprint Server API and validate its authenticity
const fingerprintResult = await getAndValidateFingerprintResult({ requestId, req });
const fingerprintResult = await getAndValidateFingerprintResult({
requestId,
req,
options: { minConfidenceScore: 0.5 },
});
if (!fingerprintResult.okay) {
res.status(403).send({ severity: 'error', message: fingerprintResult.error });
return;
return NextResponse.json({ severity: 'error', message: fingerprintResult.error }, { status: 403 });
}

const identification = fingerprintResult.data.products?.identification?.data;
Expand All @@ -44,35 +39,34 @@ export default async function getFlights(req: NextApiRequest, res: NextApiRespon
// Backdoor for demo and testing purposes
// If bot detection is disabled, just send the result
if (!botData || disableBotDetection) {
res
.status(200)
.send({ severity: 'success', message: 'Bot detection is disabled.', data: getFlightResults(from, to) });
return;
return NextResponse.json({
severity: 'success',
message: 'Bot detection is disabled.',
data: getFlightResults(from, to),
});
}

// If a bot is detected, return an error
if (botData.bot?.result === 'bad') {
res.status(403).send({
severity: 'error',
message: '🤖 Malicious bot detected, access denied.',
});
// Optionally, here you could also save the bot's IP address to a blocklist in your database
// and block all requests from this IP address in the future at a web server/firewall level.
saveBotVisit(botData, identification?.visitorId ?? 'N/A');
return;
return NextResponse.json(
{ severity: 'error', message: '🤖 Malicious bot detected, access denied.' },
{ status: 403 },
);
}

// Check for unexpected bot detection value, just in case
if (!['notDetected', 'good'].includes(botData.bot?.result)) {
res.status(500).send({
severity: 'error',
message: 'Server error, unexpected bot detection value.',
});
return;
return NextResponse.json(
{ severity: 'error', message: 'Server error, unexpected bot detection value.' },
{ status: 500 },
);
}

// All checks passed, allow access
res.status(200).send({
return NextResponse.json({
severity: 'success',
message: 'No malicious bot nor spoofing detected, access allowed.',
data: getFlightResults(from, to),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import { FunctionComponent } from 'react';
import styles from './FlightCard.module.scss';
import DepartureIcon from '../../img/departure.svg';
import ArrivalIcon from '../../img/arrival.svg';
import AirCanada from '../../img/airCanada.svg';
import DepartureIcon from '../images/departure.svg';
import ArrivalIcon from '../images/arrival.svg';
import AirCanada from '../images/airCanada.svg';
import StarIcon from '../images/star.svg';
import Image from 'next/image';
import Button from '../common/Button/Button';
import StarIcon from '../../img/star.svg';
import { TEST_IDS } from '../../testIDs';
import { ONE_HOUR_MS, ONE_MINUTE_MS } from '../../../shared/timeUtils';
import { TEST_IDS } from '../../../client/testIDs';
import Button from '../../../client/components/common/Button/Button';

const TEST_ID = TEST_IDS.webScraping;

Expand Down Expand Up @@ -174,5 +174,3 @@ export const FlightCard: FunctionComponent<FlightCardProps> = ({ flight }) => {
</div>
);
};

export default FlightCard;
28 changes: 28 additions & 0 deletions src/app/web-scraping/data/airports.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Airports used by the web scraping demo, as `{ city, code }` pairs with three-letter codes.
// The WebScraping component falls back to AIRPORTS[0] and AIRPORTS[1] for the `from`/`to`
// values when the URL provides none, so keep at least two entries and mind their order.
export const AIRPORTS = [
{ city: 'San Francisco', code: 'SFO' },
{ city: 'New York', code: 'JFK' },
{ city: 'London', code: 'LHR' },
{ city: 'Tokyo', code: 'HND' },
{ city: 'Paris', code: 'CDG' },
{ city: 'Hong Kong', code: 'HKG' },
{ city: 'Singapore', code: 'SIN' },
{ city: 'Dubai', code: 'DXB' },
{ city: 'Shanghai', code: 'PVG' },
{ city: 'Seoul', code: 'ICN' },
{ city: 'Bangkok', code: 'BKK' },
{ city: 'Amsterdam', code: 'AMS' },
{ city: 'Beijing', code: 'PEK' },
{ city: 'Frankfurt', code: 'FRA' },
{ city: 'Cape Town', code: 'CPT' },
{ city: 'Sydney', code: 'SYD' },
{ city: 'Melbourne', code: 'MEL' },
{ city: 'Toronto', code: 'YYZ' },
{ city: 'Vancouver', code: 'YVR' },
{ city: 'Montreal', code: 'YUL' },
{ city: 'Brussels', code: 'BRU' },
{ city: 'Copenhagen', code: 'CPH' },
{ city: 'Oslo', code: 'OSL' },
{ city: 'Stockholm', code: 'ARN' },
{ city: 'Helsinki', code: 'HEL' },
{ city: 'Rome', code: 'FCO' },
];
9 changes: 9 additions & 0 deletions src/app/web-scraping/embed/page.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import { USE_CASES } from '../../../client/components/common/content';
import { generateUseCaseMetadata } from '../../../client/components/common/seo';
import { WebScrapingUseCase } from '../WebScraping';

export const metadata = generateUseCaseMetadata(USE_CASES.webScraping);

// Route entry point: renders the shared web scraping use case component.
const WebScrapingPage = () => <WebScrapingUseCase />;

export default WebScrapingPage;
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
9 changes: 9 additions & 0 deletions src/app/web-scraping/page.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import { USE_CASES } from '../../client/components/common/content';
import { generateUseCaseMetadata } from '../../client/components/common/seo';
import { WebScrapingUseCase } from './WebScraping';

export const metadata = generateUseCaseMetadata(USE_CASES.webScraping);

// Route entry point: renders the shared web scraping use case component.
const WebScrapingPage = () => <WebScrapingUseCase />;

export default WebScrapingPage;
13 changes: 0 additions & 13 deletions src/pages/web-scraping/embed.tsx

This file was deleted.

15 changes: 11 additions & 4 deletions src/server/checks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,9 @@ type GetFingerprintResultArgs = {
serverApiKey?: string;
region?: Region;
options?: {
blockTor: boolean;
blockBots: boolean;
blockTor?: boolean;
blockBots?: boolean;
minConfidenceScore?: number;
};
};

Expand Down Expand Up @@ -281,8 +282,14 @@ export const getAndValidateFingerprintResult = async ({
* This is context-sensitive and less reliable than the binary checks above, which is why it is checked last.
* More info: https://dev.fingerprint.com/docs/understanding-your-confidence-score
*/
if (identification?.confidence?.score && identification?.confidence?.score < env.MIN_CONFIDENCE_SCORE) {
return { okay: false, error: 'Identification confidence score too low, potential spoofing attack.' };
if (
identification?.confidence?.score &&
identification?.confidence?.score < (options?.minConfidenceScore ?? env.MIN_CONFIDENCE_SCORE)
) {
return {
okay: false,
error: `Identification confidence score too low (${identification?.confidence?.score}), potential spoofing attack.`,
};
}

// All checks passed, we can trust this identification event
Expand Down

0 comments on commit 5dfa010

Please sign in to comment.