diff --git a/HLD/Nearby-Theater-Schedule.md b/doc/Nearby-Theater-Schedule.md similarity index 100% rename from HLD/Nearby-Theater-Schedule.md rename to doc/Nearby-Theater-Schedule.md diff --git a/doc/dbSetup.md b/doc/dbSetup.md new file mode 100644 index 0000000..e52f893 --- /dev/null +++ b/doc/dbSetup.md @@ -0,0 +1,19 @@ +# Create index +To speed up database queries, create the following indexes (collection : field: sort order): +1. yahooMovies : yahooId: -1 +2. pttArticles : url: -1 + +### Sample command +``` +db.runCommand({ + "createIndexes": "pttArticles", + "indexes": [ + { + "key": { + "url": -1 + }, + "name": "url" + } + ] +}) +``` \ No newline at end of file diff --git a/src/task/imdbTask.ts b/src/task/imdbTask.ts index 0ce7055..22222d3 100644 --- a/src/task/imdbTask.ts +++ b/src/task/imdbTask.ts @@ -4,13 +4,15 @@ import { getIMDBMovieInfo } from '../crawler/imdbCrawler'; import Movie from "../models/movie"; export async function updateImdbInfo() { - const movieInfos = await getNewImdbInfos(); + let movieInfos = await getNewImdbInfos(); + removeEmptyInfos(movieInfos); logResult(movieInfos); return updateYahooMovies(movieInfos); } +const imdbLastCrawlTimeFormat = 'YYYY-MM-DDTHH'; async function getNewImdbInfos() { - const imdbLastCrawlTime = moment().format('YYYY-MM-DD'); + const imdbLastCrawlTime = moment().format(imdbLastCrawlTimeFormat); const yahooMovies: Movie[] = await db.getCollection("yahooMovies"); const promises = yahooMovies.filter(filterNeedCrawlMovie).map(async ({ englishTitle, yahooId }) => { const imdbInfo = await getIMDBMovieInfo(englishTitle); @@ -23,14 +25,21 @@ async function getNewImdbInfos() { return Promise.all(promises); } -function filterNeedCrawlMovie({ englishTitle, imdbRating, releaseDate, imdbLastCrawlTime }: Movie) { +function filterNeedCrawlMovie({ englishTitle, releaseDate, imdbLastCrawlTime }: Movie) { let now = moment(); let isRecentMovie = now.diff(moment(releaseDate), 'months') <= 6; - let hasCrawlToday = imdbLastCrawlTime && (now.diff(moment(imdbLastCrawlTime), 
'days') === 0); - let shouldCrawl = !hasCrawlToday && englishTitle && (isRecentMovie || (!isRecentMovie && !imdbLastCrawlTime)); + let hasCrawlNearly = imdbLastCrawlTime && (now.diff(moment(imdbLastCrawlTime, imdbLastCrawlTimeFormat), 'hours') <= 12); + let shouldCrawl = !hasCrawlNearly && englishTitle && (isRecentMovie || (!isRecentMovie && !imdbLastCrawlTime)); return shouldCrawl; } +function removeEmptyInfos(movieInfos: Movie[]){ + movieInfos.forEach(info=>{ + !info.imdbID && delete info.imdbID; + !info.imdbRating && delete info.imdbRating; + }) +} + function logResult(movieInfos: Movie[]) { const foundMovies = movieInfos.filter(movie => movie.imdbID); const notfoundMovieIds = movieInfos.filter(movie => !movie.imdbID).map(movie => movie.yahooId); diff --git a/src/test/imdbTask.test.ts b/src/test/imdbTask.test.ts index 7f52764..a6c2787 100644 --- a/src/test/imdbTask.test.ts +++ b/src/test/imdbTask.test.ts @@ -11,29 +11,40 @@ const should = chai.should(); chai.use(sinonChai); describe('imdbTask', () => { - let sandbox, stubUpdateDocument, stubGetCollection: sinon.SinonStub; - before(() => { + let sandbox: sinon.SinonSandbox, stubUpdateDocument: sinon.SinonStub, stubGetCollection: sinon.SinonStub; + beforeEach(() => { sandbox = sinon.sandbox.create(); stubUpdateDocument = sandbox.stub(db, 'updateDocument'); stubGetCollection = sandbox.stub(db, 'getCollection'); }); - after(() => { + afterEach(() => { sandbox.restore(); }); describe('updateImdbInfo', () => { - it("should get yahooMovies then update nearly movies' imdb info", async function () { - const yahooMovies: Movie[] = [{ - yahooId: 6777, - englishTitle: 'Dangal', - releaseDate: moment().format(), - imdbLastCrawlTime: moment().subtract(2, 'days').format() - }] - stubGetCollection.returns(yahooMovies); - const stubGetIMDBMovieInfo = sandbox.stub(imdbCrawler, 'getIMDBMovieInfo').returns([1]); + const yahooMovie: Movie = { + yahooId: 6777, + englishTitle: 'Dangal', + releaseDate: moment().format(), + 
imdbLastCrawlTime: moment().subtract(2, 'days').format() + }; + + + it("One yahooMovie should called GetIMDBMovieInfo Once", async function () { + stubGetCollection.returns([yahooMovie]); + const stubGetIMDBMovieInfo = sandbox.stub(imdbCrawler, 'getIMDBMovieInfo').returns({ imdbID: "", imdbRating: "" }); + await updateImdbInfo(); + sandbox.assert.calledOnce(stubGetIMDBMovieInfo); + }); + + it("should update db without info when it's empty", async function () { + stubGetCollection.returns([yahooMovie]); + const stubGetIMDBMovieInfo = sandbox.stub(imdbCrawler, 'getIMDBMovieInfo').returns({ imdbID: "", imdbRating: "" }); await updateImdbInfo(); - sandbox.assert.calledOnce(stubUpdateDocument); + sandbox.assert.calledWith(stubUpdateDocument, + { yahooId: yahooMovie.yahooId }, + { yahooId: yahooMovie.yahooId, imdbLastCrawlTime: moment().format('YYYY-MM-DDTHH') }); }); }); }); \ No newline at end of file