Skip to content

Commit

Permalink
fix db update with empty imdbinfo bug
Browse files Browse the repository at this point in the history
  • Loading branch information
Asing1001 committed Apr 14, 2017
1 parent ab5013d commit d07e527
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 18 deletions.
File renamed without changes.
19 changes: 19 additions & 0 deletions doc/dbSetup.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Create index
To speed up db queries, we need to create the following indexes:
1. yahooMovies : yahooId: -1
2. pttArticles : url: -1

### Sample command
```
db.runCommand({
"createIndexes": "pttArticles",
"indexes": [
{
"key": {
"url": -1
},
"name": "url"
}
]
})
```
19 changes: 14 additions & 5 deletions src/task/imdbTask.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@ import { getIMDBMovieInfo } from '../crawler/imdbCrawler';
import Movie from "../models/movie";

/**
 * Entry point of the IMDb update task: fetches new IMDb infos, removes the
 * empty IMDb fields from them, logs the outcome, and returns whatever
 * updateYahooMovies returns for the cleaned list.
 *
 * NOTE(review): this span is rendered from a commit diff, so the removed
 * `const movieInfos` line and its `let movieInfos` replacement both appear.
 */
export async function updateImdbInfo() {
const movieInfos = await getNewImdbInfos();
let movieInfos = await getNewImdbInfos();
// Mutates the entries in place (no return value is used) before they are
// logged and handed to updateYahooMovies.
removeEmptyInfos(movieInfos);
logResult(movieInfos);
return updateYahooMovies(movieInfos);
}

const imdbLastCrawlTimeFormat = 'YYYY-MM-DDTHH';
async function getNewImdbInfos() {
const imdbLastCrawlTime = moment().format('YYYY-MM-DD');
const imdbLastCrawlTime = moment().format(imdbLastCrawlTimeFormat);
const yahooMovies: Movie[] = await db.getCollection("yahooMovies");
const promises = yahooMovies.filter(filterNeedCrawlMovie).map(async ({ englishTitle, yahooId }) => {
const imdbInfo = await getIMDBMovieInfo(englishTitle);
Expand All @@ -23,14 +25,21 @@ async function getNewImdbInfos() {
return Promise.all(promises);
}

/**
 * Filter predicate deciding whether a movie's IMDb info should be crawled.
 * The result is truthy only when the movie has an englishTitle, has not been
 * crawled within the recent window, and is either a recent movie or has never
 * been crawled (no imdbLastCrawlTime).
 *
 * The value is the raw result of the `&&` chain, not a coerced boolean; it is
 * meant to be consumed for its truthiness (e.g. by Array.prototype.filter).
 *
 * NOTE(review): this span is rendered from a commit diff, so the old
 * signature / `hasCrawlToday` lines and their replacements appear together.
 */
function filterNeedCrawlMovie({ englishTitle, imdbRating, releaseDate, imdbLastCrawlTime }: Movie) {
function filterNeedCrawlMovie({ englishTitle, releaseDate, imdbLastCrawlTime }: Movie) {
let now = moment();
// "Recent" means the month difference between now and releaseDate is at most 6.
let isRecentMovie = now.diff(moment(releaseDate), 'months') <= 6;
let hasCrawlToday = imdbLastCrawlTime && (now.diff(moment(imdbLastCrawlTime), 'days') === 0);
let shouldCrawl = !hasCrawlToday && englishTitle && (isRecentMovie || (!isRecentMovie && !imdbLastCrawlTime));
// The stored timestamp is parsed with imdbLastCrawlTimeFormat; a crawl whose
// hour difference from now is at most 12 counts as "nearly" crawled.
let hasCrawlNearly = imdbLastCrawlTime && (now.diff(moment(imdbLastCrawlTime, imdbLastCrawlTimeFormat), 'hours') <= 12);
// Older movies are only crawled when they have never been crawled before.
let shouldCrawl = !hasCrawlNearly && englishTitle && (isRecentMovie || (!isRecentMovie && !imdbLastCrawlTime));
return shouldCrawl;
}

/**
 * Strips empty IMDb fields from every entry, mutating the objects in place.
 *
 * A field counts as empty when its value is falsy (for example `""` or
 * `undefined`); such `imdbID` / `imdbRating` keys are deleted from the
 * object rather than left behind with an empty value. All other properties
 * are left untouched.
 *
 * @param movieInfos Entries to clean; the array and its objects are modified
 *                   directly and nothing is returned.
 */
function removeEmptyInfos(movieInfos: Movie[]) {
  movieInfos.forEach((movie) => {
    if (!movie.imdbID) {
      delete movie.imdbID;
    }
    if (!movie.imdbRating) {
      delete movie.imdbRating;
    }
  });
}

function logResult(movieInfos: Movie[]) {
const foundMovies = movieInfos.filter(movie => movie.imdbID);
const notfoundMovieIds = movieInfos.filter(movie => !movie.imdbID).map(movie => movie.yahooId);
Expand Down
37 changes: 24 additions & 13 deletions src/test/imdbTask.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,40 @@ const should = chai.should();
chai.use(sinonChai);

// Test suite for the IMDb update task. The db module's updateDocument and
// getCollection functions are replaced with sinon stubs so no real database
// is touched.
// NOTE(review): this span is rendered from a commit diff, so removed lines
// (before/after hooks, the old single test) appear next to their replacements.
describe('imdbTask', () => {
let sandbox, stubUpdateDocument, stubGetCollection: sinon.SinonStub;
before(() => {
let sandbox: sinon.SinonSandbox, stubUpdateDocument: sinon.SinonStub, stubGetCollection: sinon.SinonStub;
// A fresh sandbox and fresh stubs are created for each test.
beforeEach(() => {
sandbox = sinon.sandbox.create();
stubUpdateDocument = sandbox.stub(db, 'updateDocument');
stubGetCollection = sandbox.stub(db, 'getCollection');
});

after(() => {
// Restores everything stubbed through the sandbox after each test.
afterEach(() => {
sandbox.restore();
});

describe('updateImdbInfo', () => {
it("should get yahooMovies then update nearly movies' imdb info", async function () {
const yahooMovies: Movie[] = [{
yahooId: 6777,
englishTitle: 'Dangal',
releaseDate: moment().format(),
imdbLastCrawlTime: moment().subtract(2, 'days').format()
}]
stubGetCollection.returns(yahooMovies);
const stubGetIMDBMovieInfo = sandbox.stub(imdbCrawler, 'getIMDBMovieInfo').returns([1]);
// Shared fixture: released now and last crawled two days ago.
const yahooMovie: Movie = {
yahooId: 6777,
englishTitle: 'Dangal',
releaseDate: moment().format(),
imdbLastCrawlTime: moment().subtract(2, 'days').format()
};


it("One yahooMovie should called GetIMDBMovieInfo Once", async function () {
stubGetCollection.returns([yahooMovie]);
const stubGetIMDBMovieInfo = sandbox.stub(imdbCrawler, 'getIMDBMovieInfo').returns({ imdbID: "", imdbRating: "" });
await updateImdbInfo();
sandbox.assert.calledOnce(stubGetIMDBMovieInfo);
});

// The crawler stub returns empty imdbID / imdbRating strings; the update
// sent to the db is expected to carry neither of those keys.
it("should update db without info when it's empty", async function () {
stubGetCollection.returns([yahooMovie]);
const stubGetIMDBMovieInfo = sandbox.stub(imdbCrawler, 'getIMDBMovieInfo').returns({ imdbID: "", imdbRating: "" });
await updateImdbInfo();
sandbox.assert.calledOnce(stubUpdateDocument);
// NOTE(review): the expected imdbLastCrawlTime is computed with a second
// moment() call after updateImdbInfo has run; presumably this can mismatch
// if the clock hour rolls over in between — confirm and consider fake timers.
sandbox.assert.calledWith(stubUpdateDocument,
{ yahooId: yahooMovie.yahooId },
{ yahooId: yahooMovie.yahooId, imdbLastCrawlTime: moment().format('YYYY-MM-DDTHH') });
});
});
});

0 comments on commit d07e527

Please sign in to comment.