diff --git a/lib/app.js b/lib/app.js
index 28b6e9cb..3b2d6e27 100644
--- a/lib/app.js
+++ b/lib/app.js
@@ -4,8 +4,9 @@ const R = require('ramda');
 const queryString = require('querystring');
 const request = require('./utils/request');
 const scriptData = require('./utils/scriptData');
-const debug = require('debug')('google-play-scraper');
-const cheerio = require('cheerio');
+const mappingV1 = require('./mapping/app/general.v1');
+const mappingV2 = require('./mapping/app/general.v2');
+
 const { BASE_URL } = require('./constants');
 
 const PLAYSTORE_URL = `${BASE_URL}/store/apps/details`;
@@ -34,7 +35,14 @@ function app (opts) {
     request(options, opts.throttle)
       .then(scriptData.parse)
       // comment next line to get raw data
-      .then(scriptData.extractor(MAPPINGS))
+      .then((parsedData) => {
+        // the v1 mapping applies only when its full title path resolves to a
+        // string; any other shape is handled by the 05/2022 (v2) mapping
+        const isV2 = typeof R.path(mappingV1.title, parsedData) !== 'string';
+        const mapping = isV2 ? mappingV2 : mappingV1;
+
+        return scriptData.extractor(mapping)(parsedData);
+      })
       .then(R.assoc('appId', opts.appId))
       .then(R.assoc('url', reqUrl))
       .then(resolve)
@@ -42,170 +50,4 @@ function app (opts) {
   });
 }
 
-const MAPPINGS = {
-  // FIXME add appId
-  title: ['ds:5', 0, 0, 0],
-  description: {
-    path: ['ds:5', 0, 10, 0, 1],
-    fun: descriptionText
-  },
-  descriptionHTML: ['ds:5', 0, 10, 0, 1],
-  summary: ['ds:5', 0, 10, 1, 1],
-  installs: ['ds:5', 0, 12, 9, 0],
-  minInstalls: ['ds:5', 0, 12, 9, 1],
-  maxInstalls: ['ds:5', 0, 12, 9, 2],
-  score: ['ds:6', 0, 6, 0, 1],
-  scoreText: ['ds:6', 0, 6, 0, 0],
-  ratings: ['ds:6', 0, 6, 2, 1],
-  reviews: ['ds:6', 0, 6, 3, 1],
-  histogram: {
-    path: ['ds:6', 0, 6, 1],
-    fun: buildHistogram
-  },
-
-  price: {
-    path: ['ds:3', 0, 2, 0, 0, 0, 1, 0, 0],
-    fun: (val) => val / 1000000 || 0
-  },
-  free: {
-    path: ['ds:3', 0, 2, 0, 0, 0, 1, 0, 0],
-    // considered free only if price is exactly zero
-    fun: (val) => val === 0
-  },
-  currency: ['ds:3', 0, 2, 0, 0, 0, 1, 0, 1],
-  priceText: {
-    path: ['ds:3', 0, 2, 0, 0, 0, 1, 0, 2],
-    fun: priceText
-  },
-  available: {
-    path: ['ds:5', 0, 12, 11, 0],
-    fun: Boolean
-  },
-  offersIAP: {
-    path: ['ds:5', 0, 12, 12, 0],
-    fun: Boolean
-  },
-  IAPRange: ['ds:5', 0, 12, 12, 0],
-  size: ['ds:8', 0],
-  androidVersion: {
-    path: ['ds:8', 2],
-    fun: normalizeAndroidVersion
-  },
-  androidVersionText: ['ds:8', 2],
-  developer: ['ds:5', 0, 12, 5, 1],
-  developerId: {
-    path: ['ds:5', 0, 12, 5, 5, 4, 2],
-    fun: (devUrl) => devUrl.split('id=')[1]
-  },
-  developerEmail: ['ds:5', 0, 12, 5, 2, 0],
-  developerWebsite: ['ds:5', 0, 12, 5, 3, 5, 2],
-  developerAddress: ['ds:5', 0, 12, 5, 4, 0],
-  privacyPolicy: ['ds:5', 0, 12, 7, 2],
-  developerInternalID: ['ds:5', 0, 12, 5, 0, 0],
-  genre: ['ds:5', 0, 12, 13, 0, 0],
-  genreId: ['ds:5', 0, 12, 13, 0, 2],
-  familyGenre: ['ds:5', 0, 12, 13, 1, 0],
-  familyGenreId: ['ds:5', 0, 12, 13, 1, 2],
-  icon: ['ds:5', 0, 12, 1, 3, 2],
-  headerImage: ['ds:5', 0, 12, 2, 3, 2],
-  screenshots: {
-    path: ['ds:5', 0, 12, 0],
-    fun: (screenshots) => {
-      if (screenshots === null) return [];
-      return screenshots.map(R.path([3, 2]));
-    }
-  },
-  video: ['ds:5', 0, 12, 3, 0, 3, 2],
-  videoImage: ['ds:5', 0, 12, 3, 1, 3, 2],
-  contentRating: ['ds:5', 0, 12, 4, 0],
-  contentRatingDescription: ['ds:5', 0, 12, 4, 2, 1],
-  adSupported: {
-    path: ['ds:5', 0, 12, 14, 0],
-    fun: Boolean
-  },
-  released: ['ds:5', 0, 12, 36],
-  updated: {
-    path: ['ds:5', 0, 12, 8, 0],
-    fun: (ts) => ts * 1000
-  },
-  version: ['ds:8', 1],
-  recentChanges: ['ds:5', 0, 12, 6, 1],
-  comments: {
-    useServiceRequestId: 'UsvDTd',
-    path: [0],
-    fun: extractComments
-  },
-  editorsChoice: {
-    path: ['ds:5', 0, 12, 15, 0],
-    fun: Boolean
-  },
-  features: {
-    path: ['ds:5', 0, 12, 16],
-    fun: extractFeatures
-  }
-};
-
-function extractFeatures (featuresArray) {
-  if (featuresArray === null) {
-    return [];
-  }
-
-  const features = featuresArray[2] || [];
-
-  return features.map(feature => ({
-    title: feature[0],
-    description: R.path([1, 0, 0, 1], feature)
-  }));
-}
-
-function descriptionText (description) {
-  // preserve the line breaks when converting to text
-  const html = cheerio.load('<div>' + description.replace(/<br>/g, '\r\n') + '</div>');
-  return html('div').text();
-}
-
-function priceText (priceText) {
-  return priceText || 'Free';
-}
-
-function normalizeAndroidVersion (androidVersionText) {
-  const number = androidVersionText.split(' ')[0];
-  if (parseFloat(number)) {
-    return number;
-  }
-
-  return 'VARY';
-}
-
-function buildHistogram (container) {
-  if (!container) {
-    return { 1: 0, 2: 0, 3: 0, 4: 0, 5: 0 };
-  }
-
-  return {
-    1: container[1][1],
-    2: container[2][1],
-    3: container[3][1],
-    4: container[4][1],
-    5: container[5][1]
-  };
-}
-
-/**
- * Extract the comments from google play script array
- * @param {array} comments The comments array
- */
-function extractComments (comments) {
-  if (!comments) {
-    return [];
-  }
-
-  debug('comments: %O', comments);
-
-  return R.compose(
-    R.take(5),
-    R.reject(R.isNil),
-    R.pluck(4))(comments);
-}
-
 module.exports = app;
diff --git a/lib/mapping/app/general.v1.js b/lib/mapping/app/general.v1.js
new file mode 100644
index 00000000..093b5dc3
--- /dev/null
+++ b/lib/mapping/app/general.v1.js
@@ -0,0 +1,105 @@
+const R = require('ramda');
+const helper = require('../../utils/mappingHelpers');
+
+module.exports = {
+  // FIXME add appId
+  title: ['ds:5', 0, 0, 0],
+  description: {
+    path: ['ds:5', 0, 10, 0, 1],
+    fun: helper.descriptionText
+  },
+  descriptionHTML: ['ds:5', 0, 10, 0, 1],
+  summary: ['ds:5', 0, 10, 1, 1],
+  installs: ['ds:5', 0, 12, 9, 0],
+  minInstalls: ['ds:5', 0, 12, 9, 1],
+  maxInstalls: ['ds:5', 0, 12, 9, 2],
+  score: ['ds:6', 0, 6, 0, 1],
+  scoreText: ['ds:6', 0, 6, 0, 0],
+  ratings: ['ds:6', 0, 6, 2, 1],
+  reviews: ['ds:6', 0, 6, 3, 1],
+  histogram: {
+    path: ['ds:6', 0, 6, 1],
+    fun: helper.buildHistogram
+  },
+
+  price: {
+    path: ['ds:3', 0, 2, 0, 0, 0, 1, 0, 0],
+    fun: (val) => val / 1000000 || 0
+  },
+  free: {
+    path: ['ds:3', 0, 2, 0, 0, 0, 1, 0, 0],
+    // considered free only if price is exactly zero
+    fun: (val) => val === 0
+  },
+  currency: ['ds:3', 0, 2, 0, 0, 0, 1, 0, 1],
+  priceText: {
+    path: ['ds:3', 0, 2, 0, 0, 0, 1, 0, 2],
+    fun: helper.priceText
+  },
+  available: {
+    path: ['ds:5', 0, 12, 11, 0],
+    fun: Boolean
+  },
+  offersIAP: {
+    path: ['ds:5', 0, 12, 12, 0],
+    fun: Boolean
+  },
+  IAPRange: ['ds:5', 0, 12, 12, 0],
+  size: ['ds:8', 0],
+  androidVersion: {
+    path: ['ds:8', 2],
+    fun: helper.normalizeAndroidVersion
+  },
+  androidVersionText: ['ds:8', 2],
+  developer: ['ds:5', 0, 12, 5, 1],
+  developerId: {
+    path: ['ds:5', 0, 12, 5, 5, 4, 2],
+    fun: (devUrl) => devUrl.split('id=')[1]
+  },
+  developerEmail: ['ds:5', 0, 12, 5, 2, 0],
+  developerWebsite: ['ds:5', 0, 12, 5, 3, 5, 2],
+  developerAddress: ['ds:5', 0, 12, 5, 4, 0],
+  privacyPolicy: ['ds:5', 0, 12, 7, 2],
+  developerInternalID: ['ds:5', 0, 12, 5, 0, 0],
+  genre: ['ds:5', 0, 12, 13, 0, 0],
+  genreId: ['ds:5', 0, 12, 13, 0, 2],
+  familyGenre: ['ds:5', 0, 12, 13, 1, 0],
+  familyGenreId: ['ds:5', 0, 12, 13, 1, 2],
+  icon: ['ds:5', 0, 12, 1, 3, 2],
+  headerImage: ['ds:5', 0, 12, 2, 3, 2],
+  screenshots: {
+    path: ['ds:5', 0, 12, 0],
+    fun: (screenshots) => {
+      if (screenshots === null) return [];
+      return screenshots.map(R.path([3, 2]));
+    }
+  },
+  video: ['ds:5', 0, 12, 3, 0, 3, 2],
+  videoImage: ['ds:5', 0, 12, 3, 1, 3, 2],
+  contentRating: ['ds:5', 0, 12, 4, 0],
+  contentRatingDescription: ['ds:5', 0, 12, 4, 2, 1],
+  adSupported: {
+    path: ['ds:5', 0, 12, 14, 0],
+    fun: Boolean
+  },
+  released: ['ds:5', 0, 12, 36],
+  updated: {
+    path: ['ds:5', 0, 12, 8, 0],
+    fun: (ts) => ts * 1000
+  },
+  version: ['ds:8', 1],
+  recentChanges: ['ds:5', 0, 12, 6, 1],
+  comments: {
+    useServiceRequestId: 'UsvDTd',
+    path: [0],
+    fun: helper.extractComments
+  },
+  editorsChoice: {
+    path: ['ds:5', 0, 12, 15, 0],
+    fun: Boolean
+  },
+  features: {
+    path: ['ds:5', 0, 12, 16],
+    fun: helper.extractFeatures
+  }
+};
diff --git a/lib/mapping/app/general.v2.js b/lib/mapping/app/general.v2.js
new file mode 100644
index 00000000..2bed0653
--- /dev/null
+++ b/lib/mapping/app/general.v2.js
@@ -0,0 +1,115 @@
+const R = require('ramda');
+const helper = require('../../utils/mappingHelpers');
+
+/**
+ * Mapping for app information starting 05/2022
+ */
+module.exports = {
+  title: ['ds:4', 1, 2, 0, 0],
+  description: {
+    path: ['ds:4', 1, 2, 72, 0, 1],
+    fun: helper.descriptionText
+  },
+  descriptionHTML: ['ds:4', 1, 2, 72, 0, 1],
+  summary: ['ds:4', 1, 2, 73, 0, 1],
+  installs: ['ds:4', 1, 2, 13, 0],
+  minInstalls: ['ds:4', 1, 2, 13, 1],
+  maxInstalls: ['ds:4', 1, 2, 13, 2],
+  score: ['ds:4', 1, 2, 51, 0, 1],
+  scoreText: ['ds:4', 1, 2, 51, 0, 0],
+  ratings: ['ds:4', 1, 2, 51, 2, 1],
+  reviews: ['ds:4', 1, 2, 51, 3, 1],
+  histogram: {
+    path: ['ds:4', 1, 2, 51, 1],
+    fun: helper.buildHistogram
+  },
+  price: {
+    path: ['ds:4', 1, 2, 57, 0, 0, 0, 0, 1, 0, 0],
+    fun: (val) => val / 1000000 || 0
+  },
+  free: {
+    path: ['ds:4', 1, 2, 57, 0, 0, 0, 0, 1, 0, 0],
+    // considered free only if price is exactly zero
+    fun: (val) => val === 0
+  },
+  currency: [
+    ['ds:3', 0, 2, 0, 0, 0, 1, 0, 1],
+    ['ds:4', 1, 2, 57, 0, 0, 0, 0, 1, 0, 1]
+  ],
+  priceText: {
+    path: ['ds:4', 1, 2, 57, 0, 0, 0, 0, 1, 0, 2],
+    fun: helper.priceText
+  },
+  available: {
+    path: ['ds:4', 1, 2, 18, 0],
+    fun: Boolean
+  },
+  offersIAP: {
+    path: ['ds:4', 1, 2, 19, 0],
+    fun: Boolean
+  },
+  IAPRange: ['ds:4', 1, 2, 19, 0],
+  /* size: ['ds:8', 0], */
+  androidVersion: {
+    path: ['ds:4', 1, 2, 140, 1, 1, 0, 0, 1],
+    fun: helper.normalizeAndroidVersion
+  },
+  androidVersionText: ['ds:4', 1, 2, 140, 1, 1, 0, 0, 1],
+  developer: ['ds:4', 1, 2, 68, 0],
+  developerId: {
+    path: ['ds:4', 1, 2, 68, 1, 4, 2],
+    fun: (devUrl) => devUrl.split('id=')[1]
+  },
+  developerEmail: ['ds:4', 1, 2, 69, 1, 0],
+  developerWebsite: ['ds:4', 1, 2, 69, 0, 5, 2],
+  developerAddress: ['ds:4', 1, 2, 69, 2, 0],
+  privacyPolicy: ['ds:4', 1, 2, 99, 0, 5, 2],
+  developerInternalID: {
+    path: ['ds:4', 1, 2, 68, 1, 4, 2],
+    fun: (devUrl) => devUrl.split('id=')[1]
+  },
+  genre: ['ds:4', 1, 2, 79, 0, 0, 0],
+  genreId: ['ds:4', 1, 2, 79, 0, 0, 2],
+  familyGenre: ['ds:5', 0, 12, 13, 1, 0],
+  familyGenreId: ['ds:5', 0, 12, 13, 1, 2],
+  icon: ['ds:4', 1, 2, 95, 0, 3, 2],
+  headerImage: ['ds:4', 1, 2, 96, 0, 3, 2],
+  screenshots: {
+    path: ['ds:4', 1, 2, 78, 0],
+    fun: (screenshots) => {
+      if (screenshots === null) return [];
+      return screenshots.map(R.path([3, 2]));
+    }
+  },
+  video: ['ds:4', 1, 2, 100, 0, 0, 3, 2],
+  videoImage: ['ds:4', 1, 2, 100, 1, 0, 3, 2],
+  contentRating: ['ds:4', 1, 2, 9, 0],
+  contentRatingDescription: ['ds:4', 1, 2, 9, 2, 1],
+  adSupported: {
+    path: ['ds:4', 1, 2, 48],
+    fun: Boolean
+  },
+  released: ['ds:4', 1, 2, 10, 0],
+  updated: {
+    path: ['ds:4', 1, 2, 145, 0, 1, 0],
+    fun: (ts) => ts * 1000
+  },
+  version: {
+    path: ['ds:4', 1, 2, 140, 0, 0, 0],
+    fun: (val) => val || 'VARY'
+  },
+  recentChanges: ['ds:4', 1, 2, 144, 1, 1],
+  comments: {
+    path: ['ds:8', 0],
+    isArray: true,
+    fun: helper.extractComments
+  }
+/* editorsChoice: {
+    path: ['ds:5', 0, 12, 15, 0],
+    fun: Boolean
+  },
+  features: {
+    path: ['ds:5', 0, 12, 16],
+    fun: extractFeatures
+  } */
+};
diff --git a/lib/utils/mappingHelpers.js b/lib/utils/mappingHelpers.js
new file mode 100644
index 00000000..ddc7d028
--- /dev/null
+++ b/lib/utils/mappingHelpers.js
@@ -0,0 +1,71 @@
+const cheerio = require('cheerio');
+const R = require('ramda');
+
+function descriptionText (description) {
+  // preserve the line breaks when converting to text
+  const html = cheerio.load('<div>' + description.replace(/<br>/g, '\r\n') + '</div>');
+  return html('div').text();
+}
+
+function priceText (priceText) {
+  return priceText || 'Free';
+}
+
+function normalizeAndroidVersion (androidVersionText) {
+  if (!androidVersionText) return 'VARY';
+
+  const number = androidVersionText.split(' ')[0];
+  if (parseFloat(number)) {
+    return number;
+  }
+
+  return 'VARY';
+}
+
+function buildHistogram (container) {
+  if (!container) {
+    return { 1: 0, 2: 0, 3: 0, 4: 0, 5: 0 };
+  }
+
+  return {
+    1: container[1][1],
+    2: container[2][1],
+    3: container[3][1],
+    4: container[4][1],
+    5: container[5][1]
+  };
+}
+
+/**
+ * Extract the comments from google play script array
+ * @param {array} comments The comments array
+ * @returns {array} Up to five non-nil values read from index 4 of each entry
+ */
+function extractComments (comments) {
+  if (!comments) return [];
+
+  // drop missing texts before truncating, so nil entries never use up one of the five slots
+  return R.reject(R.isNil, comments.map(R.path([4]))).slice(0, 5);
+}
+
+function extractFeatures (featuresArray) {
+  if (featuresArray === null) {
+    return [];
+  }
+
+  const features = featuresArray[2] || [];
+
+  return features.map(feature => ({
+    title: feature[0],
+    description: R.path([1, 0, 0, 1], feature)
+  }));
+}
+
+module.exports = {
+  descriptionText,
+  priceText,
+  normalizeAndroidVersion,
+  buildHistogram,
+  extractComments,
+  extractFeatures
+};
diff --git a/package-lock.json b/package-lock.json
index 9a916da8..032edff2 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -9,7 +9,7 @@
       "version": "8.1.0",
       "license": "MIT",
       "dependencies": {
-        "cheerio": "^1.0.0-rc.10",
+        "cheerio": "1.0.0-rc.10",
         "debug": "^2.2.0",
         "got": "^11.8.3",
         "memoizee": "^0.4.11",
diff --git a/package.json b/package.json
index 8096851c..82adb425 100644
--- a/package.json
+++ b/package.json
@@ -23,7 +23,7 @@
   },
   "homepage": "https://github.com/facundoolano/google-play-scraper",
   "dependencies": {
-    "cheerio": "^1.0.0-rc.10",
+    "cheerio": "1.0.0-rc.10",
     "debug": "^2.2.0",
     "got": "^11.8.3",
     "memoizee": "^0.4.11",