diff --git a/lib/app.js b/lib/app.js
index 28b6e9cb..3b2d6e27 100644
--- a/lib/app.js
+++ b/lib/app.js
@@ -4,8 +4,9 @@ const R = require('ramda');
const queryString = require('querystring');
const request = require('./utils/request');
const scriptData = require('./utils/scriptData');
-const debug = require('debug')('google-play-scraper');
-const cheerio = require('cheerio');
+const mappingV1 = require('./mapping/app/general.v1');
+const mappingV2 = require('./mapping/app/general.v2');
+
const { BASE_URL } = require('./constants');
const PLAYSTORE_URL = `${BASE_URL}/store/apps/details`;
@@ -34,7 +35,12 @@ function app (opts) {
request(options, opts.throttle)
.then(scriptData.parse)
// comment next line to get raw data
- .then(scriptData.extractor(MAPPINGS))
+ .then((parsedData) => {
+   // v1 pages hold a string at the v1 title path. R.path needs the whole path array: a bare string such as title[0] ('ds:5') is walked character by character and never resolves.
+   const isV2 = typeof R.path(mappingV1.title, parsedData) !== 'string';
+   const mapping = isV2 ? mappingV2 : mappingV1;
+   return scriptData.extractor(mapping)(parsedData);
+ })
.then(R.assoc('appId', opts.appId))
.then(R.assoc('url', reqUrl))
.then(resolve)
@@ -42,170 +48,4 @@ function app (opts) {
});
}
-const MAPPINGS = {
- // FIXME add appId
- title: ['ds:5', 0, 0, 0],
- description: {
- path: ['ds:5', 0, 10, 0, 1],
- fun: descriptionText
- },
- descriptionHTML: ['ds:5', 0, 10, 0, 1],
- summary: ['ds:5', 0, 10, 1, 1],
- installs: ['ds:5', 0, 12, 9, 0],
- minInstalls: ['ds:5', 0, 12, 9, 1],
- maxInstalls: ['ds:5', 0, 12, 9, 2],
- score: ['ds:6', 0, 6, 0, 1],
- scoreText: ['ds:6', 0, 6, 0, 0],
- ratings: ['ds:6', 0, 6, 2, 1],
- reviews: ['ds:6', 0, 6, 3, 1],
- histogram: {
- path: ['ds:6', 0, 6, 1],
- fun: buildHistogram
- },
-
- price: {
- path: ['ds:3', 0, 2, 0, 0, 0, 1, 0, 0],
- fun: (val) => val / 1000000 || 0
- },
- free: {
- path: ['ds:3', 0, 2, 0, 0, 0, 1, 0, 0],
- // considered free only if price is exactly zero
- fun: (val) => val === 0
- },
- currency: ['ds:3', 0, 2, 0, 0, 0, 1, 0, 1],
- priceText: {
- path: ['ds:3', 0, 2, 0, 0, 0, 1, 0, 2],
- fun: priceText
- },
- available: {
- path: ['ds:5', 0, 12, 11, 0],
- fun: Boolean
- },
- offersIAP: {
- path: ['ds:5', 0, 12, 12, 0],
- fun: Boolean
- },
- IAPRange: ['ds:5', 0, 12, 12, 0],
- size: ['ds:8', 0],
- androidVersion: {
- path: ['ds:8', 2],
- fun: normalizeAndroidVersion
- },
- androidVersionText: ['ds:8', 2],
- developer: ['ds:5', 0, 12, 5, 1],
- developerId: {
- path: ['ds:5', 0, 12, 5, 5, 4, 2],
- fun: (devUrl) => devUrl.split('id=')[1]
- },
- developerEmail: ['ds:5', 0, 12, 5, 2, 0],
- developerWebsite: ['ds:5', 0, 12, 5, 3, 5, 2],
- developerAddress: ['ds:5', 0, 12, 5, 4, 0],
- privacyPolicy: ['ds:5', 0, 12, 7, 2],
- developerInternalID: ['ds:5', 0, 12, 5, 0, 0],
- genre: ['ds:5', 0, 12, 13, 0, 0],
- genreId: ['ds:5', 0, 12, 13, 0, 2],
- familyGenre: ['ds:5', 0, 12, 13, 1, 0],
- familyGenreId: ['ds:5', 0, 12, 13, 1, 2],
- icon: ['ds:5', 0, 12, 1, 3, 2],
- headerImage: ['ds:5', 0, 12, 2, 3, 2],
- screenshots: {
- path: ['ds:5', 0, 12, 0],
- fun: (screenshots) => {
- if (screenshots === null) return [];
- return screenshots.map(R.path([3, 2]));
- }
- },
- video: ['ds:5', 0, 12, 3, 0, 3, 2],
- videoImage: ['ds:5', 0, 12, 3, 1, 3, 2],
- contentRating: ['ds:5', 0, 12, 4, 0],
- contentRatingDescription: ['ds:5', 0, 12, 4, 2, 1],
- adSupported: {
- path: ['ds:5', 0, 12, 14, 0],
- fun: Boolean
- },
- released: ['ds:5', 0, 12, 36],
- updated: {
- path: ['ds:5', 0, 12, 8, 0],
- fun: (ts) => ts * 1000
- },
- version: ['ds:8', 1],
- recentChanges: ['ds:5', 0, 12, 6, 1],
- comments: {
- useServiceRequestId: 'UsvDTd',
- path: [0],
- fun: extractComments
- },
- editorsChoice: {
- path: ['ds:5', 0, 12, 15, 0],
- fun: Boolean
- },
- features: {
- path: ['ds:5', 0, 12, 16],
- fun: extractFeatures
- }
-};
-
-function extractFeatures (featuresArray) {
- if (featuresArray === null) {
- return [];
- }
-
- const features = featuresArray[2] || [];
-
- return features.map(feature => ({
- title: feature[0],
- description: R.path([1, 0, 0, 1], feature)
- }));
-}
-
-function descriptionText (description) {
- // preserve the line breaks when converting to text
- const html = cheerio.load('<div>' + description.replace(/<br>/g, '\r\n') + '</div>');
- return html('div').text();
-}
-
-function priceText (priceText) {
- return priceText || 'Free';
-}
-
-function normalizeAndroidVersion (androidVersionText) {
- const number = androidVersionText.split(' ')[0];
- if (parseFloat(number)) {
- return number;
- }
-
- return 'VARY';
-}
-
-function buildHistogram (container) {
- if (!container) {
- return { 1: 0, 2: 0, 3: 0, 4: 0, 5: 0 };
- }
-
- return {
- 1: container[1][1],
- 2: container[2][1],
- 3: container[3][1],
- 4: container[4][1],
- 5: container[5][1]
- };
-}
-
-/**
- * Extract the comments from google play script array
- * @param {array} comments The comments array
- */
-function extractComments (comments) {
- if (!comments) {
- return [];
- }
-
- debug('comments: %O', comments);
-
- return R.compose(
- R.take(5),
- R.reject(R.isNil),
- R.pluck(4))(comments);
-}
-
module.exports = app;
diff --git a/lib/mapping/app/general.v1.js b/lib/mapping/app/general.v1.js
new file mode 100644
index 00000000..093b5dc3
--- /dev/null
+++ b/lib/mapping/app/general.v1.js
@@ -0,0 +1,105 @@
+const R = require('ramda');
+const helper = require('../../utils/mappingHelpers');
+
+module.exports = {
+ /* Field mapping for the v1 app details layout (the one that predates general.v2).
+  * A bare array is a lookup path whose first element is the 'ds:N' script-data key;
+  * a { path, fun } entry pairs such a path with a post-processing function
+  * (presumably applied by scriptData.extractor, which is not shown here - confirm).
+  * FIXME add appId */
+ title: ['ds:5', 0, 0, 0],
+ description: {
+ path: ['ds:5', 0, 10, 0, 1],
+ fun: helper.descriptionText
+ },
+ descriptionHTML: ['ds:5', 0, 10, 0, 1],
+ summary: ['ds:5', 0, 10, 1, 1],
+ installs: ['ds:5', 0, 12, 9, 0],
+ minInstalls: ['ds:5', 0, 12, 9, 1],
+ maxInstalls: ['ds:5', 0, 12, 9, 2],
+ score: ['ds:6', 0, 6, 0, 1],
+ scoreText: ['ds:6', 0, 6, 0, 0],
+ ratings: ['ds:6', 0, 6, 2, 1],
+ reviews: ['ds:6', 0, 6, 3, 1],
+ histogram: {
+ path: ['ds:6', 0, 6, 1],
+ fun: helper.buildHistogram
+ },
+
+ price: {
+ path: ['ds:3', 0, 2, 0, 0, 0, 1, 0, 0],
+ fun: (val) => val / 1000000 || 0 // raw value divided by 1e6; 0 when the result is falsy (including NaN)
+ },
+ free: {
+ path: ['ds:3', 0, 2, 0, 0, 0, 1, 0, 0],
+ // considered free only if price is exactly zero
+ fun: (val) => val === 0
+ },
+ currency: ['ds:3', 0, 2, 0, 0, 0, 1, 0, 1],
+ priceText: {
+ path: ['ds:3', 0, 2, 0, 0, 0, 1, 0, 2],
+ fun: helper.priceText
+ },
+ available: {
+ path: ['ds:5', 0, 12, 11, 0],
+ fun: Boolean
+ },
+ offersIAP: {
+ path: ['ds:5', 0, 12, 12, 0],
+ fun: Boolean
+ },
+ IAPRange: ['ds:5', 0, 12, 12, 0],
+ size: ['ds:8', 0],
+ androidVersion: {
+ path: ['ds:8', 2],
+ fun: helper.normalizeAndroidVersion
+ },
+ androidVersionText: ['ds:8', 2],
+ developer: ['ds:5', 0, 12, 5, 1],
+ developerId: {
+ path: ['ds:5', 0, 12, 5, 5, 4, 2],
+ fun: (devUrl) => devUrl.split('id=')[1] // keeps what follows the first 'id='; NOTE(review): throws if devUrl is missing - confirm the extractor never passes undefined
+ },
+ developerEmail: ['ds:5', 0, 12, 5, 2, 0],
+ developerWebsite: ['ds:5', 0, 12, 5, 3, 5, 2],
+ developerAddress: ['ds:5', 0, 12, 5, 4, 0],
+ privacyPolicy: ['ds:5', 0, 12, 7, 2],
+ developerInternalID: ['ds:5', 0, 12, 5, 0, 0],
+ genre: ['ds:5', 0, 12, 13, 0, 0],
+ genreId: ['ds:5', 0, 12, 13, 0, 2],
+ familyGenre: ['ds:5', 0, 12, 13, 1, 0],
+ familyGenreId: ['ds:5', 0, 12, 13, 1, 2],
+ icon: ['ds:5', 0, 12, 1, 3, 2],
+ headerImage: ['ds:5', 0, 12, 2, 3, 2],
+ screenshots: {
+ path: ['ds:5', 0, 12, 0],
+ fun: (screenshots) => {
+ if (screenshots === null) return []; // NOTE(review): only null is guarded; undefined would throw on .map - confirm against the extractor
+ return screenshots.map(R.path([3, 2]));
+ }
+ },
+ video: ['ds:5', 0, 12, 3, 0, 3, 2],
+ videoImage: ['ds:5', 0, 12, 3, 1, 3, 2],
+ contentRating: ['ds:5', 0, 12, 4, 0],
+ contentRatingDescription: ['ds:5', 0, 12, 4, 2, 1],
+ adSupported: {
+ path: ['ds:5', 0, 12, 14, 0],
+ fun: Boolean
+ },
+ released: ['ds:5', 0, 12, 36],
+ updated: {
+ path: ['ds:5', 0, 12, 8, 0],
+ fun: (ts) => ts * 1000 // presumably seconds -> milliseconds; TODO confirm the unit of the raw value
+ },
+ version: ['ds:8', 1],
+ recentChanges: ['ds:5', 0, 12, 6, 1],
+ comments: {
+ useServiceRequestId: 'UsvDTd', // presumably read by scriptData.extractor (not shown here) - confirm semantics
+ path: [0],
+ fun: helper.extractComments
+ },
+ editorsChoice: {
+ path: ['ds:5', 0, 12, 15, 0],
+ fun: Boolean
+ },
+ features: {
+ path: ['ds:5', 0, 12, 16],
+ fun: helper.extractFeatures
+ }
+};
diff --git a/lib/mapping/app/general.v2.js b/lib/mapping/app/general.v2.js
new file mode 100644
index 00000000..2bed0653
--- /dev/null
+++ b/lib/mapping/app/general.v2.js
@@ -0,0 +1,115 @@
+const R = require('ramda');
+const helper = require('../../utils/mappingHelpers');
+
+/**
+ * Mapping for app information starting 05/2022
+ *
+ * Most fields are lookup paths under the 'ds:4' script-data key.
+ * NOTE(review): a few entries still point at v1 locations ('ds:3', 'ds:5') and some v1
+ * fields (size, editorsChoice, features) are commented out - confirm against a current
+ * page before relying on them.
+ */
+module.exports = {
+ title: ['ds:4', 1, 2, 0, 0],
+ description: {
+ path: ['ds:4', 1, 2, 72, 0, 1],
+ fun: helper.descriptionText
+ },
+ descriptionHTML: ['ds:4', 1, 2, 72, 0, 1],
+ summary: ['ds:4', 1, 2, 73, 0, 1],
+ installs: ['ds:4', 1, 2, 13, 0],
+ minInstalls: ['ds:4', 1, 2, 13, 1],
+ maxInstalls: ['ds:4', 1, 2, 13, 2],
+ score: ['ds:4', 1, 2, 51, 0, 1],
+ scoreText: ['ds:4', 1, 2, 51, 0, 0],
+ ratings: ['ds:4', 1, 2, 51, 2, 1],
+ reviews: ['ds:4', 1, 2, 51, 3, 1],
+ histogram: {
+ path: ['ds:4', 1, 2, 51, 1],
+ fun: helper.buildHistogram
+ },
+ price: {
+ path: ['ds:4', 1, 2, 57, 0, 0, 0, 0, 1, 0, 0],
+ fun: (val) => val / 1000000 || 0 // raw value divided by 1e6; 0 when the result is falsy (including NaN)
+ },
+ free: {
+ path: ['ds:4', 1, 2, 57, 0, 0, 0, 0, 1, 0, 0],
+ // considered free only if price is exactly zero
+ fun: (val) => val === 0
+ },
+ currency: [ // NOTE(review): a list of two candidate paths (v1 'ds:3' first, then v2) - presumably resolved by scriptData.extractor; confirm
+ ['ds:3', 0, 2, 0, 0, 0, 1, 0, 1],
+ ['ds:4', 1, 2, 57, 0, 0, 0, 0, 1, 0, 1]
+ ],
+ priceText: {
+ path: ['ds:4', 1, 2, 57, 0, 0, 0, 0, 1, 0, 2],
+ fun: helper.priceText
+ },
+ available: {
+ path: ['ds:4', 1, 2, 18, 0],
+ fun: Boolean
+ },
+ offersIAP: {
+ path: ['ds:4', 1, 2, 19, 0],
+ fun: Boolean
+ },
+ IAPRange: ['ds:4', 1, 2, 19, 0],
+ /* size: ['ds:8', 0], */
+ androidVersion: {
+ path: ['ds:4', 1, 2, 140, 1, 1, 0, 0, 1],
+ fun: helper.normalizeAndroidVersion
+ },
+ androidVersionText: ['ds:4', 1, 2, 140, 1, 1, 0, 0, 1],
+ developer: ['ds:4', 1, 2, 68, 0],
+ developerId: {
+ path: ['ds:4', 1, 2, 68, 1, 4, 2],
+ fun: (devUrl) => devUrl.split('id=')[1] // keeps what follows the first 'id='; NOTE(review): throws if devUrl is missing - confirm the extractor never passes undefined
+ },
+ developerEmail: ['ds:4', 1, 2, 69, 1, 0],
+ developerWebsite: ['ds:4', 1, 2, 69, 0, 5, 2],
+ developerAddress: ['ds:4', 1, 2, 69, 2, 0],
+ privacyPolicy: ['ds:4', 1, 2, 99, 0, 5, 2],
+ developerInternalID: { // NOTE(review): same path and fun as developerId above - confirm this duplication is intended
+ path: ['ds:4', 1, 2, 68, 1, 4, 2],
+ fun: (devUrl) => devUrl.split('id=')[1]
+ },
+ genre: ['ds:4', 1, 2, 79, 0, 0, 0],
+ genreId: ['ds:4', 1, 2, 79, 0, 0, 2],
+ familyGenre: ['ds:5', 0, 12, 13, 1, 0], // NOTE(review): still the v1 'ds:5' path - confirm it resolves in this layout
+ familyGenreId: ['ds:5', 0, 12, 13, 1, 2], // NOTE(review): still the v1 'ds:5' path - confirm it resolves in this layout
+ icon: ['ds:4', 1, 2, 95, 0, 3, 2],
+ headerImage: ['ds:4', 1, 2, 96, 0, 3, 2],
+ screenshots: {
+ path: ['ds:4', 1, 2, 78, 0],
+ fun: (screenshots) => {
+ if (screenshots === null) return []; // NOTE(review): only null is guarded; undefined would throw on .map - confirm against the extractor
+ return screenshots.map(R.path([3, 2]));
+ }
+ },
+ video: ['ds:4', 1, 2, 100, 0, 0, 3, 2],
+ videoImage: ['ds:4', 1, 2, 100, 1, 0, 3, 2],
+ contentRating: ['ds:4', 1, 2, 9, 0],
+ contentRatingDescription: ['ds:4', 1, 2, 9, 2, 1],
+ adSupported: {
+ path: ['ds:4', 1, 2, 48],
+ fun: Boolean
+ },
+ released: ['ds:4', 1, 2, 10, 0],
+ updated: {
+ path: ['ds:4', 1, 2, 145, 0, 1, 0],
+ fun: (ts) => ts * 1000 // presumably seconds -> milliseconds; TODO confirm the unit of the raw value
+ },
+ version: {
+ path: ['ds:4', 1, 2, 140, 0, 0, 0],
+ fun: (val) => val || 'VARY' // falls back to 'VARY' when the value is missing or empty
+ },
+ recentChanges: ['ds:4', 1, 2, 144, 1, 1],
+ comments: {
+ path: ['ds:8', 0],
+ isArray: true, // NOTE(review): flag presumably interpreted by scriptData.extractor (not shown here) - confirm semantics
+ fun: helper.extractComments
+ }
+/* TODO: not mapped for this layout yet. The entries below are the v1 definitions kept for
+ reference; if re-enabled, extractFeatures must be referenced as helper.extractFeatures
+ (this file has no local extractFeatures binding).
+ editorsChoice: {
+ path: ['ds:5', 0, 12, 15, 0],
+ fun: Boolean
+ },
+ features: {
+ path: ['ds:5', 0, 12, 16],
+ fun: extractFeatures
+ } */
+};
diff --git a/lib/utils/mappingHelpers.js b/lib/utils/mappingHelpers.js
new file mode 100644
index 00000000..ddc7d028
--- /dev/null
+++ b/lib/utils/mappingHelpers.js
@@ -0,0 +1,68 @@
+const cheerio = require('cheerio');
+const R = require('ramda');
+
+/**
+ * Convert the HTML description into plain text.
+ * @param {string} description The description markup
+ * @returns {string} The text content, with every <br> replaced by '\r\n'
+ */
+function descriptionText (description) {
+  // preserve the line breaks when converting to text; the <div> wrapper gives
+  // the selector below a single root element to read the text from
+  const html = cheerio.load('<div>' + description.replace(/<br>/g, '\r\n') + '</div>');
+  return html('div').text();
+}
+
+/**
+ * Pick the price label to report for an app.
+ * @param {?string} priceText The raw price label; may be empty or missing
+ * @returns {string} The given label, or 'Free' when it is falsy
+ */
+function priceText (priceText) {
+  if (priceText) {
+    return priceText;
+  }
+
+  return 'Free';
+}
+
+/**
+ * Reduce the Android version label to its leading version number.
+ * @param {?string} androidVersionText The raw label; may be empty or missing
+ * @returns {string} The first space-separated token when parseFloat() of it is
+ *   truthy, otherwise 'VARY'
+ */
+function normalizeAndroidVersion (androidVersionText) {
+  const fallback = 'VARY';
+
+  if (!androidVersionText) {
+    return fallback;
+  }
+
+  const [leadingToken] = androidVersionText.split(' ');
+
+  return parseFloat(leadingToken) ? leadingToken : fallback;
+}
+
+/**
+ * Build the ratings histogram keyed 1 to 5.
+ * @param {?array} container Indexable container; entry k holds the value for
+ *   key k at its index 1
+ * @returns {object} An object with keys 1 to 5; all zeros when container is falsy
+ */
+function buildHistogram (container) {
+  const histogram = {};
+
+  for (let stars = 1; stars <= 5; stars++) {
+    histogram[stars] = container ? container[stars][1] : 0;
+  }
+
+  return histogram;
+}
+
+/**
+ * Extract the comments from google play script array
+ * @param {array} comments The comments array
+ * @returns {array} Up to five values taken from index 4 of each entry
+ *   (presumably the comment text), skipping entries where it is null/undefined
+ */
+function extractComments (comments) {
+  if (!comments) return [];
+
+  return comments
+    .map(R.path([4]))
+    // drop empty slots before truncating so they neither leak out as
+    // null/undefined nor use up one of the five places
+    .filter((comment) => !R.isNil(comment))
+    .slice(0, 5);
+}
+
+/**
+ * Extract the feature list from its script-data container.
+ * @param {?array} featuresArray Container whose index 2 holds the feature entries
+ * @returns {array} One { title, description } object per entry; empty when the
+ *   container or its entry list is missing
+ */
+function extractFeatures (featuresArray) {
+  // falsy guard (same as the sibling helpers): a lookup on an absent path
+  // yields undefined rather than null, and indexing undefined would throw
+  if (!featuresArray) {
+    return [];
+  }
+
+  const features = featuresArray[2] || [];
+
+  return features.map(feature => ({
+    title: feature[0],
+    description: R.path([1, 0, 0, 1], feature)
+  }));
+}
+
+module.exports = { // value converters referenced by the general.v1 and general.v2 app mappings
+ descriptionText,
+ priceText,
+ normalizeAndroidVersion,
+ buildHistogram,
+ extractComments,
+ extractFeatures
+};
diff --git a/package-lock.json b/package-lock.json
index 9a916da8..032edff2 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -9,7 +9,7 @@
"version": "8.1.0",
"license": "MIT",
"dependencies": {
- "cheerio": "^1.0.0-rc.10",
+ "cheerio": "1.0.0-rc.10",
"debug": "^2.2.0",
"got": "^11.8.3",
"memoizee": "^0.4.11",
diff --git a/package.json b/package.json
index 8096851c..82adb425 100644
--- a/package.json
+++ b/package.json
@@ -23,7 +23,7 @@
},
"homepage": "https://github.com/facundoolano/google-play-scraper",
"dependencies": {
- "cheerio": "^1.0.0-rc.10",
+ "cheerio": "1.0.0-rc.10",
"debug": "^2.2.0",
"got": "^11.8.3",
"memoizee": "^0.4.11",