forked from clearlydefined/crawler
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request clearlydefined#620 from yashkohli88/yk/add-default-headers-centrally
Update fetch file to centralize default headers
- Loading branch information
Showing
6 changed files
with
77 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,10 @@ | |
|
||
const axios = require('axios') | ||
|
||
/**
 * Headers attached to every outgoing crawler request. The object is frozen
 * and exported so other fetchers can reuse the same values without being
 * able to mutate them.
 */
const defaultHeaders = Object.freeze({ 'User-Agent': 'clearlydefined.io crawler (clearlydefined@outlook.com)' })

// Merge into axios' `common` header bucket instead of replacing
// `axios.defaults.headers` wholesale: replacing it discards axios' built-in
// defaults (e.g. Accept) and the per-method buckets, and would hand axios a
// frozen object as its own defaults.
Object.assign(axios.defaults.headers.common, defaultHeaders)
|
||
function buildRequestOptions(request) { | ||
let responseType = 'text' | ||
if (request.json) { | ||
|
@@ -45,4 +49,4 @@ function withDefaults(opts) { | |
return request => callFetch(request, axiosInstance) | ||
} | ||
|
||
module.exports = { callFetch, withDefaults } | ||
module.exports = { callFetch, withDefaults, defaultHeaders } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,8 +51,7 @@ class CratesioFetch extends AbstractFetch { | |
try { | ||
registryData = await request({ | ||
url: `https://crates.io/api/v1/crates/${spec.name}`, | ||
json: true, | ||
headers: { 'User-Agent': 'clearlydefined.io crawler (clearlydefined@outlook.com)' } | ||
json: true | ||
}) | ||
} catch (exception) { | ||
if (exception.statusCode !== 404) throw exception | ||
|
@@ -72,7 +71,6 @@ class CratesioFetch extends AbstractFetch { | |
url: `https://crates.io${version.dl_path}`, | ||
encoding: null, | ||
headers: { | ||
'User-Agent': 'clearlydefined.io crawler (clearlydefined@outlook.com)', | ||
Accept: 'text/html' | ||
} | ||
}) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
// SPDX-License-Identifier: MIT | ||
|
||
const AbstractFetch = require('./abstractFetch') | ||
const { withDefaults } = require('../../lib/fetch') | ||
const { callFetch, defaultHeaders } = require('../../lib/fetch') | ||
const nodeRequest = require('request') | ||
const { clone, get } = require('lodash') | ||
const { promisify } = require('util') | ||
|
@@ -23,14 +23,12 @@ const extensionMap = { | |
jar: '.jar' | ||
} | ||
|
||
const defaultHeaders = { headers: { 'User-Agent': 'clearlydefined.io crawler (clearlydefined@outlook.com)' } } | ||
|
||
class MavenBasedFetch extends AbstractFetch { | ||
constructor(providerMap, options) { | ||
super(options) | ||
this._providerMap = { ...providerMap } | ||
this._handleRequestPromise = options.requestPromise || withDefaults(defaultHeaders) | ||
this._handleRequestStream = options.requestStream || nodeRequest.defaults(defaultHeaders).get | ||
this._handleRequestPromise = options.requestPromise || callFetch | ||
this._handleRequestStream = options.requestStream || nodeRequest.defaults({ headers: defaultHeaders }).get | ||
} | ||
|
||
canHandle(request) { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ const nodeRequest = require('request') | |
const { promisify } = require('util') | ||
const readdir = promisify(fs.readdir) | ||
const FetchResult = require('../../lib/fetchResult') | ||
const { defaultHeaders } = require('../../lib/fetch') | ||
|
||
const providerMap = { | ||
packagist: 'https://repo.packagist.org/' | ||
|
@@ -62,9 +63,7 @@ class PackagistFetch extends AbstractFetch { | |
return new Promise((resolve, reject) => { | ||
const options = { | ||
url: distUrl, | ||
headers: { | ||
'User-Agent': 'clearlydefined.io crawler (clearlydefined@outlook.com)' | ||
} | ||
headers: defaultHeaders | ||
} | ||
nodeRequest | ||
.get(options, (error, response) => { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,14 @@ | ||
const { fail } = require('assert') | ||
const { callFetch, withDefaults } = require('../../../lib/fetch') | ||
const { callFetch, withDefaults, defaultHeaders } = require('../../../lib/fetch') | ||
const { expect } = require('chai') | ||
const fs = require('fs') | ||
const mockttp = require('mockttp') | ||
|
||
/**
 * Asserts that each entry of the shared `defaultHeaders` is present on the
 * given headers object, looked up under the lower-cased header name.
 *
 * @param {object} headers - Headers of a request seen by the mock server.
 */
function checkDefaultHeaders(headers) {
  Object.keys(defaultHeaders).forEach(headerName => {
    const expectedValue = defaultHeaders[headerName]
    // The expectations below look the header up by its lower-cased name.
    expect(headers).to.have.property(headerName.toLowerCase()).that.equals(expectedValue)
  })
}
describe('CallFetch', () => { | ||
describe('with mock server', () => { | ||
const mockServer = mockttp.getLocal() | ||
|
@@ -23,6 +28,37 @@ describe('CallFetch', () => { | |
expect(response).to.be.deep.equal(JSON.parse(expected)) | ||
}) | ||
|
||
it('checks if the default header user-agent and other header is present in crate components', async () => { | ||
const path = '/crates.io/api/v1/crates/name/1.0.0/download' | ||
const endpointMock = await mockServer.forGet(path).thenReply(200, 'success') | ||
|
||
await callFetch({ | ||
url: mockServer.urlFor(path), | ||
method: 'GET', | ||
json: true, | ||
encoding: null, | ||
headers: { | ||
Accept: 'text/html' | ||
} | ||
}) | ||
const requests = await endpointMock.getSeenRequests() | ||
checkDefaultHeaders(requests[0].headers) | ||
expect(requests[0].headers).to.include({ accept: 'text/html' }) | ||
}) | ||
|
||
it('checks if the default header user-agent is present in crate components', async () => { | ||
const path = '/crates.io/api/v1/crates/name' | ||
const endpointMock = await mockServer.forGet(path).thenReply(200, 'success') | ||
|
||
await callFetch({ | ||
url: mockServer.urlFor(path), | ||
method: 'GET', | ||
json: true | ||
}) | ||
const requests = await endpointMock.getSeenRequests() | ||
checkDefaultHeaders(requests[0].headers) | ||
}) | ||
|
||
it('checks if the full response is fetched', async () => { | ||
const path = '/registry.npmjs.com/redis/0.1.0' | ||
const expected = fs.readFileSync('test/fixtures/fetch/redis-0.1.0.json') | ||
|
@@ -87,17 +123,17 @@ describe('CallFetch', () => { | |
const url = mockServer.urlFor(path) | ||
const endpointMock = await mockServer.forGet(path).thenReply(200) | ||
|
||
const defaultOptions = { headers: { 'user-agent': 'clearlydefined.io crawler (clearlydefined@outlook.com)' } } | ||
const defaultOptions = { headers: defaultHeaders } | ||
const requestWithDefaults = withDefaults(defaultOptions) | ||
await requestWithDefaults({ url }) | ||
await requestWithDefaults({ url }) | ||
|
||
const requests = await endpointMock.getSeenRequests() | ||
expect(requests.length).to.equal(2) | ||
expect(requests[0].url).to.equal(url) | ||
expect(requests[0].headers).to.include(defaultOptions.headers) | ||
checkDefaultHeaders(requests[0].headers) | ||
expect(requests[1].url).to.equal(url) | ||
expect(requests[1].headers).to.include(defaultOptions.headers) | ||
checkDefaultHeaders(requests[1].headers) | ||
}) | ||
|
||
it('checks if the response is text with uri option in GET request', async () => { | ||
|
@@ -129,6 +165,8 @@ describe('CallFetch', () => { | |
const json = await requests[0].body.getJson() | ||
expect(json).to.deep.equal({ test: 'test' }) | ||
expect(requests[0].headers).to.include({ 'x-crawler': 'secret' }) | ||
//Check for the default header value | ||
checkDefaultHeaders(requests[0].headers) | ||
}) | ||
|
||
describe('test simple', () => { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters