Skip to content

Commit

Permalink
fix: case-insensitive directives when merging robots.txt (#150)
Browse files Browse the repository at this point in the history
Co-authored-by: Philipp Naderer-Puiu <[email protected]>
  • Loading branch information
harlan-zw and Philipp Naderer-Puiu authored Oct 15, 2024
1 parent 12c2087 commit f4d6072
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 8 deletions.
14 changes: 7 additions & 7 deletions src/runtime/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ export function parseRobotsTxt(s: string): ParsedRobotsTxt {
if (sepIndex === -1)
continue
// get the rule name: everything before the first ':'
const rule = line.substring(0, sepIndex).trim()
const rule = line.substring(0, sepIndex).trim().toLowerCase()
const val = line.substring(sepIndex + 1).trim()

switch (rule) {
case 'User-agent':
case 'user-agent':
if (createNewGroup) {
groups.push({
...currentGroup,
Expand All @@ -53,21 +53,21 @@ export function parseRobotsTxt(s: string): ParsedRobotsTxt {
}
currentGroup.userAgent.push(val)
break
case 'Allow':
case 'allow':
currentGroup.allow.push(val)
createNewGroup = true
break
case 'Disallow':
case 'disallow':
currentGroup.disallow.push(val)
createNewGroup = true
break
case 'Sitemap':
case 'sitemap':
sitemaps.push(val)
break
case 'Host':
case 'host':
currentGroup.host = val
break
case 'Clean-param':
case 'clean-param':
if (currentGroup.userAgent.includes('Yandex')) {
currentGroup.cleanParam = currentGroup.cleanParam || []
currentGroup.cleanParam.push(val)
Expand Down
14 changes: 14 additions & 0 deletions test/fixtures/startgroupRobots.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# see rfc 9309
user-agent: ExampleBot
disallow: /foo
allow: /bar

user-agent: examplebot
disallow: /baz
allow: /boo

user-agent:
disallow: /invalid

user-agent: *
disallow: /star
58 changes: 57 additions & 1 deletion test/unit/robotsTxtParser.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ describe('robotsTxtParser', () => {
})

it('yandex', async () => {
// read fixture yoastRobots.txt
// read fixture yandex.txt
const robotsTxt = await fsp.readFile('./test/fixtures/yandex.txt', { encoding: 'utf-8' })
expect(parseRobotsTxt(robotsTxt)).toMatchInlineSnapshot(`
{
Expand Down Expand Up @@ -281,4 +281,60 @@ describe('robotsTxtParser', () => {
}
`)
})

// Regression test for lowercase directive names: the fixture spells every
// directive in lowercase (`user-agent`, `allow`, `disallow`) and the parser is
// expected to recognise them all.
// The snapshot also pins that a `user-agent` line following rule lines starts a
// new group, and that user-agent values are stored as written ("ExampleBot",
// "examplebot", the empty string and "*" each appear unchanged).
it('case-insensitive startgroupline', async () => {
// load the fixture test/fixtures/startgroupRobots.txt as UTF-8 text
const robotsTxt = await fsp.readFile('./test/fixtures/startgroupRobots.txt', { encoding: 'utf-8' })
expect(parseRobotsTxt(robotsTxt)).toMatchInlineSnapshot(`
{
"groups": [
{
"allow": [
"/bar",
],
"comment": [],
"disallow": [
"/foo",
],
"userAgent": [
"ExampleBot",
],
},
{
"allow": [
"/boo",
],
"comment": [],
"disallow": [
"/baz",
],
"userAgent": [
"examplebot",
],
},
{
"allow": [],
"comment": [],
"disallow": [
"/invalid",
],
"userAgent": [
"",
],
},
{
"allow": [],
"comment": [],
"disallow": [
"/star",
],
"userAgent": [
"*",
],
},
],
"sitemaps": [],
}
`)
})
})

0 comments on commit f4d6072

Please sign in to comment.