-
Notifications
You must be signed in to change notification settings - Fork 55
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Strip non-AMP javascript. Addresses #148
PiperOrigin-RevId: 216936577
- Loading branch information
Showing
4 changed files
with
229 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
// Copyright 2018 Google LLC | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// https://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package transformers | ||
|
||
import ( | ||
"regexp" | ||
"strings" | ||
|
||
"github.com/ampproject/amppackager/transformer/internal/amphtml" | ||
"github.com/ampproject/amppackager/transformer/internal/htmlnode" | ||
"golang.org/x/net/html/atom" | ||
"golang.org/x/net/html" | ||
) | ||
|
||
// StripJS removes non-AMP javascript from the DOM. | ||
// - For <script> elements, remove where any of the following is true: | ||
// - has a src attribute whose value is not prefixed by https://cdn.ampproject.org/ (case-insensitive match). | ||
// - It has no src attribute and no type attribute (case-insensitive match). | ||
// - It has a type attribute whose value is neither application/json nor application/ld+json (case-insensitive match on both name and value). | ||
// | ||
// - For all other elements, remove any event attribute that matches "on[A-Za-z].*". | ||
func StripJS(e *Context) error { | ||
stripJS(e.DOM.RootNode) | ||
return nil | ||
} | ||
|
||
// eventRE is a regexp to match event attributes, e.g. onClick | ||
var eventRE = func() *regexp.Regexp { | ||
r := regexp.MustCompile("^on[A-Za-z].*") | ||
return r | ||
}() | ||
|
||
// stripJS recursively does the actual work on each node. | ||
func stripJS(n *html.Node) { | ||
if n.Type == html.ElementNode { | ||
switch n.DataAtom { | ||
case atom.Script: | ||
srcVal, srcOk := htmlnode.GetAttributeVal(n, "src") | ||
if srcOk { | ||
if !strings.HasPrefix(strings.ToLower(srcVal), amphtml.AMPCacheRootURL) { | ||
n.Parent.RemoveChild(n) | ||
return | ||
} | ||
} | ||
typeVal, typeOk := htmlnode.GetAttributeVal(n, "type") | ||
if !srcOk && !typeOk { | ||
n.Parent.RemoveChild(n) | ||
return | ||
} | ||
if typeOk { | ||
switch strings.ToLower(typeVal) { | ||
case "application/json", "application/ld+json": | ||
// ok to keep | ||
default: | ||
n.Parent.RemoveChild(n) | ||
return | ||
} | ||
} | ||
default: | ||
for _, attr := range n.Attr { | ||
if attr.Namespace == "" { | ||
if match := eventRE.MatchString(attr.Key); match { | ||
htmlnode.RemoveAttribute(n, &attr) | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
for c := n.FirstChild; c != nil; { | ||
// Track the next sibling because if the node is removed in the recursive | ||
// call, it becomes orphaned and the pointer to NextSibling is lost. | ||
next := c.NextSibling | ||
stripJS(c) | ||
c = next | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
// Copyright 2018 Google LLC | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// https://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package transformers_test | ||
|
||
import ( | ||
"strings" | ||
"testing" | ||
|
||
"github.com/ampproject/amppackager/transformer/internal/amphtml" | ||
tt "github.com/ampproject/amppackager/transformer/internal/testing" | ||
"github.com/ampproject/amppackager/transformer/transformers" | ||
"golang.org/x/net/html" | ||
) | ||
|
||
func TestStripJS(t *testing.T) { | ||
tcs := []tt.TestCase{ | ||
{ | ||
Desc: "strips script wrong src", | ||
Input: "<script src=main.js/>", | ||
Expected: "<head></head><body></body>", | ||
}, | ||
{ | ||
Desc: "keeps script correct src", | ||
Input: "<script async src=\"https://cdn.ampproject.org/v0.js\"></script>", | ||
Expected: "<head><script async src=https://cdn.ampproject.org/v0.js></script></head>", | ||
}, | ||
{ | ||
Desc: "keeps script correct src case-insensitive", | ||
Input: "<script async custom-element='amp-analytics' src='https://cDn.AMPproject.org/v0/amp-analytics-0.1.js'></script>", | ||
Expected: "<head><script async custom-element=amp-analytics src=https://cDn.AMPproject.org/v0/amp-analytics-0.1.js></script></head>", | ||
}, | ||
{ | ||
Desc: "strips script no src, no type", | ||
Input: "<script>foo</script>", | ||
Expected: "<head></head><body></body>", | ||
}, | ||
{ | ||
Desc: "strips script wrong type", | ||
Input: "<script type=application/javascript>foo</script>", | ||
Expected: "<head></head><body></body>", | ||
}, | ||
{ | ||
Desc: "keeps script corect type", | ||
Input: "<script type=application/json>foo</script>", | ||
Expected: "<head><script type=application/json>foo</script></head><body></body>", | ||
}, | ||
{ | ||
Desc: "strip tag attr ona", | ||
Input: "<body><select ona=\"myFunction()\"></body>", | ||
Expected: "<body><select></select></body>", | ||
}, | ||
{ | ||
Desc: "strips tag event attr", | ||
Input: "<body><select onchange=\"myFunction()\"></body>", | ||
Expected: "<body><select></select></body>", | ||
}, | ||
{ | ||
Desc: "strip tag attr onfoo", | ||
Input: "<body><select onfoo=\"myFunction()\"></body>", | ||
Expected: "<body><select></select></body>", | ||
}, | ||
{ | ||
Desc: "keep tag attr 'on'", | ||
Input: "<body><select on=\"myFunction()\"></body>", | ||
Expected: "<body><select on=myFunction()></select></body>", | ||
}, | ||
{ | ||
Desc: "keep tag attr on-foo", | ||
Input: "<body><select on-foo=\"myFunction()\"></body>", | ||
Expected: "<body><select on-foo=myFunction()></select></body>", | ||
}, | ||
{ | ||
Desc: "keep tag attr notonchange", | ||
Input: "<body><select notonchange=\"myFunction()\"></body>", | ||
Expected: "<body><select notonchange=myFunction()></select></body>", | ||
}, | ||
} | ||
|
||
for _, tc := range tcs { | ||
inputDoc, err := html.Parse(strings.NewReader(tc.Input)) | ||
if err != nil { | ||
t.Errorf("%s: html.Parse failed %q", tc.Input, err) | ||
continue | ||
} | ||
inputDOM, err := amphtml.NewDOM(inputDoc) | ||
if err != nil { | ||
t.Errorf("%s\namphtml.NewDOM for %s failed %q", tc.Desc, tc.Input, err) | ||
continue | ||
} | ||
transformers.StripJS(&transformers.Context{DOM: inputDOM}) | ||
var input strings.Builder | ||
if err := html.Render(&input, inputDoc); err != nil { | ||
t.Errorf("%s: html.Render failed %q", tc.Input, err) | ||
continue | ||
} | ||
|
||
expectedDoc, err := html.Parse(strings.NewReader(tc.Expected)) | ||
if err != nil { | ||
t.Errorf("%s: html.Parse failed %q", tc.Expected, err) | ||
continue | ||
} | ||
var expected strings.Builder | ||
err = html.Render(&expected, expectedDoc) | ||
if err != nil { | ||
t.Errorf("%s: html.Render failed %q", tc.Expected, err) | ||
continue | ||
} | ||
if input.String() != expected.String() { | ||
t.Errorf("%s: Transform=\n%q\nwant=\n%q", tc.Desc, &input, &expected) | ||
} | ||
} | ||
|
||
} |