Skip to content

Commit

Permalink
Strip non-AMP javascript. Addresses #148
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 216936577
  • Loading branch information
alin04 authored and twifkak committed Oct 12, 2018
1 parent 398f20f commit 310b16c
Show file tree
Hide file tree
Showing 4 changed files with 229 additions and 13 deletions.
14 changes: 8 additions & 6 deletions transformer/transformer.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ import (
"regexp"
"strings"

"github.com/ampproject/amppackager/transformer/printer"
rpb "github.com/ampproject/amppackager/transformer/request"
"github.com/ampproject/amppackager/transformer/internal/amphtml"
"github.com/ampproject/amppackager/transformer/internal/htmlnode"
"github.com/ampproject/amppackager/transformer/printer"
rpb "github.com/ampproject/amppackager/transformer/request"
"github.com/ampproject/amppackager/transformer/transformers"
"github.com/pkg/errors"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
"golang.org/x/net/html"
)

// Transformer functions must be added here in order to be passed in from
Expand All @@ -46,6 +46,7 @@ var transformerFunctionMap = map[string]func(*transformers.Context) error{
"nodecleanup": transformers.NodeCleanup,
"reorderhead": transformers.ReorderHead,
"serversiderendering": transformers.ServerSideRendering,
"stripjs": transformers.StripJS,
"transformedidentifier": transformers.TransformedIdentifier,
"url": transformers.URL,
}
Expand All @@ -56,6 +57,7 @@ var configMap = map[rpb.Request_TransformersConfig][]func(*transformers.Context)
rpb.Request_DEFAULT: {
// NodeCleanup should be first.
transformers.NodeCleanup,
transformers.StripJS,
transformers.MetaTag,
// TODO(alin04): Reenable LinkTag once validation is done.
// transformers.LinkTag,
Expand Down Expand Up @@ -105,9 +107,9 @@ var ampAttrRE = func() *regexp.Regexp {
}()

// The allowed AMP formats, and their serialization as an html "amp4" attribute.
var ampFormatSuffixes = map[rpb.Request_HtmlFormat]string {
rpb.Request_AMP: "",
rpb.Request_AMP4ADS: "ads",
var ampFormatSuffixes = map[rpb.Request_HtmlFormat]string{
rpb.Request_AMP: "",
rpb.Request_AMP4ADS: "ads",
rpb.Request_AMP4EMAIL: "email",
}

Expand Down
14 changes: 7 additions & 7 deletions transformer/transformer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func TestProcess(t *testing.T) {
config rpb.Request_TransformersConfig
expectedLen int
}{
{rpb.Request_DEFAULT, 5},
{rpb.Request_DEFAULT, 6},
{rpb.Request_NONE, 0},
{rpb.Request_VALIDATION, 1},
{rpb.Request_CUSTOM, 0},
Expand Down Expand Up @@ -54,7 +54,7 @@ func TestProcess(t *testing.T) {

func TestPreloads(t *testing.T) {
tests := []struct {
html string
html string
expectedPreloads []string
}{
{
Expand Down Expand Up @@ -153,11 +153,11 @@ func TestError(t *testing.T) {

func TestRequireAMPAttribute(t *testing.T) {
tests := []struct {
desc string
html string
expectedError bool
expectedErrorInAMP bool
expectedErrorInAMP4Ads bool
desc string
html string
expectedError bool
expectedErrorInAMP bool
expectedErrorInAMP4Ads bool
expectedErrorInAMP4Email bool
}{
{
Expand Down
89 changes: 89 additions & 0 deletions transformer/transformers/stripJs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package transformers

import (
"regexp"
"strings"

"github.com/ampproject/amppackager/transformer/internal/amphtml"
"github.com/ampproject/amppackager/transformer/internal/htmlnode"
"golang.org/x/net/html/atom"
"golang.org/x/net/html"
)

// StripJS removes non-AMP javascript from the DOM.
// - For <script> elements, remove where any of the following is true:
// - has a src attribute whose value is not prefixed by https://cdn.ampproject.org/ (case-insensitive match).
// - It has no src attribute and no type attribute (case-insensitive match).
// - It has a type attribute whose value is neither application/json nor application/ld+json (case-insensitive match on both name and value).
//
// - For all other elements, remove any event attribute that matches "on[A-Za-z].*".
func StripJS(e *Context) error {
stripJS(e.DOM.RootNode)
return nil
}

// eventRE is a regexp to match event attributes, e.g. onClick
var eventRE = func() *regexp.Regexp {
r := regexp.MustCompile("^on[A-Za-z].*")
return r
}()

// stripJS recursively does the actual work on each node.
func stripJS(n *html.Node) {
if n.Type == html.ElementNode {
switch n.DataAtom {
case atom.Script:
srcVal, srcOk := htmlnode.GetAttributeVal(n, "src")
if srcOk {
if !strings.HasPrefix(strings.ToLower(srcVal), amphtml.AMPCacheRootURL) {
n.Parent.RemoveChild(n)
return
}
}
typeVal, typeOk := htmlnode.GetAttributeVal(n, "type")
if !srcOk && !typeOk {
n.Parent.RemoveChild(n)
return
}
if typeOk {
switch strings.ToLower(typeVal) {
case "application/json", "application/ld+json":
// ok to keep
default:
n.Parent.RemoveChild(n)
return
}
}
default:
for _, attr := range n.Attr {
if attr.Namespace == "" {
if match := eventRE.MatchString(attr.Key); match {
htmlnode.RemoveAttribute(n, &attr)
}
}
}
}
}

for c := n.FirstChild; c != nil; {
// Track the next sibling because if the node is removed in the recursive
// call, it becomes orphaned and the pointer to NextSibling is lost.
next := c.NextSibling
stripJS(c)
c = next
}
}
125 changes: 125 additions & 0 deletions transformer/transformers/stripJs_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package transformers_test

import (
"strings"
"testing"

"github.com/ampproject/amppackager/transformer/internal/amphtml"
tt "github.com/ampproject/amppackager/transformer/internal/testing"
"github.com/ampproject/amppackager/transformer/transformers"
"golang.org/x/net/html"
)

func TestStripJS(t *testing.T) {
tcs := []tt.TestCase{
{
Desc: "strips script wrong src",
Input: "<script src=main.js/>",
Expected: "<head></head><body></body>",
},
{
Desc: "keeps script correct src",
Input: "<script async src=\"https://cdn.ampproject.org/v0.js\"></script>",
Expected: "<head><script async src=https://cdn.ampproject.org/v0.js></script></head>",
},
{
Desc: "keeps script correct src case-insensitive",
Input: "<script async custom-element='amp-analytics' src='https://cDn.AMPproject.org/v0/amp-analytics-0.1.js'></script>",
Expected: "<head><script async custom-element=amp-analytics src=https://cDn.AMPproject.org/v0/amp-analytics-0.1.js></script></head>",
},
{
Desc: "strips script no src, no type",
Input: "<script>foo</script>",
Expected: "<head></head><body></body>",
},
{
Desc: "strips script wrong type",
Input: "<script type=application/javascript>foo</script>",
Expected: "<head></head><body></body>",
},
{
Desc: "keeps script corect type",
Input: "<script type=application/json>foo</script>",
Expected: "<head><script type=application/json>foo</script></head><body></body>",
},
{
Desc: "strip tag attr ona",
Input: "<body><select ona=\"myFunction()\"></body>",
Expected: "<body><select></select></body>",
},
{
Desc: "strips tag event attr",
Input: "<body><select onchange=\"myFunction()\"></body>",
Expected: "<body><select></select></body>",
},
{
Desc: "strip tag attr onfoo",
Input: "<body><select onfoo=\"myFunction()\"></body>",
Expected: "<body><select></select></body>",
},
{
Desc: "keep tag attr 'on'",
Input: "<body><select on=\"myFunction()\"></body>",
Expected: "<body><select on=myFunction()></select></body>",
},
{
Desc: "keep tag attr on-foo",
Input: "<body><select on-foo=\"myFunction()\"></body>",
Expected: "<body><select on-foo=myFunction()></select></body>",
},
{
Desc: "keep tag attr notonchange",
Input: "<body><select notonchange=\"myFunction()\"></body>",
Expected: "<body><select notonchange=myFunction()></select></body>",
},
}

for _, tc := range tcs {
inputDoc, err := html.Parse(strings.NewReader(tc.Input))
if err != nil {
t.Errorf("%s: html.Parse failed %q", tc.Input, err)
continue
}
inputDOM, err := amphtml.NewDOM(inputDoc)
if err != nil {
t.Errorf("%s\namphtml.NewDOM for %s failed %q", tc.Desc, tc.Input, err)
continue
}
transformers.StripJS(&transformers.Context{DOM: inputDOM})
var input strings.Builder
if err := html.Render(&input, inputDoc); err != nil {
t.Errorf("%s: html.Render failed %q", tc.Input, err)
continue
}

expectedDoc, err := html.Parse(strings.NewReader(tc.Expected))
if err != nil {
t.Errorf("%s: html.Parse failed %q", tc.Expected, err)
continue
}
var expected strings.Builder
err = html.Render(&expected, expectedDoc)
if err != nil {
t.Errorf("%s: html.Render failed %q", tc.Expected, err)
continue
}
if input.String() != expected.String() {
t.Errorf("%s: Transform=\n%q\nwant=\n%q", tc.Desc, &input, &expected)
}
}

}

0 comments on commit 310b16c

Please sign in to comment.