Skip to content

Commit

Permalink
feat: parse html content (#4)
Browse files Browse the repository at this point in the history
  • Loading branch information
patrickhulce authored Mar 6, 2017
1 parent 02d2074 commit f06170a
Show file tree
Hide file tree
Showing 6 changed files with 219 additions and 2 deletions.
3 changes: 3 additions & 0 deletions lib/sources/factory.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ const _ = require('lodash')

const SimpleSource = require('./simple-source')
const JsSource = require('./js-source')
const HtmlSource = require('./html-source')

const FILE_PREFIX = 'file://'

Expand Down Expand Up @@ -66,6 +67,8 @@ class SourceFactory {
static _createSource(file, opts) {
if (file.type === 'js' && !opts.simple) {
return new JsSource(file.content)
} else if (file.type === 'html' && !opts.simple) {
return new HtmlSource(file.content)
}

return new SimpleSource(file.content)
Expand Down
71 changes: 71 additions & 0 deletions lib/sources/html-source.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
const _ = require('lodash')
const HtmlParser = require('htmlparser2').Parser

const debug = require('debug')('nukecss:html-source')
const SimpleSource = require('./simple-source')

/**
 * Content source backed by an HTML document.
 *
 * The tag name, `id` value and `class` value of every opening tag are
 * collected into a token set, and `contains` is answered from those tokens.
 * If tokenizing throws, no tokens are stored and `contains` searches the raw
 * text instead.
 */
class HtmlSource {
  /**
   * @param {string} text Raw HTML text.
   * @param {Object} [opts]
   * @param {Iterable<string>} [opts.tokens] Precomputed tokens. When given,
   *   `text` is not parsed.
   */
  constructor(text, opts = {}) {
    this._text = text

    let tokens = opts.tokens
    if (!tokens) {
      try {
        tokens = HtmlSource.tokenizeHtml(text)
      } catch (err) {
        // Leave the token fields unset so `contains` uses the raw-text search.
        debug(err)
        return
      }
    }

    // Normalise to both shapes: a Set for exact lookups and an array for
    // scanning. Copying also means an array passed as `opts.tokens` works.
    this._tokensArray = Array.from(tokens)
    this._tokens = new Set(this._tokensArray)
  }

  /** @return {string} Always `'html'`. */
  get type() {
    return 'html'
  }

  /**
   * Finds the first token whose text contains the selector according to
   * `SimpleSource.textContains` (needed for multi-class attribute values,
   * which are stored as one token).
   *
   * @param {string} selector
   * @return {string|undefined} The matching token, if any.
   */
  _findWholeSelectorInTokens(selector) {
    return this._tokensArray.find(token => SimpleSource.textContains(token, selector))
  }

  /**
   * @param {string} selector
   * @return {boolean} Whether the selector is used by this source.
   */
  contains(selector) {
    if (!this._tokens) {
      return SimpleSource.textContains(this._text, selector)
    }

    return this._tokens.has(selector) ||
      Boolean(this._findWholeSelectorInTokens(selector))
  }

  /**
   * Combines this source with another HtmlSource into a new HtmlSource.
   *
   * @param {HtmlSource} that
   * @return {HtmlSource}
   * @throws {Error} When `that` is not an html source.
   */
  join(that) {
    if (that.type !== 'html') {
      throw new Error('HtmlSource can only be joined with HtmlSource')
    }

    const joiner = '\n<!-- joined by nukecss -->\n'
    const text = `${this._text}${joiner}${that._text}`

    if (!this._tokens || !that._tokens) {
      // At least one side could not be tokenized and relies on raw-text
      // matching. Reusing only the other side's tokens would hide selectors
      // that appear solely in the unparsed text, so build the result from the
      // combined text and let the constructor parse it or fall back again.
      return new HtmlSource(text)
    }

    const tokens = new Set([...this._tokensArray, ...that._tokensArray])
    return new HtmlSource(text, {tokens})
  }

  /**
   * Collects tag names plus `id` and `class` attribute values from every
   * opening tag reported by the parser.
   *
   * @param {string} text Raw HTML text.
   * @return {Set<string>}
   */
  static tokenizeHtml(text) {
    const tokens = new Set()

    const parser = new HtmlParser({
      onopentag(name, attributes) {
        for (const candidate of [name, attributes.id, attributes.class]) {
          // `id` and `class` are undefined on tags that do not declare them.
          if (typeof candidate === 'string') {
            tokens.add(candidate)
          }
        }
      },
    }, {decodeEntities: true})

    parser.write(text)
    parser.end()
    return tokens
  }
}

module.exports = HtmlSource
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
"esprima": "^3.1.3",
"glob": "^7.1.1",
"gonzales-pe": "^4.0.3",
"htmlparser2": "^3.9.2",
"lodash": "^4.17.4",
"postcss": "^5.2.15"
},
Expand Down
10 changes: 9 additions & 1 deletion test/sources/factory.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,22 @@ describe('sources/factory.js', () => {
expect(sources).to.have.length(2)
})

it('should use the proper source', () => {
it('should use JsSource', () => {
  // With type "js" the identifier name must not match, the string literal must.
  const created = SourceFactory.fromObject({
    content: 'const foobar = "baz"',
    type: 'js',
  })
  expect(created).to.have.length(1)

  const jsSource = created[0]
  expect(jsSource.contains('foobar')).to.equal(false)
  expect(jsSource.contains('baz')).to.equal(true)
})

it('should use HtmlSource', () => {
  // With type "html" the text content must not match, the class name must.
  const created = SourceFactory.fromObject({
    content: '<html><p class="baz">foobar</p></html>',
    type: 'html',
  })
  expect(created).to.have.length(1)

  const htmlSource = created[0]
  expect(htmlSource.contains('foobar')).to.equal(false)
  expect(htmlSource.contains('baz')).to.equal(true)
})

it('should infer the proper source type from the extension', () => {
const filePath = path.join(__dirname, '../fixtures/content.js')
const sources = SourceFactory.fromObject({path: filePath})
Expand Down
91 changes: 91 additions & 0 deletions test/sources/html-source.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
const SimpleSource = require('../../lib/sources/simple-source.js')
const HtmlSource = require('../../lib/sources/html-source.js')

// Specs for HtmlSource: type reporting, joining, and selector lookup.
describe('sources/html-source.js', () => {
  it('should return the type', () => {
    expect(new HtmlSource('<html></html>')).to.have.property('type', 'html')
  })

  describe('#join', () => {
    it('should join to another HtmlSource', () => {
      const sourceA = new HtmlSource('<html></html>')
      const sourceB = new HtmlSource('<html></html>')
      expect(sourceA.join(sourceB)).to.have.property('type', 'html')
      expect(sourceB.join(sourceA)).to.have.property('type', 'html')
    })

    it('should join to a malformed HtmlSource', () => {
      const sourceA = new HtmlSource('<html></html>')
      // Unterminated tag: joining must still succeed in both directions.
      const sourceB = new HtmlSource('<html')
      expect(sourceA.join(sourceB)).to.have.property('type', 'html')
      expect(sourceB.join(sourceA)).to.have.property('type', 'html')
    })

    it('should not join to another non-HtmlSource', () => {
      const sourceA = new HtmlSource('<html></html>')
      const sourceB = new SimpleSource('other content')
      expect(() => sourceA.join(sourceB)).to.throw()
      expect(() => sourceB.join(sourceA)).to.throw()
    })
  })

  describe('#contains', () => {
    context('when html is simple', () => {
      // Fixture covering tag names, an id, single and multiple classes,
      // unrelated attributes, plain text and an inline script.
      const html = `
<!DOCTYPE html>
<html lang="en">
<head>
<title>Basic HTML Example</title>
<link href="app.css" rel="stylesheet">
<script src="app.js" type="text/javascript"></script>
</head>
<body>
<div id="my-hero-element" class="container">
<div class="several classes in-a-row">
<h1>My Header</h1>
<p class="lead">Examplelongtext</p>
</div>
</div>
<script>
const myJsVar = "my-javascript-class"
</script>
</body>
</html>
`

      const source = new HtmlSource(html)

      it('should find tokens as elements', () => {
        expect(source.contains('div')).to.equal(true)
        expect(source.contains('h1')).to.equal(true)
        expect(source.contains('p')).to.equal(true)
        expect(source.contains('script')).to.equal(true)
      })

      it('should find tokens as classes', () => {
        expect(source.contains('container')).to.equal(true)
        expect(source.contains('lead')).to.equal(true)
      })

      it('should find tokens as multiple classes', () => {
        expect(source.contains('several')).to.equal(true)
        expect(source.contains('classes')).to.equal(true)
        expect(source.contains('in-a-row')).to.equal(true)
      })

      it('should find tokens as identifiers', () => {
        expect(source.contains('my-hero-element')).to.equal(true)
      })

      it('should not find tokens as other attributes', () => {
        // `rel` and `type` values are not expected to be treated as tokens.
        expect(source.contains('stylesheet')).to.equal(false)
        expect(source.contains('javascript')).to.equal(false)
      })

      it('should not find tokens as text', () => {
        expect(source.contains('Header')).to.equal(false)
        // NOTE(review): the fixture text is "Examplelongtext" (capital E) while
        // this probes the lowercase form; confirm the case difference is intended.
        expect(source.contains('examplelongtext')).to.equal(false)
      })
    })
  })
})
45 changes: 44 additions & 1 deletion yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,34 @@ [email protected], doctrine@^1.2.2:
esutils "^2.0.2"
isarray "^1.0.0"

dom-serializer@0:
version "0.1.0"
resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-0.1.0.tgz#073c697546ce0780ce23be4a28e293e40bc30c82"
dependencies:
domelementtype "~1.1.1"
entities "~1.1.1"

domelementtype@1, domelementtype@^1.3.0:
version "1.3.0"
resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-1.3.0.tgz#b17aed82e8ab59e52dd9c19b1756e0fc187204c2"

domelementtype@~1.1.1:
version "1.1.3"
resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-1.1.3.tgz#bd28773e2642881aec51544924299c5cd822185b"

domhandler@^2.3.0:
version "2.3.0"
resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-2.3.0.tgz#2de59a0822d5027fabff6f032c2b25a2a8abe738"
dependencies:
domelementtype "1"

domutils@^1.5.1:
version "1.5.1"
resolved "https://registry.yarnpkg.com/domutils/-/domutils-1.5.1.tgz#dcd8488a26f563d61079e48c9f7b7e32373682cf"
dependencies:
dom-serializer "0"
domelementtype "1"

dot-prop@^3.0.0:
version "3.0.0"
resolved "https://registry.yarnpkg.com/dot-prop/-/dot-prop-3.0.0.tgz#1b708af094a49c9a0e7dbcad790aba539dac1177"
Expand Down Expand Up @@ -621,6 +649,10 @@ enhance-visitors@^1.0.0:
dependencies:
lodash "^4.13.1"

entities@^1.1.1, entities@~1.1.1:
version "1.1.1"
resolved "https://registry.yarnpkg.com/entities/-/entities-1.1.1.tgz#6e5c2d0a5621b5dadaecef80b90edfb5cd7772f0"

error-ex@^1.2.0:
version "1.3.0"
resolved "https://registry.yarnpkg.com/error-ex/-/error-ex-1.3.0.tgz#e67b43f3e82c96ea3a584ffee0b9fc3325d802d9"
Expand Down Expand Up @@ -1240,6 +1272,17 @@ hosted-git-info@^2.1.4, hosted-git-info@^2.1.5:
version "2.2.0"
resolved "https://registry.yarnpkg.com/hosted-git-info/-/hosted-git-info-2.2.0.tgz#7a0d097863d886c0fabbdcd37bf1758d8becf8a5"

htmlparser2@^3.9.2:
version "3.9.2"
resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-3.9.2.tgz#1bdf87acca0f3f9e53fa4fcceb0f4b4cbb00b338"
dependencies:
domelementtype "^1.3.0"
domhandler "^2.3.0"
domutils "^1.5.1"
entities "^1.1.1"
inherits "^2.0.1"
readable-stream "^2.0.2"

http-signature@~1.1.0:
version "1.1.1"
resolved "https://registry.yarnpkg.com/http-signature/-/http-signature-1.1.1.tgz#df72e267066cd0ac67fb76adf8e134a8fbcf91bf"
Expand Down Expand Up @@ -1269,7 +1312,7 @@ inflight@^1.0.4:
once "^1.3.0"
wrappy "1"

inherits@2, inherits@^2.0.3, inherits@~2.0.0, inherits@~2.0.1:
inherits@2, inherits@^2.0.1, inherits@^2.0.3, inherits@~2.0.0, inherits@~2.0.1:
version "2.0.3"
resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.3.tgz#633c2c83e3da42a502f52466022480f4208261de"

Expand Down

0 comments on commit f06170a

Please sign in to comment.