This repository has been archived by the owner on Aug 19, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbisheng.js
117 lines (96 loc) · 3.5 KB
/
bisheng.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
const cheerio = require('cheerio')
// Matches a run of ASCII word characters, dots, spaces and dashes (group 2)
// together with the single character before it (group 1, '' when the run
// starts the text) and after it (group 4, '' when the run ends the text).
// The regex is global, so exec() is stateful through lastIndex.
const alphaNumericRe = /(^|[\W^.^ ^-])((\w|\.| |-)+)(\s|\S|$)/g
const cjkRe = /[\u4e00-\u9fff]/ // CJK Unified Ideographs
// One Chinese punctuation mark, classified by capture group:
//   group 1 "normal": ideographic full stop, fullwidth comma, ideographic
//                     comma, fullwidth question mark
//   group 2 "open":   left corner bracket, left double angle bracket,
//                     fullwidth left parenthesis
//   group 3 "close":  right corner bracket, right double angle bracket,
//                     fullwidth right parenthesis
// The marks are written as \u escapes on purpose: their ASCII look-alikes
// (",", "?", "(", ")") are regex syntax and make this literal invalid.
const simpleChinesePunctuationRe = /(\u3002|\uFF0C|\u3001|\uFF1F)|(\u300C|\u300A|\uFF08)|(\u300D|\u300B|\uFF09)/
// A punctuation mark (group 2, with groups 3-5 being normal/open/close) plus
// the character before it (group 1) and after it (group 6); either neighbour
// is '' at the corresponding edge of the text.
const chinesePunctuationRe = new RegExp(`(^|\\s|\\S)(${simpleChinesePunctuationRe.source})(\\s|\\S|$)`, 'g')
/**
 * Returns the last character of the text that sits directly before `node`,
 * or '' when there is nothing before it.
 *
 * The node's own previous sibling is used when it has one; only when the
 * node is the first child does the lookup fall back to the previous sibling
 * of its parent. Looking at the parent's sibling unconditionally would read
 * a character that is not adjacent to the node.
 *
 * @param {Function} $ - cheerio instance; called on the neighbouring node to
 *   read its text.
 * @param {Object} node - DOM node exposing `prev` and `parent`.
 * @returns {string} a single character, or ''.
 */
function getBeforeNodeLastWord($, node) {
  // tricky, used private interface (raw prev/parent links of the DOM node)
  const previous = node.prev || (node.parent && node.parent.prev)
  if (!previous) return ''
  return $(previous).text().slice(-1)
}
/**
 * Returns the first character of the text that sits directly after `node`,
 * or '' when there is nothing after it.
 *
 * The node's own next sibling is used when it has one; only when the node is
 * the last child does the lookup fall back to the next sibling of its
 * parent. Looking at the parent's sibling unconditionally would read a
 * character that is not adjacent to the node.
 *
 * @param {Function} $ - cheerio instance; called on the neighbouring node to
 *   read its text.
 * @param {Object} node - DOM node exposing `next` and `parent`.
 * @returns {string} a single character, or ''.
 */
function getAfterNodeFirstWord($, node) {
  const following = node.next || (node.parent && node.parent.next)
  if (!following) return ''
  return $(following).text().slice(0, 1)
}
/**
 * Inserts a space between a CJK ideograph and an adjacent run of ASCII
 * word characters / dots / spaces / dashes inside `text`.
 *
 * When a run touches the start or end of `text`, the neighbouring character
 * is taken from the surrounding nodes via getBeforeNodeLastWord /
 * getAfterNodeFirstWord. No space is added on a side where the run already
 * begins or ends with a space, so text that is already spaced is unchanged.
 *
 * @param {string} text - content of a single text node.
 * @param {Function} $ - cheerio instance, forwarded to the neighbour lookups.
 * @param {Object} node - the text node `text` came from, forwarded likewise.
 * @returns {string} `text` with the missing spaces inserted.
 */
function addWhiteSpace(text, $, node) {
  // alphaNumericRe is a shared global regex: make sure a stale lastIndex left
  // by an interrupted earlier scan cannot make this scan skip input.
  alphaNumericRe.lastIndex = 0
  const modifier = []
  let result = alphaNumericRe.exec(text)
  while (result) {
    let [, begin, target, , end] = result
    const beginIndex = result.index + begin.length
    const endIndex = alphaNumericRe.lastIndex - end.length
    // Rewind over the trailing character so that it can act as the leading
    // character of the next match.
    alphaNumericRe.lastIndex = endIndex
    if (!begin) {
      begin = getBeforeNodeLastWord($, node)
    }
    if (!end) {
      end = getAfterNodeFirstWord($, node)
    }
    // The run itself may start/end with a space; do not double it.
    const spaceStart = cjkRe.test(begin) && !target.startsWith(' ')
    const spaceEnd = cjkRe.test(end) && !target.endsWith(' ')
    if (spaceStart || spaceEnd) {
      modifier.push([target, beginIndex, endIndex, spaceStart, spaceEnd])
    }
    result = alphaNumericRe.exec(text)
  }
  // Apply from the last match to the first so earlier indexes stay valid.
  while (modifier.length) {
    const [target, begin, end, spaceStart, spaceEnd] = modifier.pop()
    text = `${text.slice(0, begin)}${spaceStart ? ' ' : ''}${target}${spaceEnd ? ' ' : ''}${text.slice(end)}`
  }
  return text
}
/**
 * Wraps every Chinese punctuation mark in `text` with
 * `<span class="punctuation ppt-TYPE..."><span>MARK</span></span>`.
 *
 * TYPE is `normal`, `open` or `close`, according to which capture group of
 * simpleChinesePunctuationRe matched. ` ptt-after` is appended when the next
 * character of `text` is also a punctuation mark and ` ppt-before` when the
 * previous one is.
 *
 * Each mark is handled in a single pass with its direct neighbours looked up
 * by offset, so consecutive marks are all wrapped, including a pair at the
 * very start of the text, and no shared regex state is involved.
 *
 * @param {string} text - content of a single text node.
 * @returns {string} `text` with the punctuation wrapped in markup.
 */
function addSpanForPunctuation(text) {
  // Private global copy: simpleChinesePunctuationRe itself has no `g` flag
  // and is also used below for stateless test() calls.
  const punctuationRe = new RegExp(simpleChinesePunctuationRe.source, 'g')
  return text.replace(punctuationRe, (mark, normal, open, close, offset) => {
    let type = 'close'
    if (normal) {
      type = 'normal'
    } else if (open) {
      type = 'open'
    }
    // charAt() yields '' outside the string, which never tests as punctuation.
    const isPunctuationBefore = simpleChinesePunctuationRe.test(text.charAt(offset - 1))
    const isPunctuationAfter = simpleChinesePunctuationRe.test(text.charAt(offset + mark.length))
    // NOTE(review): ' ptt-after' is spelled differently from the 'ppt-'
    // prefix used elsewhere; kept as-is because stylesheets may rely on it.
    const afterClass = isPunctuationAfter ? ' ptt-after' : ''
    const beforeClass = isPunctuationBefore ? ' ppt-before' : ''
    return `<span class="punctuation ppt-${type}${afterClass}${beforeClass} "><span>${mark}</span></span>`
  })
}
/**
 * Walks the given cheerio selection depth-first and rewrites every text node
 * found below it: the text is passed through addWhiteSpace, then through
 * addSpanForPunctuation, and the node is replaced with the result.
 *
 * For each container, child nodes of type 'tag' are processed (recursively)
 * before the container's own text nodes; nodes of any other type are left
 * untouched.
 *
 * @param {Function} $ - cheerio instance used to wrap raw nodes.
 * @param {Object} nodes - cheerio selection of container nodes.
 */
function traverseAndReplace($, nodes) {
  const ofType = (kind) => (_, child) => child.type === kind
  nodes.each((_, container) => {
    const children = $(container).contents()
    const elements = children.filter(ofType('tag'))
    const texts = children.filter(ofType('text'))
    // Descend first so nested elements are finished before their siblings'
    // text is inspected.
    traverseAndReplace($, elements)
    texts.each((__, textNode) => {
      const wrapped = $(textNode)
      const spaced = addWhiteSpace(wrapped.text(), $, textNode)
      wrapped.replaceWith(addSpanForPunctuation(spaced, $, textNode))
    })
  })
}
module.exports = (html) => {
const $ = cheerio.load(html, {decodeEntities: false})
traverseAndReplace($, $.root())
return $.html()
}