Skip to content

Commit

Permalink
Fix issues with replaceWith
Browse files Browse the repository at this point in the history
1. The `keep` options are now respected when using the function-param
   variant of `replaceWith`.
2. `keep: case` now respects the case of the original target, instead of
   basing the decision on the first term of a sentence. I believe this is
   the expected behavior.
3. Unmatched replacements no longer crash when used with keep options.
   Demonstrated by: `replace-with-no-match`.
4. `pre` text is now preserved. Demonstrated by: `replace-with-phone-number`.
5. Add type definitions for `keep`. Some outdated documentation still
   refers to old names for these params, and `possessives` was
   undocumented; the types make the options easier to use correctly.
6. Add a test for `keep: possessives`.
  • Loading branch information
tony-scio committed Sep 22, 2024
1 parent 8565e74 commit fc4d122
Show file tree
Hide file tree
Showing 10 changed files with 94 additions and 23 deletions.
2 changes: 1 addition & 1 deletion builds/compromise.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion builds/one/compromise-one.cjs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion builds/one/compromise-one.mjs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion builds/three/compromise-three.cjs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion builds/three/compromise-three.mjs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion builds/two/compromise-two.cjs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion builds/two/compromise-two.mjs

Large diffs are not rendered by default.

39 changes: 24 additions & 15 deletions src/1-one/change/api/replace.js
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
// matches every `$` followed by one or more digits or lowercase letters (global flag)
// NOTE(review): presumably the '$0'-style placeholders used in replacements — usage is not visible here, confirm
const dollarStub = /\$[0-9a-z]+/g
// container object; methods such as `replaceWith` are attached to it as properties
const fns = {}

// Re-case every chunk matched by /\w\S*/ (a word character plus the
// non-whitespace run after it): first character upper-cased, the rest lower-cased.
const titleCase = function (str) {
  const recase = chunk => {
    const head = chunk.charAt(0).toUpperCase()
    const tail = chunk.substring(1).toLowerCase()
    return head + tail
  }
  return str.replace(/\w\S*/g, recase)
}
// case logic
// true when `str` is exactly one uppercase letter, or starts with an uppercase
// letter immediately followed by a lowercase letter or an apostrophe
const isTitleCase = function (str) {
  if (/^\p{Lu}$/u.test(str)) {
    return true
  }
  return /^\p{Lu}[\p{Ll}']/u.test(str)
}
// upper-case the leading character of `str` when it is a lowercase letter;
// any other string comes back unchanged
const toTitleCase = function (str) {
  const raise = first => first.toUpperCase()
  return str.replace(/^\p{Ll}/u, raise)
}
// lower-case the leading character of `str` when it is an uppercase letter;
// the remainder of the string is left as it was
const toLowerCase = function (str) {
  const drop = first => first.toLowerCase()
  return str.replace(/^\p{Lu}/u, drop)
}

// doc.replace('foo', (m)=>{})
const replaceByFn = function (main, fn) {
const replaceByFn = function (main, fn, keep) {
main.forEach(m => {
let out = fn(m)
m.replaceWith(out)
m.replaceWith(out, keep)
})
return main
}
Expand All @@ -35,10 +36,12 @@ fns.replaceWith = function (input, keep = {}) {
let main = this
this.uncache()
if (typeof input === 'function') {
return replaceByFn(main, input)
return replaceByFn(main, input, keep)
}
let terms = main.docs[0]
let isPossessive = keep.possessives && terms[terms.length - 1].tags.has('Possessive')
if (!terms) return main
let isOriginalPossessive = keep.possessives && terms[terms.length - 1].tags.has('Possessive')
let isOriginalTitleCase = keep.case && isTitleCase(terms[0].text)
// support 'foo $0' replacements
input = subDollarSign(input, main)

Expand All @@ -47,6 +50,8 @@ fns.replaceWith = function (input, keep = {}) {
ptrs = ptrs.map(ptr => ptr.slice(0, 3))
// original.freeze()
let oldTags = (original.docs[0] || []).map(term => Array.from(term.tags))
let originalPre = original.docs[0][0].pre
let originalPost = original.docs[0][original.docs[0].length - 1].post
// slide this in
if (typeof input === 'string') {
input = this.fromText(input).compute('id')
Expand All @@ -61,7 +66,7 @@ fns.replaceWith = function (input, keep = {}) {
main.delete(original) //science.

// keep "John's"
if (isPossessive) {
if (isOriginalPossessive) {
let tmp = main.docs[0]
let term = tmp[tmp.length - 1]
if (!term.tags.has('Possessive')) {
Expand All @@ -70,6 +75,11 @@ fns.replaceWith = function (input, keep = {}) {
term.tags.add('Possessive')
}
}

// try to keep some pre-post punctuation
if (originalPre) main.docs[0][0].pre = originalPre
if (originalPost && !main.docs[0][main.docs[0].length - 1].post.trim()) main.docs[0][main.docs[0].length - 1].post = originalPost

// what should we return?
let m = main.toView(ptrs).compute(['index', 'freeze', 'lexicon'])
if (m.world.compute.preTagger) {
Expand All @@ -82,16 +92,15 @@ fns.replaceWith = function (input, keep = {}) {
term.tagSafe(oldTags[i])
})
}

if (!m.docs[0] || !m.docs[0][0]) return m

// try to co-erce case, too
if (keep.case && m.docs[0] && m.docs[0][0] && m.docs[0][0].index[1] === 0) {
m.docs[0][0].text = titleCase(m.docs[0][0].text)
if (keep.case) {
let transformCase = isOriginalTitleCase ? toTitleCase : toLowerCase
m.docs[0][0].text = transformCase(m.docs[0][0].text)
}

// try to keep some pre-post punctuation
// if (m.terms().length === 1 && main.terms().length === 1) {
// console.log(original.docs)
// }

// console.log(input.docs[0])
// let regs = input.docs[0].map(t => {
// return { id: t.id, optional: true }
Expand Down
53 changes: 53 additions & 0 deletions tests/two/transform/replace.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ test('replace-with-function', function (t) {
doc.replace('#Person', fn)
t.equal(doc.text(), 'nancy and johnny', here + 'replace function')

doc = nlp('spencer and John')
doc.match('#Person').replaceWith(fn, { case: true })
t.equal(doc.text(), 'nancy and Johnny')

doc = nlp('Thurs, Feb 2nd, 2016')
doc.match('feb').replaceWith(m => {
return m.text({ trim: true }) + '!'
Expand All @@ -110,6 +114,19 @@ test('replace-with-function', function (t) {
t.end()
})

// keep.case and keep.possessives together, using a function as the replacement
test('replace-with-possessives', function (t) {
  // matches containing "john" become "johnny"; every other match becomes "nancy"
  const pickName = person => (person.has('john') ? 'johnny' : 'nancy')
  const view = nlp('spencer\'s house is cooler than John\'s house.')
  view.replace('#Person', pickName, { case: true, possessives: true })
  const result = view.text()
  t.equal(result, 'nancy\'s house is cooler than Johnny\'s house.')
  t.end()
})

test('replace-tags-param', function (t) {
let doc = nlp('Spencer is very cool.')
doc.match('spencer').replaceWith('jogging')
Expand Down Expand Up @@ -180,3 +197,39 @@ test('replace is cloned', function (t) {

t.end()
})

// replacing an empty match must leave the text unchanged, with or without keep options
test('replace-with-no-match', function (t) {
  const view = nlp('original')

  // no keep options
  const plainMiss = view.match('missing')
  plainMiss.replaceWith('unreached')
  t.equal(view.text(), 'original')

  // every keep option switched on
  const keepAll = { tags: true, possessives: true, case: true }
  view.match('missing').replaceWith('unreached', keepAll)
  t.equal(view.text(), 'original')
  t.end()
})

// text in front of the phone number ("+1 (") must still be there after the replacement
test('replace-with-phone-number', function (t) {
  const view = nlp('Phone: +1 (123) 456-7890')
  // swap the first run of three digits in a match's text for 555
  const swapFirstTriple = function (p) {
    const raw = p.text()
    return raw.replace(/\d{3}/, '555')
  }
  view.match('#PhoneNumber').replaceWith(swapFirstTriple)
  t.equal(view.text(), 'Phone: +1 (555) 555-7890')

  t.end()
})

// backticks wrapped around the target must still wrap the replacement
test('replace-with-pre-post', function (t) {
  // one term swapped for one term
  const single = nlp('`Target`')
  single.match('Target').replaceWith('Replacement')
  t.equal(single.text(), '`Replacement`')

  // possessive target, with keep.possessives on
  const owned = nlp('`John\'s`')
  owned.match('John').replaceWith('Spencer', { possessives: true })
  t.equal(owned.text(), '`Spencer\'s`')

  // two terms collapsed into one
  const shrunk = nlp('the `pit bull` played').match('#Noun+').replace('snake').all()
  t.equal(shrunk.text(), 'the `snake` played')

  // one term expanded into two
  const grown = nlp('the `snake` played').match('#Noun+').replace('pit bull').all()
  t.equal(grown.text(), 'the `pit bull` played')

  t.end()
})
11 changes: 10 additions & 1 deletion types/view/one.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@ import type { Document, Pointer, Groups, JsonProps, outMethods, matchOptions, Te

export type Matchable = string | View | Net | ParsedMatch

/** Options accepted as the `keep` argument of `replaceWith`: which traits of the original match should survive the replacement. */
export interface ReplaceWithProps {
/** preserve the case of the original, ignoring the case of the replacement: the replacement's first letter is upper-cased when the original's first term was title-case, and lower-cased otherwise */
case?: boolean
/** preserve whether the original was a possessive, e.g. replacing `John` in `John's` with `Spencer` yields `Spencer's` */
possessives?: boolean
/** preserve all of the tags of the original, regardless of the tags of the replacement */
tags?: boolean
}

declare class View {
// Utils
/** is this document empty? */
Expand Down Expand Up @@ -158,7 +167,7 @@ declare class View {
/** search and replace match with new content */
replace: (from: string | View, to?: string | Function, keep?: object) => View
/** substitute-in new content */
replaceWith: (to: string | Function, keep?: object) => View
replaceWith: (to: string | Function, keep?: ReplaceWithProps) => View

/** remove any duplicate matches */
unique: () => View
Expand Down

0 comments on commit fc4d122

Please sign in to comment.