Skip to content

Commit

Permalink
Merge pull request #114 from Roboy/question-detection-improvement
Browse files Browse the repository at this point in the history
Improve question detection
  • Loading branch information
ec-m authored Jul 2, 2019
2 parents 899cf91 + b0beb18 commit 19adeaa
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 7 deletions.
15 changes: 11 additions & 4 deletions modules/ravestate_nlp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def roboy_getter(doc) -> bool:
with rs.Module(name="nlp"):

prop_tokens = rs.Property(name="tokens", default_value="", always_signal_changed=True, allow_pop=False, allow_push=False)
porp_postags = rs.Property(name="postags", default_value="", always_signal_changed=True, allow_pop=False, allow_push=False)
prop_postags = rs.Property(name="postags", default_value="", always_signal_changed=True, allow_pop=False, allow_push=False)
prop_lemmas = rs.Property(name="lemmas", default_value="", always_signal_changed=True, allow_pop=False, allow_push=False)
prop_tags = rs.Property(name="tags", default_value="", always_signal_changed=True, allow_pop=False, allow_push=False)
prop_ner = rs.Property(name="ner", default_value="", always_signal_changed=True, allow_pop=False, allow_push=False)
Expand All @@ -55,7 +55,7 @@ def roboy_getter(doc) -> bool:
sig_intent_play = rs.Signal(name="intent-play")


@rs.state(read=rawio.prop_in, write=(prop_tokens, porp_postags, prop_lemmas, prop_tags, prop_ner, prop_triples, prop_roboy, prop_yesno))
@rs.state(read=rawio.prop_in, write=(prop_tokens, prop_postags, prop_lemmas, prop_tags, prop_ner, prop_triples, prop_roboy, prop_yesno))
def nlp_preprocess(ctx):
text = ctx[rawio.prop_in]
if not text:
Expand All @@ -68,7 +68,7 @@ def nlp_preprocess(ctx):
logger.info(f"[NLP:tokens]: {nlp_tokens}")

nlp_postags = tuple(str(token.pos_) for token in nlp_doc)
ctx[porp_postags] = nlp_postags
ctx[prop_postags] = nlp_postags
logger.info(f"[NLP:postags]: {nlp_postags}")

nlp_lemmas = tuple(str(token.lemma_) for token in nlp_doc)
Expand All @@ -84,6 +84,7 @@ def nlp_preprocess(ctx):
logger.info(f"[NLP:ner]: {nlp_ner}")

nlp_triples = nlp_doc._.triples
nlp_triples[0].set_yesno_question(detect_yesno_question(nlp_postags))
ctx[prop_triples] = nlp_triples
logger.info(f"[NLP:triples]: {nlp_triples}")

Expand All @@ -101,7 +102,7 @@ def nlp_contains_roboy_signal(ctx):
return rs.Emit()
return False

@rs.state(signal=sig_is_question, read=prop_triples)
@rs.state(signal=sig_is_question, read=(prop_triples, prop_tags))
def nlp_is_question_signal(ctx):
if ctx[prop_triples][0].is_question():
return rs.Emit()
Expand All @@ -114,3 +115,9 @@ def nlp_intent_play_signal(ctx):
return rs.Emit()
return False

def detect_yesno_question(tags):
    """
    Test whether a sequence of tags indicates that a yes/no question was asked.

    A yes/no question is recognised when the sequence starts with
    - an auxiliary/modal tag ('VBP', 'VBD', 'VBZ', 'MD') directly followed by
      a pronoun or determiner tag ('PRP', 'DT'), or
    - an auxiliary/modal tag, then 'RB', then a pronoun or determiner tag.

    Sequences that are too short to match either pattern (including an empty
    sequence) yield False instead of raising IndexError.

    NOTE(review): the comparison uses fine-grained tag names such as 'VBZ';
    confirm that callers pass fine-grained tags and not coarse part-of-speech
    labels, otherwise this never matches.

    :param tags: indexable sequence of tag strings, one per token
    :return: True if the leading tags match a yes/no question pattern
    """
    aux_tags = {'VBP', 'VBD', 'VBZ', 'MD'}
    subject_tags = {'PRP', 'DT'}

    # Both patterns need an auxiliary first and at least one more tag.
    if len(tags) < 2 or tags[0] not in aux_tags:
        return False

    # Pattern 1: auxiliary + subject, e.g. "Is it ...", "Can the ...".
    if tags[1] in subject_tags:
        return True

    # Pattern 2: auxiliary + adverb + subject; requires a third tag.
    return len(tags) > 2 and tags[1] == 'RB' and tags[2] in subject_tags
11 changes: 10 additions & 1 deletion modules/ravestate_nlp/triple.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class Triple:
_subject: Union[Token, QuestionWord]
_predicate: Token
_object: Union[Token, QuestionWord]
_yesno_question: bool

def __init__(self, subject: Token = None, predicate: Token = None, object: Token = None):
self.set_subject(subject)
Expand All @@ -24,6 +25,9 @@ def set_predicate(self, predicate: Token):
def set_object(self, object: Union[Token, QuestionWord]):
self._object = object

def set_yesno_question(self, is_yesno: bool) -> None:
    """
    Store the yes/no-question flag on this triple.

    :param is_yesno: value to keep in the `_yesno_question` attribute
    """
    self._yesno_question = is_yesno

def get_subject(self) -> Union[Token, QuestionWord]:
return self._subject

Expand All @@ -33,6 +37,9 @@ def get_predicate(self) -> Token:
def get_object(self) -> Union[Token, QuestionWord]:
return self._object

def get_yesno_question(self) -> bool:
    """
    Return the yes/no-question flag stored on this triple.

    The `_yesno_question` attribute is only assigned by
    `set_yesno_question`, so an instance on which the setter was never
    called has no such attribute. In that case the flag is reported as
    False rather than raising AttributeError.

    :return: the stored flag, or False if none has been set
    """
    return getattr(self, "_yesno_question", False)

def has_subject(self) -> bool:
return self._subject and len(self._subject.text.strip()) > 0

Expand Down Expand Up @@ -87,7 +94,9 @@ def is_question(self, question_word: Optional[str] = None):
if question_word:
return self.match_either_lemma(obj={question_word}, subj={question_word})
else:
return isinstance(self._subject, QuestionWord) or isinstance(self._object, QuestionWord)
return self._yesno_question \
or isinstance(self._subject, QuestionWord) \
or isinstance(self._object, QuestionWord)

def ensure_notnull(self, empty_token):
# do not allow empty entries in triple
Expand Down
4 changes: 2 additions & 2 deletions test/modules/ravestate_nlp/test_preprocessing.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from ravestate_nlp import nlp_preprocess, prop_tokens, porp_postags, prop_lemmas, prop_tags, prop_ner, prop_roboy
from ravestate_nlp import nlp_preprocess, prop_tokens, prop_postags, prop_lemmas, prop_tags, prop_ner, prop_roboy
from ravestate_rawio import prop_in as raw_in
from testfixtures import log_capture

Expand All @@ -23,7 +23,7 @@ def test_tokenization(capture, basic_input):
def test_postags(capture, basic_input):
nlp_preprocess(basic_input)
expected = ('INTJ', 'NOUN', 'DET', 'NOUN', 'VERB', 'ADJ')
assert basic_input[porp_postags] == expected
assert basic_input[prop_postags] == expected
capture.check_present((f"{FILE_NAME}", 'INFO', f"[NLP:postags]: {expected}"))


Expand Down

0 comments on commit 19adeaa

Please sign in to comment.