Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve question detection #114

Merged
merged 5 commits into from
Jul 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions modules/ravestate_nlp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def roboy_getter(doc) -> bool:
with rs.Module(name="nlp"):

prop_tokens = rs.Property(name="tokens", default_value="", always_signal_changed=True, allow_pop=False, allow_push=False)
porp_postags = rs.Property(name="postags", default_value="", always_signal_changed=True, allow_pop=False, allow_push=False)
prop_postags = rs.Property(name="postags", default_value="", always_signal_changed=True, allow_pop=False, allow_push=False)
josephbirkner marked this conversation as resolved.
Show resolved Hide resolved
prop_lemmas = rs.Property(name="lemmas", default_value="", always_signal_changed=True, allow_pop=False, allow_push=False)
prop_tags = rs.Property(name="tags", default_value="", always_signal_changed=True, allow_pop=False, allow_push=False)
prop_ner = rs.Property(name="ner", default_value="", always_signal_changed=True, allow_pop=False, allow_push=False)
Expand All @@ -55,7 +55,7 @@ def roboy_getter(doc) -> bool:
sig_intent_play = rs.Signal(name="intent-play")


@rs.state(read=rawio.prop_in, write=(prop_tokens, porp_postags, prop_lemmas, prop_tags, prop_ner, prop_triples, prop_roboy, prop_yesno))
@rs.state(read=rawio.prop_in, write=(prop_tokens, prop_postags, prop_lemmas, prop_tags, prop_ner, prop_triples, prop_roboy, prop_yesno))
def nlp_preprocess(ctx):
text = ctx[rawio.prop_in]
if not text:
Expand All @@ -68,7 +68,7 @@ def nlp_preprocess(ctx):
logger.info(f"[NLP:tokens]: {nlp_tokens}")

nlp_postags = tuple(str(token.pos_) for token in nlp_doc)
ctx[porp_postags] = nlp_postags
ctx[prop_postags] = nlp_postags
logger.info(f"[NLP:postags]: {nlp_postags}")

nlp_lemmas = tuple(str(token.lemma_) for token in nlp_doc)
Expand All @@ -84,6 +84,7 @@ def nlp_preprocess(ctx):
logger.info(f"[NLP:ner]: {nlp_ner}")

nlp_triples = nlp_doc._.triples
nlp_triples[0].set_yesno_question(detect_yesno_question(nlp_postags))
ctx[prop_triples] = nlp_triples
logger.info(f"[NLP:triples]: {nlp_triples}")

Expand All @@ -101,7 +102,7 @@ def nlp_contains_roboy_signal(ctx):
return rs.Emit()
return False

@rs.state(signal=sig_is_question, read=prop_triples)
@rs.state(signal=sig_is_question, read=(prop_triples, prop_tags))
def nlp_is_question_signal(ctx):
if ctx[prop_triples][0].is_question():
return rs.Emit()
josephbirkner marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -114,3 +115,9 @@ def nlp_intent_play_signal(ctx):
return rs.Emit()
return False

def detect_yesno_question(tags):
    """
    Test whether a sequence of part-of-speech tags opens like a yes/no question.

    The leading tags must be a finite verb or modal (VBP, VBD, VBZ, MD),
    optionally followed by exactly one adverb (RB, such as a negation), and
    then a pronoun or determiner (PRP, DT).

    :param tags: sized, indexable sequence of tag strings, one per token.
    :return: True if the leading tags match the pattern above, otherwise
        False. Sequences that are too short to match (including an empty
        one) yield False instead of raising IndexError.
    """
    verb_tags = {'VBP', 'VBD', 'VBZ', 'MD'}
    subject_tags = {'PRP', 'DT'}

    # A verb followed by a subject is the minimum; anything shorter cannot match.
    if len(tags) < 2 or tags[0] not in verb_tags:
        return False
    if tags[1] in subject_tags:
        return True
    # Allow a single adverb between the verb and the subject.
    return tags[1] == 'RB' and len(tags) > 2 and tags[2] in subject_tags
11 changes: 10 additions & 1 deletion modules/ravestate_nlp/triple.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class Triple:
_predicate: Token
_predicate_aux: Token
_object: Union[Token, QuestionWord]
_yesno_question: bool

def __init__(self, subject: Token = None, predicate: Token = None, predicate_aux: Token=None, object: Token = None):
self.set_subject(subject)
Expand All @@ -28,6 +29,9 @@ def set_predicate_aux(self, predicate_aux: Token):
def set_object(self, object: Union[Token, QuestionWord]):
    """
    Store the given token or question word as the object of this triple.
    """
    self._object = object

def set_yesno_question(self, is_yesno: bool):
    """
    Store the flag marking this triple as belonging to a yes/no question.

    :param is_yesno: value to remember; it is stored as given.
    """
    self._yesno_question = is_yesno

def get_subject(self) -> Union[Token, QuestionWord]:
    """
    Return the subject currently stored on this triple.
    """
    return self._subject

Expand All @@ -40,6 +44,9 @@ def get_predicate_aux(self) -> Token:
def get_object(self) -> Union[Token, QuestionWord]:
    """
    Return the object currently stored on this triple.
    """
    return self._object

def get_yesno_question(self) -> bool:
    """
    Return the yes/no-question flag of this triple.

    :return: the value last stored via ``set_yesno_question``, or False if
        the flag has not been assigned on this instance.
    """
    # The class body only annotates ``_yesno_question`` without giving it a
    # value, so the attribute may not exist yet; treat that as "not a
    # yes/no question" instead of raising AttributeError.
    return getattr(self, '_yesno_question', False)

def has_subject(self) -> bool:
    """
    Tell whether this triple has a subject with non-blank text.

    :return: True if a truthy subject is stored and its ``text`` contains
        at least one non-whitespace character, otherwise False.
    """
    # bool() keeps the declared return type: a bare ``and`` would hand back
    # the falsy subject itself (e.g. None) rather than False.
    return bool(self._subject) and len(self._subject.text.strip()) > 0

Expand Down Expand Up @@ -79,7 +86,9 @@ def is_question(self, question_word: Optional[str] = None):
if question_word:
return self.match_either_lemma(obj={question_word}, subj={question_word})
else:
return isinstance(self._subject, QuestionWord) or isinstance(self._object, QuestionWord)
return self._yesno_question \
or isinstance(self._subject, QuestionWord) \
or isinstance(self._object, QuestionWord)

def ensure_notnull(self, empty_token):
# do not allow empty entries in triple
Expand Down
4 changes: 2 additions & 2 deletions test/modules/ravestate_nlp/test_preprocessing.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from ravestate_nlp import nlp_preprocess, prop_tokens, porp_postags, prop_lemmas, prop_tags, prop_ner, prop_roboy
from ravestate_nlp import nlp_preprocess, prop_tokens, prop_postags, prop_lemmas, prop_tags, prop_ner, prop_roboy
from ravestate_rawio import prop_in as raw_in
from testfixtures import log_capture

Expand All @@ -23,7 +23,7 @@ def test_tokenization(capture, basic_input):
def test_postags(capture, basic_input):
nlp_preprocess(basic_input)
expected = ('INTJ', 'NOUN', 'DET', 'NOUN', 'VERB', 'ADJ')
assert basic_input[porp_postags] == expected
assert basic_input[prop_postags] == expected
capture.check_present((f"{FILE_NAME}", 'INFO', f"[NLP:postags]: {expected}"))


Expand Down