Skip to content

Commit

Permalink
Fixed typos in pretokenized code (issue #32) and in previous commit
Browse files Browse the repository at this point in the history
  • Loading branch information
marcverhagen committed Jun 10, 2024
1 parent d9ceef7 commit cc6b96a
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 11 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ python app.py -t example-mmif.json out.json

### Configurable runtime parameters

For the full list of parameters, please refer to the app metadata in the CLAMS App Directory at [https://apps.clams.ai/#spacy-wrapper](https://apps.clams.ai/clamsapp/) or the [`metadata.py`](metadata.py) file in this repository.
For the full list of parameters, please refer to the app metadata in the CLAMS App Directory at [https://apps.clams.ai/#spacy-wrapper](https://apps.clams.ai/#spacy-wrapper) or the [`metadata.py`](metadata.py) file in this repository.

### Input and output details

Expand Down
16 changes: 6 additions & 10 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,6 @@
"""
DELETE THIS MODULE STRING AND REPLACE IT WITH A DESCRIPTION OF YOUR APP.
app.py Template
The app.py script does several things:
- import the necessary code
- create a subclass of ClamsApp that defines the metadata and provides a method to run the wrapped NLP tool
- provide a way to run the code as a RESTful Flask service
Wrapper for the Python spaCy library.
"""

Expand Down Expand Up @@ -48,8 +41,8 @@ def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
for doc in mmif_obj.get_documents_by_type(DocumentTypes.TextDocument):
in_doc = None
tok_idx = {}
if 'pretokenizd' in parameters and parameters['pretokenized']:
for view in mmif_obj.get_Views_for_document(doc.id):
if parameters.get('pretokenized') is True:
for view in mmif_obj.get_views_for_document(doc.id):
if Uri.TOKEN in view.metadata.contains:
tokens = [token.get_property('text') for token in view.get_annotations(Uri.TOKEN)]
tok_idx = {i : f'{view.id}:{token.id}'
Expand All @@ -71,6 +64,7 @@ def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
for n, tok in enumerate(in_doc):
a = view.new_annotation(Uri.TOKEN)
if n not in tok_idx:
print(88888888)
a.add_property("start", tok.idx)
a.add_property("end", tok.idx + len(tok))
tok_idx[n] = a.id
Expand Down Expand Up @@ -105,7 +99,9 @@ def _test(infile, outfile):
% (view.id, len(view.annotations), view.metadata['app']))



if __name__ == "__main__":

parser = argparse.ArgumentParser()
parser.add_argument("--port", action="store", default="5000", help="set port to listen")
parser.add_argument("--production", action="store_true", help="run gunicorn server")
Expand Down

0 comments on commit cc6b96a

Please sign in to comment.