Skip to content

Commit

Permalink
updated to sdk 1.0.9, spacy 3.6
Browse files Browse the repository at this point in the history
  • Loading branch information
keighrim committed Jul 24, 2023
1 parent 555b3cc commit ce95ecc
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 65 deletions.
3 changes: 2 additions & 1 deletion Containerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Use the same base image version as the clams-python python library version
FROM ghcr.io/clamsproject/clams-python:1.0.7
FROM ghcr.io/clamsproject/clams-python:1.0.9
# See https://github.com/orgs/clamsproject/packages?tab=packages&q=clams-python for more base images
# IF you want to automatically publish this image to the clamsproject organization,
# 1. you should have generated this template without --no-github-actions flag
Expand All @@ -24,6 +24,7 @@ ENV CLAMS_APP_VERSION ${CLAMS_APP_VERSION}
COPY ./ /app
WORKDIR /app
RUN pip3 install -r requirements.txt
RUN python3 -m spacy download en_core_web_sm

RUN python3 -m spacy download en_core_web_sm

Expand Down
48 changes: 10 additions & 38 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,48 +1,20 @@
# Spacy NLP Service

The spaCy NLP tool wrapped as a CLAMS service, spaCy is distributed under the [MIT license](https://github.com/explosion/spaCy/blob/master/LICENSE).
## Description
The spaCy NLP tool wrapped as a CLAMS service. spaCy is distributed under the [MIT license](https://github.com/explosion/spaCy/blob/master/LICENSE).

This requires Python 3.8 or higher. For local install of required Python modules see [requirements.txt](requirements.txt).

## Using this service

Use `python app.py -t example-mmif.json out.json` just to test the wrapping code without using a server. To test this using a server you run the app as a service in one terminal:
## User instruction

```bash
$ python app.py
```
General user instructions for CLAMS apps are available at [CLAMS Apps documentation](https://apps.clams.ai/clamsapp).

And poke at it from another:
### System requirements

```bash
$ curl http://0.0.0.0:5000/
$ curl -H "Accept: application/json" -X POST -d@example-mmif.json http://0.0.0.0:5000/
```

In CLAMS you usually run this in a container. To create an image

```bash
$ docker build -f Containerfile -t clams-spacy-wrapper .
```
This requires Python 3.8 or higher. For local install of required Python modules see [requirements.txt](requirements.txt).

And to run it as a container:
#### Using as a local Python program

```bash
$ docker run --rm -d -p 5000:5000 clams-spacy-wrapper
$ curl -H "Accept: application/json" -X POST -d@example-mmif.json http://0.0.0.0:5000/
```
Use `python app.py -t example-mmif.json out.json` just to test the wrapping code without using a server.

The spaCy code will run on each text document in the input MMIF file. The file `example-mmif.json` has one text document in the top level `documents` property and two text documents in one of the views. The text documents all look as follows:
### Configurable runtime parameter

```json
{
"@type": "http://mmif.clams.ai/0.4.0/vocabulary/TextDocument",
"properties": {
"id": "m2",
"text": {
"@value": "Hello, this is Jim Lehrer with the NewsHour on PBS...."
}
}
}
```
Instead of a `text:@value` property, the text could be in an external file, which would be given as a URI in the `location` property. See the readme file in [https://github.com/clamsproject/app-nlp-example](https://github.com/clamsproject/app-nlp-example) on how to do this.
For the full list of parameters, please refer to the app metadata in the [CLAMS App Directory](https://apps.clams.ai/clamsapp/) or the [`metadata.py`](metadata.py) file in this repository.
34 changes: 17 additions & 17 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,29 @@
"""

import argparse
import logging
from typing import Union

# Imports needed for Clams and MMIF.
# Non-NLP Clams applications will require AnnotationTypes

import spacy
from clams import ClamsApp, Restifier
from lapps.discriminators import Uri
from mmif import Mmif, View, Annotation, Document, AnnotationTypes, DocumentTypes
from spacy.tokens import Doc

# For an NLP tool we need to import the LAPPS vocabulary items
from lapps.discriminators import Uri

# Spacy imports
import spacy
from spacy.tokens import Doc
# Imports needed for Clams and MMIF.
# Non-NLP Clams applications will require AnnotationTypes

class SpacyWrapper(ClamsApp):

def __init__(self):
super().__init__()
# load small English core model
self.nlp = spacy.load("en_core_web_sm")
# Load small English core model
try:
self.nlp = spacy.load("en_core_web_sm")
except OSError as e: # spacy raises OSError if model not found
spacy.cli.download("en_core_web_sm")
self.nlp = spacy.load("en_core_web_sm")

def _appmetadata(self):
# see metadata.py
Expand Down Expand Up @@ -106,13 +108,11 @@ def _test(infile, outfile):

if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--port", action="store", default="5000", help="set port to listen"
)
parser.add_argument("--port", action="store", default="5000", help="set port to listen")
parser.add_argument("--production", action="store_true", help="run gunicorn server")
parser.add_argument('-t', '--test', action='store_true', help="bypass the server")
parser.add_argument('infile', nargs='?', help="input MMIF file")
parser.add_argument('outfile', nargs='?', help="output file")
parser.add_argument('infile', nargs='?', help="input MMIF file, only with --test")
parser.add_argument('outfile', nargs='?', help="output file, only with --test")

parsed_args = parser.parse_args()

Expand All @@ -122,11 +122,11 @@ def _test(infile, outfile):
# create the app instance
app = SpacyWrapper()

http_app = Restifier(app, port=int(parsed_args.port)
)
http_app = Restifier(app, port=int(parsed_args.port))
# for running the application in production mode
if parsed_args.production:
http_app.serve_production()
# development mode
else:
app.logger.setLevel(logging.DEBUG)
http_app.run()
12 changes: 7 additions & 5 deletions metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
DO NOT CHANGE the name of the file
"""

from mmif import DocumentTypes, AnnotationTypes
from lapps.discriminators import Uri
import re

from clams.app import ClamsApp
from clams.appmetadata import AppMetadata
import re
from lapps.discriminators import Uri
from mmif import DocumentTypes


# DO NOT CHANGE the function name
def appmetadata() -> AppMetadata:
Expand All @@ -26,7 +28,7 @@ def appmetadata() -> AppMetadata:
name="CLAMS wrapper for spaCy NLP",
description="Apply spaCy NLP to all text documents in a MMIF file.",
app_license="Apache 2.0",
identifier=f"http://apps.clams.ai/spacy-wrapper",
identifier=f"spacy-wrapper",
url='https://github.com/clamsproject/app-spacy-wrapper',
analyzer_version=[l.strip().rsplit('==')[-1] for l in open('requirements.txt').readlines() if re.match(r'^spacy==', l)][0],
analyzer_license='MIT'
Expand Down Expand Up @@ -58,4 +60,4 @@ def appmetadata() -> AppMetadata:
metadata = appmetadata()
for param in ClamsApp.universal_parameters:
metadata.add_parameter(**param)
sys.stdout.write(appmetadata().jsonify(pretty=True))
sys.stdout.write(metadata.jsonify(pretty=True))
6 changes: 2 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
# Make sure clams-python version is explicitly specified, at least the lower bound
clams-python==1.0.7
spacy==3.1.2
https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.1.0/en_core_web_sm-3.1.0.tar.gz#egg=en_core_web_sm
clams-python==1.0.9
spacy==3.6

0 comments on commit ce95ecc

Please sign in to comment.