Skip to content

Commit

Permalink
#4292 - ollama-based recommender
Browse files Browse the repository at this point in the history
- Allow accessing annotations from the CAS in the prompt template
- Added some documentation
  • Loading branch information
reckart committed Jan 4, 2024
1 parent 6b4acd5 commit 631d247
Show file tree
Hide file tree
Showing 12 changed files with 276 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ include::{include-dir}projects_recommendation_string_relation.adoc[leveloffset=+

include::{include-dir}projects_recommendation_opennlp.adoc[leveloffset=+2]

include::{include-dir}projects_recommendation_ollama.adoc[leveloffset=+2]

include::{include-dir}projects_recommendation_conceptlinker.adoc[leveloffset=+2]

include::{include-dir}projects_recommendation_external.adoc[leveloffset=+2]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ raw=Raw prompt
preset=Preset
options=Advanced options

promptingMode=Processing mode
promptingMode=Prompting mode
PromptingMode.PER_ANNOTATION=Per annotation
PromptingMode.PER_SENTENCE=Per sentence
PromptingMode.PER_DOCUMENT=Per document
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.inception.recommendation.imls.ollama.jinjava;

import java.util.AbstractMap;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Feature;
import org.apache.uima.jcas.tcas.Annotation;

/**
 * Read-only map view of a UIMA {@link Annotation}, intended for use from prompt templates.
 * <p>
 * Keys are the short names of the features of the annotation's type, plus the special key
 * {@code $coveredText}, which maps to the text covered by the annotation. Feature values are
 * exposed in their string representation. Features listed in the blacklist (currently the sofa
 * feature) are hidden from the view entirely, i.e. they neither show up in {@link #keySet()} nor
 * can they be retrieved through {@link #get(Object)}.
 * <p>
 * The string representation of the wrapper itself is the covered text, so that a template can
 * render the wrapper directly.
 */
public class AnnotationWrapper
    extends AbstractMap<String, Object>
{
    private static final String COVERED_TEXT = "$coveredText";

    private static final Set<String> FEATURE_BLACKLIST = Set.of(CAS.FEATURE_BASE_NAME_SOFA);

    private final Annotation annotation;

    /**
     * @param aAnnotation
     *            the annotation to expose as a map.
     */
    public AnnotationWrapper(Annotation aAnnotation)
    {
        annotation = aAnnotation;
    }

    /**
     * Looks up the value for the given key.
     *
     * @param aKey
     *            a feature base name or the special key {@code $coveredText}.
     * @return the covered text for the special key, the feature value as a string for a known and
     *         non-blacklisted feature, or {@code null} if the key is not a string, is blacklisted
     *         or does not name a feature of the annotation's type.
     */
    @Override
    public Object get(Object aKey)
    {
        if (!(aKey instanceof String key)) {
            return null;
        }

        if (COVERED_TEXT.equals(key)) {
            return annotation.getCoveredText();
        }

        // Keep get() consistent with keySet()/entrySet()/containsKey(): a feature that is hidden
        // from the key set must not be retrievable by name either.
        if (FEATURE_BLACKLIST.contains(key)) {
            return null;
        }

        var feature = annotation.getType().getFeatureByBaseName(key);
        if (feature == null) {
            return null;
        }

        return annotation.getFeatureValueAsString(feature);
    }

    /**
     * @return the short names of all non-blacklisted features of the annotation's type plus the
     *         special key {@code $coveredText}.
     */
    @Override
    public Set<String> keySet()
    {
        var features = annotation.getType().getFeatures().stream() //
                .map(Feature::getShortName) //
                .filter(name -> !FEATURE_BLACKLIST.contains(name));

        var specials = Stream.of(COVERED_TEXT);

        return Stream.concat(features, specials).collect(Collectors.toSet());
    }

    /**
     * @return one entry per key in {@link #keySet()}, with the value obtained via
     *         {@link #get(Object)}. The set is a snapshot built on every call.
     */
    @Override
    public Set<Entry<String, Object>> entrySet()
    {
        return keySet().stream() //
                .map(k -> new SimpleEntry<>(k, get(k))) //
                .collect(Collectors.toSet());
    }

    /**
     * @return the text covered by the wrapped annotation.
     */
    @Override
    public String toString()
    {
        return annotation.getCoveredText();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.inception.recommendation.imls.ollama.jinjava;

import java.util.List;

import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Type;
import org.apache.uima.jcas.tcas.Annotation;

/**
 * Thin wrapper around a {@link CAS} that is exposed to prompt templates so that they can select
 * annotations by type name.
 */
public class CasWrapper
{
    private final CAS cas;

    /**
     * @param aCas
     *            the CAS from which annotations are selected.
     */
    public CasWrapper(CAS aCas)
    {
        cas = aCas;
    }

    /**
     * Selects all annotations of the given type from the CAS.
     *
     * @param aTypeName
     *            either the fully qualified type name or the short name of the type.
     * @return the matching annotations wrapped as {@link AnnotationWrapper}s, or an empty list if
     *         no type with the given name exists in the type system.
     */
    public List<AnnotationWrapper> select(String aTypeName)
    {
        var type = getType(aTypeName);
        if (type == null) {
            // Unknown type (e.g. a typo in the template): yield nothing instead of handing a
            // null type to the CAS select API.
            return List.of();
        }

        return cas.<Annotation> select(type).map(AnnotationWrapper::new).toList();
    }

    /**
     * Resolves a type by name. The name is first tried as a fully qualified type name; if that
     * fails, the first type in the type system iteration whose short name equals the given name
     * is used.
     *
     * @param aName
     *            the fully qualified or short type name.
     * @return the type or {@code null} if the name is {@code null} or no type matches.
     */
    private Type getType(String aName)
    {
        if (aName == null) {
            return null;
        }

        var typeSystem = cas.getTypeSystem();

        var type = typeSystem.getType(aName);
        if (type != null) {
            return type;
        }

        for (var t : typeSystem) {
            if (t.getShortName().equals(aName)) {
                return t;
            }
        }

        return null;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.inception.recommendation.api.recommender.RecommendationEngine;
import de.tudarmstadt.ukp.inception.recommendation.imls.ollama.jinjava.CasWrapper;

public class PerAnnotationContextGenerator
implements PromptContextGenerator
Expand All @@ -40,6 +41,7 @@ public Stream<PromptContext> generate(RecommendationEngine aEngine, CAS aCas, in
.map(Sentence::getCoveredText) //
.findFirst().orElse("");
var context = new PromptContext(candidate);
context.set(VAR_CAS, new CasWrapper(aCas));
context.set(VAR_TEXT, candidate.getCoveredText());
context.set(VAR_SENTENCE, sentence);
return context;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.uima.cas.CAS;

import de.tudarmstadt.ukp.inception.recommendation.api.recommender.RecommendationEngine;
import de.tudarmstadt.ukp.inception.recommendation.imls.ollama.jinjava.CasWrapper;

public class PerDocumentContextGenerator
implements PromptContextGenerator
Expand All @@ -33,6 +34,7 @@ public Stream<PromptContext> generate(RecommendationEngine aEngine, CAS aCas, in
{
var candidate = aCas.getDocumentAnnotation();
var context = new PromptContext(candidate);
context.set(VAR_CAS, new CasWrapper(aCas));
context.set(VAR_TEXT, aCas.getDocumentText());
return Stream.of(context);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.inception.recommendation.api.recommender.RecommendationEngine;
import de.tudarmstadt.ukp.inception.recommendation.imls.ollama.jinjava.CasWrapper;

public class PerSentenceContextGenerator
implements PromptContextGenerator
Expand All @@ -40,6 +41,7 @@ public Stream<PromptContext> generate(RecommendationEngine aEngine, CAS aCas, in

return candidates.stream().map(candidate -> {
var context = new PromptContext(candidate);
context.set(VAR_CAS, new CasWrapper(aCas));
context.set(VAR_TEXT, candidate.getCoveredText());
return context;
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ public interface PromptContextGenerator
static final String VAR_SENTENCE = "sentence";
static final String VAR_DOCUMENT = "document";
static final String VAR_EXAMPLES = "examples";
static final String VAR_CAS = "cas";

Stream<PromptContext> generate(RecommendationEngine aEngine, CAS aCas, int aBegin, int aEnd);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Licensed to the Technische Universität Darmstadt under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The Technische Universität Darmstadt
// licenses this file to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

== Ollama

====
CAUTION: Experimental feature. To use this functionality, you need to enable it first by adding `recommender.ollama.enabled=true` to the `settings.properties` file (see the <<admin-guide.adoc#sect_settings, Admin Guide>>).
====

This recommender allows you to obtain annotation suggestions using large language models (LLMs) supported by link:https://ollama.ai[Ollama]. In order to use it, you first need to install Ollama and run it.

.Installing and running Ollama on macOS using homebrew
[source,sh]
----
$ brew install ollama
$ ollama pull mistral
$ ollama serve mistral
----

By default, Ollama runs on `http://localhost:11434/` and {product-name} uses this as the default endpoint for communicating with it. If you run Ollama on a different host (e.g. one that has a more powerful GPU) or port, you can adjust this URL in the recommender settings.

If {product-name} can successfully connect to Ollama, the **model** combo-box will offer all models that are available on the respective endpoint. If you want to use a model that is not listed here, you first need to `ollama pull` it.

Now you can configure how to generate the prompts that are sent to Ollama and how to interpret its response using the following settings:

* **Prompting mode:** here you can choose to generate one prompt **per sentence**, **per annotation** or **per document**.
* **Response format:** here you can choose how to read the response from Ollama. The choice is between **default** (i.e. text) and a **JSON** format.
* **Extraction mode:** here you can choose how to interpret the response from Ollama. The availability of different extraction modes depends on the type of layer for which the recommender is configured. Choose **response as label** e.g. for classification or summarization tasks. It puts the response from the LLM directly into the feature that you configured the recommender to operate on. Choose **Mentions from JSON** (span layer) for information extraction tasks where you ask the LLM e.g. to identify and categorize certain types of entities in the text.
* **Prompt:** Here you can finally define the prompt that is sent to Ollama. The prompt should usually consist of an instruction and a piece of text to which the instruction is to be applied. Depending on the prompting mode, there are different variables that can be used in the prompt. The most important variable is `text` and it corresponds to the sentence text, annotated words or document text, depending on the prompting mode.

The recommender comes with several example configurations that you can choose from a drop-down field.

Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
{{ text }}
```
- name: Summarize text
- name: Summarize document
promptingMode: per-document
extractionMode: response-as-label
prompt: |-
Expand All @@ -52,6 +52,18 @@
{{ text }}
```
- name: Summarize annotated spans
promptingMode: per-document
extractionMode: response-as-label
prompt: |-
Briefly summarize the following text.
```
{% for x in cas.select('custom.Span') %}
{{ x }}
{% endfor %}
```
- name: Disambiguate in sentence
promptingMode: per-annotation
extractionMode: response-as-label
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.inception.recommendation.imls.ollama.jinjava;

import static de.tudarmstadt.ukp.inception.support.uima.AnnotationBuilder.buildAnnotation;
import static org.assertj.core.api.Assertions.assertThat;

import java.util.Map;

import org.apache.uima.fit.factory.CasFactory;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import com.hubspot.jinjava.Jinjava;
import com.hubspot.jinjava.JinjavaConfig;

import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity;

/**
 * Verifies that a {@link CasWrapper} placed into the Jinjava context can be used from a template
 * to iterate over annotations in the CAS.
 */
class CasWrapperTest
{
    private Jinjava jinjava;

    @BeforeEach
    void setup()
    {
        // Fresh engine with default configuration for every test
        jinjava = new Jinjava(new JinjavaConfig());
    }

    @Test
    void thatSelectCanAccessAnnotationsFromCas() throws Exception
    {
        // Document with two named entity annotations
        var cas = CasFactory.createCas();
        cas.setDocumentText("""
                My name is John McCain.
                His name is Mickey.""");
        buildAnnotation(cas, NamedEntity.class).on("John McCain").buildAndAddToIndexes();
        buildAnnotation(cas, NamedEntity.class).on("Mickey").buildAndAddToIndexes();

        // The wrapper is made available to the template under the name "cas"
        jinjava.getGlobalContext().put("cas", new CasWrapper(cas));

        // Template renders the covered text of every named entity, selected by short type name
        var template = """
                {% for x in cas.select('NamedEntity') %}
                {{ x }}{% endfor %}""";

        var rendered = jinjava.render(template, Map.of("test", "test"));

        assertThat(rendered).contains("John McCain\nMickey");
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
</div>

<ul wicket:id="annotationsContainer" class="px-0 list-group list-group-flush">
<li wicket:id="annotations" class="list-group-item p-0 d-flex">
<li wicket:id="annotations" class="list-group-item p-0 d-flex border-bottom">
<div class="text-secondary bg-light-subtle border-end px-2 d-flex align-items-center">
<i class="fas fa-tag text-center" style="width: 1em;"></i>
</div>
Expand Down

0 comments on commit 631d247

Please sign in to comment.