Skip to content

Commit

Permalink
Fix bug when showing links in the result table (#65)
Browse files Browse the repository at this point in the history
In the file `backend/static/js/helper.js` the line `link = cpy.match(/(https?:\/\/[a-zA-Z0-9.:%/#\?_-]+)/g)[0];` led to issues with results of the `encode_for_uri` function (see ad-freiburg/qlever#1128), because of a missing check whether the `.match()` function actually finds any result.
  • Loading branch information
hannahbast authored and Hannah Bast committed Dec 10, 2023
2 parents 14cc93a + 7b1d7c8 commit 7a1077e
Show file tree
Hide file tree
Showing 5 changed files with 175 additions and 71 deletions.
2 changes: 0 additions & 2 deletions backend/management/commands/examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,5 +73,3 @@ def normalize_query(self, query):
query = query.strip()

return query


203 changes: 150 additions & 53 deletions backend/management/commands/warmup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from backend.models import Backend
import requests
import sys
import re


class Command(BaseCommand):
Expand All @@ -12,19 +13,25 @@ class Command(BaseCommand):
help += '\n - pin'
help += '\n - clear'
help += '\n - clear-unpinned'
help += '\n - queries'
help += '\n - show-all-ac-queries'

# Initialization. The `backend` and `warmupQueries` will be set first thing
# in `handle` below.
def __init__(self, *args, **kwargs):
self._logs = []
super().__init__( *args, **kwargs)
super().__init__(*args, **kwargs)
self.backend = None

class Targets(TextChoices):
CLEAR_AND_PIN = "clear_and_pin", "Clear and pin"
PIN = "pin", "Pin"
CLEAR = "clear", "Clear"
CLEAR_UNPINNED = "clear_unpinned", "Clear unpinned"
SHOW_ALL_AC_QUERIES = "show_all_ac_queries", "Show all autocompletion queries"

QUERIES = "queries", "Queries"
SHOW_ALL_AC_QUERIES = "show_all_ac_queries", \
"Show all autocompletion queries"

PRINT_FORMATS = {
"red": lambda text: f"\033[31m{text}\033[0m",
"blue": lambda text: f"\033[34m{text}\033[0m",
Expand All @@ -43,14 +50,13 @@ def add_arguments(self, parser):
parser.add_argument('backend', nargs=1,
help='Id, Slug or Name of a Backend')
parser.add_argument(
'target', nargs='?', default="clear_and_pin", help='Id, Slug or Name of a Backend')

'target', nargs='?', default="clear_and_pin",
help='Id, Slug or Name of a Backend')


def log(self, msg, format=None, *args, file=sys.stdout):
if args:
msg += " " + " ".join(str(arg) for arg in args)

printMsg = self.PRINT_FORMATS[format](msg)
htmlMsg = self.HTML_FORMATS[format](msg)
self._logs.append(htmlMsg)
Expand All @@ -60,23 +66,48 @@ def handle(self, *args, returnLog=False, **options):
target = options["target"]
backend = options["backend"][0]

# Determine backend.
backends = Backend.objects.filter(
name=backend) | Backend.objects.filter(slug=backend)

try:
backends = backends | Backend.objects.filter(id=backend)
except (ValueError, TypeError):
pass

backend = backends.first()

if not backend:
raise CommandError(
"Please specify a Backend by providing it's name, slug or ID"
)

self.backend = backend

# Get the warmup queries.
self. warmupQueries = [
(self.backend.warmupQuery1,
"Entities names aliases score, ordered by score"),
(self.backend.warmupQuery2,
"Entities names aliases score, ordered by alias"),
(self.backend.warmupQuery3,
"Entities names aliases score, ordered by entity"),
(self.backend.warmupQuery4,
"Predicates names aliases score, for use without prefix"),
(self.backend.warmupQuery5,
"Predicates names aliases score, for use with prefix"),
]

# EXPERIMENTAL: For Pubchem, add result of half-sensitive object AC for
# predicate rdf:type to warmup queries.
if self.backend.slug == "pubchem":
for predicate in ["rdf:type"]:
halfSensitiveWarmupQuery = (
"SELECT ?type (COUNT(?type) AS ?count) WHERE {\n"
" ?subject " + predicate + " ?type\n"
"} GROUP BY ?type")
self.warmupQueries.append(
(halfSensitiveWarmupQuery,
"Half-sensitive object AC query for predicate " +
predicate))

# Do something depending on the target.
if target == self.Targets.CLEAR_AND_PIN:
self.clear()
self.pin()
Expand All @@ -87,27 +118,30 @@ def handle(self, *args, returnLog=False, **options):
self.clear()
elif target == self.Targets.CLEAR_UNPINNED:
self.clear(onlyUnpinned=True)
elif target == self.Targets.QUERIES:
return self.getAllWarmupQueries()
elif target == self.Targets.SHOW_ALL_AC_QUERIES:
self.showAutocompleteQueries()
else:
raise CommandError("Unknown target: " + target)

# TODO: Not sure what this is good for.
if returnLog:
return self._logs

# Send a request to QLever.
def request_to_qlever(self, params):
headers = { "Accept": "application/qlever-results+json" }
# print(f"PYTHON VERSION: {sys.version}", file=sys.stderr)
headers = {"Accept": "application/qlever-results+json"}
try:
response = requests.post(self.backend.baseUrl, data=params, headers=headers)
# response = requests.get(self.backend.baseUrl, params=params, headers=headers)
response.raise_for_status()
response = requests.post(self.backend.baseUrl,
data=params, headers=headers)
return response
except requests.exceptions.RequestException as e:
self.log(f"An exception of type {type(e).__name__} occurred ({e})",
format="red", file=sys.stderr)
self.log(f"Error requesting {self.backend.baseUrl}: {e}",
format="red")
return None

# Clear the cache.
def clear(self, onlyUnpinned=False):
if onlyUnpinned:
msg = "Clear cache, but only the unpinned results"
Expand All @@ -116,63 +150,55 @@ def clear(self, onlyUnpinned=False):
msg = "Clear cache completely, including the pinned results"
params = {"cmd": "clear-cache-complete"}
self.log(msg, format="bold")
response = self.request_to_qlever(params)
self.request_to_qlever(params)

# Pin warmup queries and frequent predicates.
def pin(self):
prefixString = self._getPrefixString()

# Standard warmup queries.
warmups = [
(self.backend.warmupQuery1,
"Entities names aliases score, ordered by score, full result for Subject AC query with empty prefix"),
(self.backend.warmupQuery2,
"Entities names aliases score, ordered by alias, part of Subject AC query with non-empty prefix"),
(self.backend.warmupQuery3,
"Entities names aliases score, ordered by entity, part of Object AC query"),
(self.backend.warmupQuery4,
"Predicates names aliases score, without prefix (only wdt: and schema:about)"),
(self.backend.warmupQuery5,
"Predicates names aliases score, with prefix (all predicates)"),
]

# pin warmup queries
for warmup, headline in warmups:
# Pin warmup queries.
for warmupQuery, description in self.warmupQueries:
self.log(" ")
self.log(f"Pin: {headline}", format="bold")
warmupQuery = self._buildQuery(warmup)
self.log(f"Pin: {description}", format="bold")
warmupQuery = self._buildQuery(warmupQuery)
self.log(warmupQuery)
self._pinQuery(f"{prefixString}\n{warmupQuery}")
warmupQuery = f"{prefixString}\n{warmupQuery}"
self._pinQuery(warmupQuery)

# clear unpinned
# Clear the rest of the cache.
self.log(" ")
self.clear(onlyUnpinned=True)

# pin frequent predicates
# Pin frequent predicates (two orders each: by subject and by object).
for predicate in self.backend.frequentPredicates.split(" "):
if not predicate:
if not predicate or predicate.startswith("#"):
continue
self.log(" ")
self.log(f"Pin: {predicate} ordered by subject", format="bold")
query = f"SELECT ?x ?y WHERE {{ ?x {predicate} ?y }} INTERNAL SORT BY ?x"
query = (f"SELECT ?x ?y WHERE {{ ?x {predicate} ?y }} "
f"INTERNAL SORT BY ?x")
self.log(query)
self._pinQuery(f"{prefixString}\n{query}")

self.log(" ")
self.log(f"Pin: {predicate} ordered by object", format="bold")
query = f"SELECT ?x ?y WHERE {{ ?x {predicate} ?y }} INTERNAL SORT BY ?y"
query = (f"SELECT ?x ?y WHERE {{ ?x {predicate} ?y }} "
f"INTERNAL SORT BY ?y")
self.log(query)
self._pinQuery(f"{prefixString}\n{query}")

# pin frequent patterns
# Pin frequent patterns (ordered by subject only).
for pattern in self.backend.frequentPatternsWithoutOrder.split(" "):
if not pattern:
if not pattern or pattern.startswith("#"):
continue
self.log(" ")
self.log(f"Pin: {pattern} ordered by subject only", format="bold")
query = f"SELECT ?x ?y WHERE {{ ?x {pattern} ?y }} INTERNAL SORT BY ?x"
query = (f"SELECT ?x ?y WHERE {{ ?x {pattern} ?y }} "
f"INTERNAL SORT BY ?x")
self.log(query)
self._pinQuery(f"{prefixString}\n{query}")

# TODO: Deprecated, but there is still a button in the frontend using this.
def showAutocompleteQueries(self):
self.log("Subject AC query", format="bold")
self.log(self._buildQuery(self.backend.suggestSubjects))
Expand All @@ -181,9 +207,49 @@ def showAutocompleteQueries(self):
self.log("Object AC query", format="bold")
self.log(self._buildQuery(self.backend.suggestSubjects))

# Get TSV with one description and warmup query per line.
#
# Each line has the form `<description>\t<query>`. Every query is passed
# through `_buildQuery`, `_addPrefixes`, and `_normalizeQuery` (in that
# order) before it is written. The lines are returned as a single string,
# joined by newlines.
#
# NOTE: Used by qlever script for action `autocompletion-warmup`.
def getAllWarmupQueries(self):
    # Turn a description and a raw query into one TSV line.
    def tsvLine(description, query):
        query = self._buildQuery(query)
        query = self._addPrefixes(query)
        query = self._normalizeQuery(query)
        return f"{description}\t{query}"

    # Query for all subject-object pairs connected via `path`, with the
    # result sorted by the variable named `sortBy`.
    def pairsQuery(path, sortBy):
        return (f"SELECT ?subject ?object WHERE {{ "
                f"?subject {path} ?object "
                f"}} INTERNAL SORT BY ?{sortBy}")

    # The standard warmup queries.
    tsvLines = [tsvLine(description, warmupQuery)
                for warmupQuery, description in self.warmupQueries]

    # The frequent predicates (both orders). Empty entries and entries
    # starting with `#` are skipped.
    for predicate in self.backend.frequentPredicates.split(" "):
        if not predicate or predicate.startswith("#"):
            continue
        for sortBy in ["subject", "object"]:
            tsvLines.append(tsvLine(f"{predicate} ordered by {sortBy}",
                                    pairsQuery(predicate, sortBy)))

    # The frequent patterns (ordered by subject only). Empty entries and
    # entries starting with `#` are skipped.
    for pattern in self.backend.frequentPatternsWithoutOrder.split(" "):
        if not pattern or pattern.startswith("#"):
            continue
        tsvLines.append(tsvLine(f"{pattern} ordered by subject only",
                                pairsQuery(pattern, "subject")))

    return "\n".join(tsvLines)

# Fill in the placeholders in the given query.
def _buildQuery(self, completionQuery):
substitutionFinished = True
for placeholder, replacement in self.backend.getWarmupAndAcPlaceholders().items():
for placeholder, replacement in \
self.backend.getWarmupAndAcPlaceholders().items():
newQuery = completionQuery.replace(f"%{placeholder}%", replacement)
if (newQuery != completionQuery):
substitutionFinished = False
Expand All @@ -199,18 +265,49 @@ def _buildQuery(self, completionQuery):
else:
return self._buildQuery(completionQuery)

# Get the list of all predefined prefixes for the given backend.
def _getPrefixString(self):
prefixString = "\n".join(
[f"PREFIX {prefixName}: <{prefix}>" for prefixName, prefix in self.backend.availablePrefixes.items()])
[f"PREFIX {prefixName}: <{prefix}>" for prefixName,
prefix in self.backend.availablePrefixes.items()])
return prefixString

# Add only those prefixes to the query that are actually used in the query.
def _addPrefixes(self, query):
prefixNames = re.findall(r"([a-zA-Z0-9]+):[a-zA-Z0-9]+", query)
prefixDefinitions = []
for prefixName in list(dict.fromkeys(prefixNames)):
prefix = self.backend.availablePrefixes[prefixName]
prefixDefinitions.append(f"PREFIX {prefixName}: <{prefix}>")
return "\n".join(prefixDefinitions) + "\n" + query

# Pin the given query.
def _pinQuery(self, query):
params = { "query": query, "pinresult": "true", "send": "10" }
params = {"query": query, "pinresult": "true", "send": "10"}
response = self.request_to_qlever(params)
if response:
if response is not None:
jsonData = response.json()
if "exception" in jsonData:
self.log(f"ERROR in processing query: {jsonData['exception']}",
format="red")
self.log(f"ERROR processing query: {jsonData['exception']}",
format="red")
else:
self.log(f"Result size: {jsonData['resultsize']:,}", format="blue")
self.log(f"Result size: {jsonData['resultsize']:,}",
format="blue")

# Normalize the given query to a one-liner.
#
# TODO: Copied from commands/exampls.py -> this should be at one place.
def _normalizeQuery(self, query):
# Replace # in IRIs by %23.
query = re.sub(r'(<[^>]+)#', r'\1%23', query)
# Remove comments.
query = re.sub(r'#.*\n', ' ', query, flags=re.MULTILINE)
# Re-replace %23 in IRIs by #.
query = re.sub(r'(<[^>]+)%23', r'\1#', query)
# Replace all sequences of whitespace by a single space.
query = re.sub(r'\s+', ' ', query)
# Remove . before }.
query = re.sub(r'\s*\.\s*}', ' }', query)
# Remove any trailing whitespace.
query = query.strip()
return query
29 changes: 16 additions & 13 deletions backend/static/js/helper.js
Original file line number Diff line number Diff line change
Expand Up @@ -960,20 +960,23 @@ function getFormattedResultEntry(str, maxLength, column = undefined) {
//
// TODO: What if http occur somewhere inside a literal or a link?
} else if (pos_http > 0) {
isLink = true;
cpy = cpy.replace(/ /g, '_');
link = cpy.match(/(https?:\/\/[a-zA-Z0-9.:%/#\?_-]+)/g)[0];
checkLink = link.toLowerCase();
if (checkLink.endsWith('jpg') || checkLink.endsWith('png') || checkLink.endsWith('gif') || checkLink.endsWith('jpeg') || checkLink.endsWith('svg')) {
str = "";
linkStart = '<a href="' + link + '" target="_blank"><img src="' + link + '" width="50" >';
linkEnd = '</a>';
} else if (checkLink.endsWith('pdf') || checkLink.endsWith('doc') || checkLink.endsWith('docx')) {
linkStart = '<span style="white-space: nowrap;"><a href="' + link + '" target="_blank"><i class="glyphicon glyphicon-file"></i>&nbsp;';
linkEnd = '</a></span>';
} else {
linkStart = '<span style="white-space: nowrap;"><a href="' + link + '" target="_blank"><i class="glyphicon glyphicon-link"></i>&nbsp;';
linkEnd = '</a></span>';
link_match = cpy.match(/(https?:\/\/[a-zA-Z0-9.:%/#\?_-]+)/g);
if(link_match != null) {
isLink = true;
link = link_match[0];
checkLink = link.toLowerCase();
if (checkLink.endsWith('jpg') || checkLink.endsWith('png') || checkLink.endsWith('gif') || checkLink.endsWith('jpeg') || checkLink.endsWith('svg')) {
str = "";
linkStart = '<a href="' + link + '" target="_blank"><img src="' + link + '" width="50" >';
linkEnd = '</a>';
} else if (checkLink.endsWith('pdf') || checkLink.endsWith('doc') || checkLink.endsWith('docx')) {
linkStart = '<span style="white-space: nowrap;"><a href="' + link + '" target="_blank"><i class="glyphicon glyphicon-file"></i>&nbsp;';
linkEnd = '</a></span>';
} else {
linkStart = '<span style="white-space: nowrap;"><a href="' + link + '" target="_blank"><i class="glyphicon glyphicon-link"></i>&nbsp;';
linkEnd = '</a></span>';
}
}
}

Expand Down
2 changes: 2 additions & 0 deletions backend/templates/partials/head.html
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
var SUGGESTSUBJECTS_CONTEXT_INSENSITIVE = ENTITYNAMERELATIONS["SUGGESTSUBJECTSCONTEXTINSENSITIVE"];
var SUGGESTPREDICATES_CONTEXT_INSENSITIVE = ENTITYNAMERELATIONS["SUGGESTPREDICATESCONTEXTINSENSITIVE"];
var SUGGESTOBJECTS_CONTEXT_INSENSITIVE = ENTITYNAMERELATIONS["SUGGESTOBJECTSCONTEXTINSENSITIVE"];
var SUGGESTOBJECTS_CONTEXT_HALFSENSITIVE = SUGGESTOBJECTS.replace(/%CONNECTED_TRIPLES%\s*/g, "")
// console.log("Half-sensitive object AC query template:\n", SUGGESTOBJECTS_CONTEXT_HALFSENSITIVE);
var COLLECTEDPREFIXES = {% autoescape off %}{{ prefixes }}{% endautoescape %};
var LANGUAGES = {% autoescape off %}{{ backend.languages }}{% endautoescape %};
var KEYWORDS = {% autoescape off %}{{ backend.keywords }}{% endautoescape %};
Expand Down
Loading

0 comments on commit 7a1077e

Please sign in to comment.