Skip to content

Commit

Permalink
Merge branch 'main' into feature/4334-New-take-on-TEI-support-based-o…
Browse files Browse the repository at this point in the history
…n-generic-XML-support

* main:
  #4340 - Allow pruning elements in XML policy
  • Loading branch information
reckart committed Dec 3, 2023
2 parents 5fa7d85 + 03b9915 commit 0959bba
Show file tree
Hide file tree
Showing 10 changed files with 210 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ case_sensitive: false
default_attribute_action: DROP
default_element_action: DROP
default_namespace: http://www.w3.org/1999/xhtml
match_without_namespace: true
debug: true
policies:
- action: PASS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.function.Consumer;

import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
Expand Down Expand Up @@ -160,6 +161,10 @@ public void endElement(String aUri, String aLocalName, String aQName) throws SAX
element.setChildren(children);
}

for (var l : frame.onEndElementCallbacks) {
l.accept(element);
}

for (var l : listeners) {
l.endElement(element);
}
Expand Down Expand Up @@ -211,7 +216,7 @@ public CharSequence getText()
return text;
}

public Collection<StackFrame> getStack()
protected Collection<StackFrame> getStack()
{
return Collections.unmodifiableCollection(stack);
}
Expand All @@ -231,6 +236,16 @@ public void captureText(boolean aCapture)
stack.peek().setCaptureText(aCapture);
}

/**
 * Registers a callback on the stack frame of the element that is currently open, i.e. the frame
 * at the top of the element stack.
 *
 * @param aCallback
 *            the callback to store on the current frame.
 * @throws IllegalStateException
 *             if the element stack is empty, i.e. no element has been opened yet.
 */
public void onEndElement(Consumer<XmlElement> aCallback)
{
    if (!stack.isEmpty()) {
        var currentFrame = stack.peek();
        currentFrame.onEndElement(aCallback);
        return;
    }

    throw new IllegalStateException(
            "onEndElement callback can only be added if an element has been opened");
}

public boolean isCapturingText()
{
if (stack.isEmpty()) {
Expand Down Expand Up @@ -263,11 +278,12 @@ default void endElement(XmlElement aElement) throws SAXException
}
}

private static class StackFrame
protected static final class StackFrame
{
private final XmlElement element;
private final List<XmlNode> children = new ArrayList<>();
private boolean captureText;
private final List<Consumer<XmlElement>> onEndElementCallbacks = new ArrayList<>(1);

public StackFrame(XmlElement aElement, boolean aCaptureText)
{
Expand All @@ -280,11 +296,16 @@ public XmlElement getElement()
return element;
}

public void addChild(XmlNode aChild)
void addChild(XmlNode aChild)
{
children.add(aChild);
}

/**
 * Appends the given callback to this frame's list of end-of-element callbacks.
 *
 * @param aCallback
 *            the callback to remember for this frame.
 */
void onEndElement(Consumer<XmlElement> aCallback)
{
    this.onEndElementCallbacks.add(aCallback);
}

public List<XmlNode> getChildren()
{
return children;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ public PolicyCollectionBuilder onElements(QName... aElementNames)
throw new IllegalArgumentException("onElements does not accept an empty list");
}

for (var elementName : aElementNames) {
for (var attributeName : attributeNames) {
AttributePolicy policy = makePolicy(attributeName);
for (var attributeName : attributeNames) {
var policy = makePolicy(attributeName);
for (var elementName : aElementNames) {
parent.attributePolicy(elementName, attributeName, policy);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,12 @@ public enum ElementAction
/**
* Element is not passed through.
*/
DROP;
DROP,

/**
* Element is not passed through and neither are any descendants even if they might otherwise be
* marked to pass.
*/
PRUNE;

}
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,16 @@ public static ElementPolicy drop()
return new ElementPolicy(ElementAction.DROP);
}

/**
 * Creates an element policy carrying the {@link ElementAction#SKIP} action.
 *
 * @return a new policy instance.
 */
public static ElementPolicy skip()
{
    final var action = ElementAction.SKIP;
    return new ElementPolicy(action);
}

/**
 * Creates an element policy carrying the {@link ElementAction#PRUNE} action, i.e. the element
 * is not passed through and neither are any of its descendants.
 *
 * @return a new policy instance.
 */
public static ElementPolicy prune()
{
    final var action = ElementAction.PRUNE;
    return new ElementPolicy(action);
}

public static ElementPolicy pass()
{
return new ElementPolicy(ElementAction.PASS);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ public class ExternalPolicyCollection
private ElementAction defaultElementAction;
private AttributeAction defaultAttributeAction;
private String defaultNamespace;
private boolean matchWithoutNamespace = false;
private boolean useDefaultNamespaceForAttributes = true;

public String getName()
{
Expand Down Expand Up @@ -113,4 +115,24 @@ public void setDefaultNamespace(String aDefaultNamespace)
{
defaultNamespace = aDefaultNamespace;
}

/**
 * @return the current value of the {@code matchWithoutNamespace} setting.
 */
public boolean isMatchWithoutNamespace()
{
    return this.matchWithoutNamespace;
}

/**
 * @param aMatchWithoutNamespace
 *            the new value of the {@code matchWithoutNamespace} setting.
 */
public void setMatchWithoutNamespace(boolean aMatchWithoutNamespace)
{
    this.matchWithoutNamespace = aMatchWithoutNamespace;
}

/**
 * @return the current value of the {@code useDefaultNamespaceForAttributes} setting.
 */
public boolean isUseDefaultNamespaceForAttributes()
{
    return this.useDefaultNamespaceForAttributes;
}

/**
 * @param aUseDefaultNamespaceForAttributes
 *            the new value of the {@code useDefaultNamespaceForAttributes} setting.
 */
public void setUseDefaultNamespaceForAttributes(boolean aUseDefaultNamespaceForAttributes)
{
    useDefaultNamespaceForAttributes = aUseDefaultNamespaceForAttributes;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ public class PolicyCollectionBuilder
private AttributeAction defaultAttributeAction = AttributeAction.DROP;

private String defaultNamespace;
private boolean useDefaultNamespaceForAttributes = true;
private boolean matchWithoutNamespace = false;

public static PolicyCollectionBuilder caseSensitive()
{
Expand Down Expand Up @@ -75,6 +77,18 @@ public PolicyCollectionBuilder defaultNamespace(String aDefaultNamespace)
return this;
}

/**
 * Switches the {@code useDefaultNamespaceForAttributes} flag of this builder on.
 * <p>
 * NOTE(review): the field is already initialized to {@code true}, so this call only has an
 * effect if something else turned the flag off before - confirm whether a way to disable it is
 * intended.
 *
 * @return this builder, for call chaining.
 */
public PolicyCollectionBuilder useDefaultNamespaceForAttributes()
{
    this.useDefaultNamespaceForAttributes = true;
    return this;
}

/**
 * Switches the {@code matchWithoutNamespace} flag of this builder on.
 *
 * @return this builder, for call chaining.
 */
public PolicyCollectionBuilder matchWithoutNamespace()
{
    this.matchWithoutNamespace = true;
    return this;
}

public PolicyCollectionBuilder defaultAttributeAction(AttributeAction aDefaultAttributeAction)
{
defaultAttributeAction = aDefaultAttributeAction;
Expand Down Expand Up @@ -123,6 +137,24 @@ public PolicyCollectionBuilder disallowElements(QName... aElementNames)
return this;
}

/**
 * Registers a {@link ElementAction#PRUNE} policy for each of the given element names. Every
 * name is turned into a {@link QName} via its single-argument constructor before it is
 * registered.
 *
 * @param aElementNames
 *            the names of the elements to prune.
 * @return this builder, for call chaining.
 */
public PolicyCollectionBuilder pruneElements(String... aElementNames)
{
    // Names are converted and registered one at a time, in the order given
    Stream.of(aElementNames) //
            .map(QName::new) //
            .forEachOrdered(name -> elementPolicy(name, ElementAction.PRUNE));

    return this;
}

/**
 * Registers a {@link ElementAction#PRUNE} policy for each of the given qualified element
 * names.
 *
 * @param aElementNames
 *            the qualified names of the elements to prune.
 * @return this builder, for call chaining.
 */
public PolicyCollectionBuilder pruneElements(QName... aElementNames)
{
    for (int i = 0; i < aElementNames.length; i++) {
        elementPolicy(aElementNames[i], ElementAction.PRUNE);
    }

    return this;
}

public PolicyCollectionBuilder skipElements(String... aElementNames)
{
for (var elementName : aElementNames) {
Expand All @@ -143,16 +175,28 @@ public PolicyCollectionBuilder skipElements(QName... aElementNames)

PolicyCollectionBuilder elementPolicy(QName aElement, ElementAction aAction)
{
elementPolicyBuilders.put(aElement,
new ElementPolicyBuilder(aElement, aAction, mapSupplier));
if (defaultNamespace == null) {
_elementPolicy(aElement, aAction);
return this;
}

if (isEmpty(aElement.getNamespaceURI())) {
_elementPolicy(new QName(defaultNamespace, aElement.getLocalPart()), aAction);

if (isEmpty(aElement.getNamespaceURI()) && defaultNamespace != null) {
elementPolicy(new QName(defaultNamespace, aElement.getLocalPart()), aAction);
if (matchWithoutNamespace) {
_elementPolicy(aElement, aAction);
}
}

return this;
}

/**
 * Stores a new element policy builder for the given element name exactly as given, without any
 * namespace handling. An entry already stored under the same name is replaced.
 *
 * @param aElement
 *            the element name used as the key.
 * @param aAction
 *            the action to apply to the element.
 */
private void _elementPolicy(QName aElement, ElementAction aAction)
{
    var builder = new ElementPolicyBuilder(aElement, aAction, mapSupplier);
    elementPolicyBuilders.put(aElement, builder);
}

public AttributePolicyBuilder allowAttributes(String... aAttributeNames)
{
var attributeNames = Stream.of(aAttributeNames).map(QName::new).toArray(QName[]::new);
Expand Down Expand Up @@ -196,30 +240,43 @@ public PolicyCollection build()
}

/**
 * Registers an attribute policy for the given element/attribute pair, taking the configured
 * default namespace into account:
 * <ul>
 * <li>If no default namespace is set, the policy is registered under the names exactly as
 * given and nothing else happens.</li>
 * <li>If a default namespace is set and neither name carries a namespace, the policy is
 * registered for the element name moved into the default namespace. The attribute name is moved
 * into the default namespace as well if {@code useDefaultNamespaceForAttributes} is set,
 * otherwise it is used as given. If {@code matchWithoutNamespace} is set, the policy is
 * additionally registered under the original unqualified names.</li>
 * </ul>
 * NOTE(review): if a default namespace is set and either name already carries a namespace,
 * nothing is registered here - confirm that this is intended.
 *
 * @param aElementName
 *            the name of the element the attribute belongs to.
 * @param aAttributeName
 *            the name of the attribute.
 * @param aPolicy
 *            the policy to register.
 */
void attributePolicy(QName aElementName, QName aAttributeName, AttributePolicy aPolicy)
{
    if (defaultNamespace == null) {
        _attributePolicy(aElementName, aAttributeName, aPolicy);
        // Must stop here: a QName built with a null namespace is the same unqualified name
        // again, so falling through would register the policy a second time under the same
        // key (spurious override warning, or a delegating policy delegating to itself).
        return;
    }

    if (isEmpty(aElementName.getNamespaceURI()) && isEmpty(aAttributeName.getNamespaceURI())) {
        var elementName = new QName(defaultNamespace, aElementName.getLocalPart());
        var attributeName = useDefaultNamespaceForAttributes
                ? new QName(defaultNamespace, aAttributeName.getLocalPart())
                : aAttributeName;
        _attributePolicy(elementName, attributeName, aPolicy);

        if (matchWithoutNamespace) {
            _attributePolicy(aElementName, aAttributeName, aPolicy);
        }
    }
}

private void _attributePolicy(QName aElementName, QName aAttributeName, AttributePolicy aPolicy)
{
@SuppressWarnings("unchecked")
Map<QName, AttributePolicy> attributePolicies = elementAttributePolicies
.computeIfAbsent(aElementName, k -> mapSupplier.get());
AttributePolicy attributePolicy = attributePolicies.computeIfAbsent(aAttributeName,
var attributePolicy = attributePolicies.computeIfAbsent(aAttributeName,
k -> AttributePolicy.UNDEFINED);

if (aPolicy instanceof DelegatingAttributePolicy) {
((DelegatingAttributePolicy) aPolicy).setDelegate(attributePolicy);
attributePolicies.put(aAttributeName, aPolicy);
}
else {
AttributePolicy oldPolicy = attributePolicies.put(aAttributeName, aPolicy);
var oldPolicy = attributePolicies.put(aAttributeName, aPolicy);
if (!AttributePolicy.isUndefined(oldPolicy)) {
log.warn("On element [{}] overriding policy for attribute [{}]: [{}] -> [{}]",
aElementName, aAttributeName, oldPolicy, aPolicy);
}
}

if (isEmpty(aElementName.getNamespaceURI()) && isEmpty(aAttributeName.getNamespaceURI())
&& defaultNamespace != null) {
attributePolicy(new QName(defaultNamespace, aElementName.getLocalPart()),
new QName(defaultNamespace, aAttributeName.getLocalPart()), aPolicy);
}
}

public void allowAttribute(QName aAttribute, Pattern aPattern)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,14 @@ public static PolicyCollection loadPolicies(InputStream aIs) throws IOException
if (externalCollection.getDefaultNamespace() != null) {
policyCollectionBuilder.defaultNamespace(externalCollection.getDefaultNamespace());
}
if (externalCollection.isMatchWithoutNamespace()) {
policyCollectionBuilder.matchWithoutNamespace();
}
if (externalCollection.isUseDefaultNamespaceForAttributes()) {
policyCollectionBuilder.useDefaultNamespaceForAttributes();
}

for (ExternalPolicy policy : externalCollection.getPolicies()) {
for (var policy : externalCollection.getPolicies()) {
var isElementPolicy = policy.getElements() != null;
var isAttributesPolicy = policy.getAttributes() != null;

Expand Down
Loading

0 comments on commit 0959bba

Please sign in to comment.