Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
luigi-asprino committed Jun 22, 2023
1 parent 729c94f commit 18ad0f0
Show file tree
Hide file tree
Showing 7 changed files with 99 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ public class CSVTriplifier implements Triplifier, Slicer {
private static final Logger log = LoggerFactory.getLogger(CSVTriplifier.class);
public final static String PROPERTY_FORMAT = "csv.format", PROPERTY_HEADERS = "csv.headers";
public final static String PROPERTY_DELIMITER = "csv.delimiter";
public final static String PROPERTY_QUOTECHAR = "csv.quote-char";
public final static String PROPERTY_NULLSTRING = "csv.null-string";
public final static String PROPERTY_QUOTE_CHAR = "csv.quote-char";
public final static String PROPERTY_NULL_STRING = "csv.null-string";

public static CSVFormat buildFormat(Properties properties) throws IOException {
CSVFormat format;
Expand All @@ -50,12 +50,12 @@ public static CSVFormat buildFormat(Properties properties) throws IOException {
log.warn("Unsupported csv format: '{}', using default.", properties.getProperty(PROPERTY_FORMAT));
format = CSVFormat.DEFAULT;
}
if(properties.containsKey(PROPERTY_NULLSTRING)){
format = format.withNullString(properties.getProperty(PROPERTY_NULLSTRING)) ;
if(properties.containsKey(PROPERTY_NULL_STRING)){
format = format.withNullString(properties.getProperty(PROPERTY_NULL_STRING)) ;
}
if(properties.containsKey(PROPERTY_QUOTECHAR)){
log.debug("Setting quote char to '{}'", properties.getProperty(PROPERTY_QUOTECHAR).charAt(0));
format = format.withQuote(properties.getProperty(PROPERTY_QUOTECHAR).charAt(0)) ;
if(properties.containsKey(PROPERTY_QUOTE_CHAR)){
log.debug("Setting quote char to '{}'", properties.getProperty(PROPERTY_QUOTE_CHAR).charAt(0));
format = format.withQuote(properties.getProperty(PROPERTY_QUOTE_CHAR).charAt(0)) ;
}
if(properties.containsKey(PROPERTY_DELIMITER)){
log.debug("Setting delimiter to {}", properties.getProperty(PROPERTY_DELIMITER));
Expand Down Expand Up @@ -93,10 +93,14 @@ public LinkedHashMap<Integer, String> makeHeadersMap(Iterator<CSVRecord> recordI
String colstring = columns.next();
String colname = colstring.strip();

if(colname.length()==0){
continue;
}

int c = 0;
while (headers_map.containsValue(colname)) {
c++;
colname += "_" + String.valueOf(c);
colname += "_".concat(String.valueOf(c));
}
log.trace("adding colname >{}<", colname);
headers_map.put(colid, colname);
Expand Down Expand Up @@ -185,18 +189,10 @@ private void processRow(int rown, String dataSourceId, String rootId, CSVRecord
@Override
public Iterable<Slice> slice(Properties properties) throws IOException, TriplifierHTTPException {

// URL url = Triplifier.getLocation(properties);
// log.debug("Location: {}", url);
// if (url == null)
// return Collections.emptySet();

CSVFormat format = buildFormat(properties);
String root = Triplifier.getRootArgument(properties);
Charset charset = Triplifier.getCharsetArgument(properties);

// boolean headers = hasHeaders(properties);
String dataSourceId = ""; // there is always 1 data source id
// String containerRowPrefix = root + "#row";

// XXX How do we close the inputstream?
final InputStream is = Triplifier.getInputStream(properties);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,44 @@ public void testIssue351() throws URISyntaxException, IOException {

}

/**
 * Regression test for issue 352: runs the same two-variable query against the
 * spreadsheet fixture and the CSV fixture with header handling enabled, and checks
 * that the first solution binds {@code ?fred} to "fred" and {@code ?sally} to "sally".
 * <p>
 * See <a href="https://github.com/SPARQL-Anything/sparql.anything/issues/352">issue 352</a>.
 *
 * @throws URISyntaxException if a classpath resource URL cannot be converted to a URI
 * @throws IOException        if a query file cannot be read
 */
@Test
public void testIssue352() throws URISyntaxException, IOException {
    Dataset ds = DatasetFactory.createGeneral();
    QC.setFactory(ARQ.getContext(), FacadeX.ExecutorFactory);

    assertIssue352FirstSolution(ds, "issues/issue352-xls.sparql", "issues/issue352.xls");
    assertIssue352FirstSolution(ds, "issues/issue352-csv.sparql", "issues/issue352.csv");
}

/**
 * Loads a query template from the classpath, substitutes the {@code %%%LOCATION%%%}
 * placeholder with the URI of the given data resource, executes it and asserts the
 * expected bindings of the first solution.
 *
 * @param ds            dataset the query is executed against
 * @param queryResource classpath location of the SPARQL query template
 * @param dataResource  classpath location of the file the query must read
 * @throws URISyntaxException if a classpath resource URL cannot be converted to a URI
 * @throws IOException        if the query file cannot be read
 */
private void assertIssue352FirstSolution(Dataset ds, String queryResource, String dataResource) throws URISyntaxException, IOException {
    ClassLoader loader = getClass().getClassLoader();

    // Name the missing resource in the failure message instead of a bare NullPointerException.
    String queryStr = IOUtils.toString(Objects.requireNonNull(loader.getResource(queryResource), queryResource).toURI(), StandardCharsets.UTF_8);
    String loc = Paths.get(Objects.requireNonNull(loader.getResource(dataResource), dataResource).toURI()).toUri().toString();
    Query query = QueryFactory.create(queryStr.replace("%%%LOCATION%%%", loc));

    // QueryExecution is AutoCloseable: close it even when an assertion fails.
    try (QueryExecution qExec = QueryExecutionFactory.create(query, ds)) {
        ResultSet rs = qExec.execSelect();

        assertTrue(rs.hasNext());
        QuerySolution qs = rs.next();
        Assert.assertEquals("fred", qs.getLiteral("fred").getString());
        Assert.assertEquals("sally", qs.getLiteral("sally").getString());
    }
}

/**
* See <a href="https://github.com/SPARQL-Anything/sparql.anything/issues/386">...</a>
*/
Expand Down
12 changes: 12 additions & 0 deletions sparql-anything-it/src/test/resources/issues/issue352-csv.sparql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Query template used by testIssue352 for the CSV fixture.
# The test replaces every %%%LOCATION%%% placeholder with the fixture's URI before parsing.
PREFIX fx: <http://sparql.xyz/facade-x/ns/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT DISTINCT ?fred ?sally
WHERE {
SERVICE <x-sparql-anything:location=%%%LOCATION%%%>
{
# Turns on the csv.headers option for this SERVICE call.
fx:properties fx:csv.headers true .
# ?fred and ?sally are the rdf:_1 values of the root's rdf:_1 and rdf:_2 members.
?root a fx:root ;
rdf:_1 [rdf:_1 ?fred] ;
rdf:_2 [rdf:_1 ?sally] .
}
}
15 changes: 15 additions & 0 deletions sparql-anything-it/src/test/resources/issues/issue352-xls.sparql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Query template used by testIssue352 for the spreadsheet fixture.
# The test replaces every %%%LOCATION%%% placeholder with the fixture's URI before parsing.
PREFIX fx: <http://sparql.xyz/facade-x/ns/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT DISTINCT ?fred ?sally
WHERE {
SERVICE <x-sparql-anything:location=%%%LOCATION%%%>
{
# The option triple is stated at SERVICE level, outside the GRAPH pattern below.
fx:properties fx:spreadsheet.headers true .
GRAPH <%%%LOCATION%%%#Sheet1> {
# ?fred and ?sally are the rdf:_1 values of the root's rdf:_1 and rdf:_2 members.
?root a fx:root ;
rdf:_1 [rdf:_1 ?fred] ;
rdf:_2 [rdf:_1 ?sally] .
}
}
}
3 changes: 3 additions & 0 deletions sparql-anything-it/src/test/resources/issues/issue352.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
,state,age,dog name
fred,CO,34,fido
sally,FL,44,wilma
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

import java.io.IOException;
import java.net.URL;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Properties;
import java.util.Set;
Expand Down Expand Up @@ -59,6 +60,16 @@ public void triplify(Properties properties, FacadeXGraphBuilder builder) throws

}

/**
 * Lists the media types declared by this triplifier.
 *
 * @return a new set holding the two spreadsheet media types
 */
@Override
public Set<String> getMimeTypes() {
    final String legacyExcel = "application/vnd.ms-excel";
    final String openXmlSheet = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
    return Sets.newHashSet(legacyExcel, openXmlSheet);
}

/**
 * Lists the file extensions declared by this triplifier.
 *
 * @return a new set holding {@code "xls"} and {@code "xlsx"}
 */
@Override
public Set<String> getExtensions() {
    final String binaryWorkbook = "xls";
    final String openXmlWorkbook = "xlsx";
    return Sets.newHashSet(binaryWorkbook, openXmlWorkbook);
}

private void populate(Sheet s, String dataSourceId, FacadeXGraphBuilder builder, boolean headers, boolean evaluateFormulas, boolean compositeValues) {

// Add type Root
Expand All @@ -72,30 +83,30 @@ private void populate(Sheet s, String dataSourceId, FacadeXGraphBuilder builder,
if (headers && rowNum == 0) {
Row row = s.getRow(rowNum);
int columnId = 0;
for (int cellNum = row.getFirstCellNum(); cellNum < row.getLastCellNum(); cellNum++) {
for (int cellNum = 0; cellNum < row.getLastCellNum(); cellNum++) {
columnId++;
Cell cell = row.getCell(cellNum);
Object value = extractCellValue(cell, evaluateFormulas);
String columnString = value.toString();

String columnName = columnString.strip();
if ("".equals(columnName)) {
columnName = Integer.toString(columnId);
continue;
}
int c = 0;
while (headers_map.containsValue(columnName)) {
c++;
columnName += "_" + c;
}

log.trace("adding column name >{}<", columnName);
log.trace("adding column name >{}< (column id {})", columnName, columnId);
headers_map.put(columnId, columnName);
}

} else {
// Rows
rowNumber++;
String row = "_Row_".concat(String.valueOf(rowNumber));
String row = "_Row_".concat(String.valueOf(rowNumber));
builder.addContainer(dataSourceId, SPARQLAnythingConstants.ROOT_ID, rowNumber, row);
Row record = s.getRow(rowNum);
logger.trace("Reading Row {} from sheet {}", rowNum, s.getSheetName());
Expand All @@ -104,18 +115,17 @@ private void populate(Sheet s, String dataSourceId, FacadeXGraphBuilder builder,
int columnId = 0;
for (int cellNum = record.getFirstCellNum(); cellNum < record.getLastCellNum(); cellNum++) {
Cell cell = record.getCell(cellNum);
columnId++;
if (compositeValues) {
String value = row.concat("_").concat(String.valueOf(cellNum));
extractCompositeCellValue(dataSourceId, value, cell, evaluateFormulas, builder);
columnId++;
if (headers && headers_map.containsKey(columnId)) {
builder.addContainer(dataSourceId, row, Triplifier.toSafeURIString(headers_map.get(columnId)), value);
} else {
builder.addValue(dataSourceId, row, columnId, value);
}
} else {
Object value = extractCellValue(cell, evaluateFormulas);
columnId++;
if (headers && headers_map.containsKey(columnId)) {
builder.addValue(dataSourceId, row, Triplifier.toSafeURIString(headers_map.get(columnId)), value);
} else {
Expand Down Expand Up @@ -152,7 +162,6 @@ private Object extractCellValue(Cell cell, boolean evaluateFormulas) {
return "";
}


private void extractCompositeCellValue(String dataSourceId, String containerId, Cell cell, boolean evaluateFormulas, FacadeXGraphBuilder builder) {
if (cell == null) return;
builder.addType(dataSourceId, containerId, cell.getCellType().toString());
Expand Down Expand Up @@ -191,12 +200,12 @@ private void extractCompositeCellValue(String dataSourceId, String containerId,
}
}

if (cell.getCellComment() != null){
if (cell.getCellComment() != null) {
Comment comment = cell.getCellComment();
if(comment.getAuthor()!=null){
if (comment.getAuthor() != null) {
builder.addValue(dataSourceId, containerId, "author", comment.getAuthor());
}
if(comment.getString()!=null){
if (comment.getString() != null) {
RichTextString commentRichTextString = comment.getString();
commentRichTextString.clearFormatting();
builder.addValue(dataSourceId, containerId, "threadedComment", commentRichTextString.getString());
Expand All @@ -206,14 +215,4 @@ private void extractCompositeCellValue(String dataSourceId, String containerId,

}

/**
 * Lists the media types declared by this triplifier.
 *
 * @return a new set holding the two spreadsheet media types
 */
@Override
public Set<String> getMimeTypes() {
    final String legacyExcel = "application/vnd.ms-excel";
    final String openXmlSheet = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
    return Sets.newHashSet(legacyExcel, openXmlSheet);
}

/**
 * Lists the file extensions declared by this triplifier.
 *
 * @return a new set holding {@code "xls"} and {@code "xlsx"}
 */
@Override
public Set<String> getExtensions() {
    final String binaryWorkbook = "xls";
    final String openXmlWorkbook = "xlsx";
    return Sets.newHashSet(binaryWorkbook, openXmlWorkbook);
}

}

0 comments on commit 18ad0f0

Please sign in to comment.