-
Notifications
You must be signed in to change notification settings - Fork 490
/
SearchFields.java
198 lines (186 loc) · 9.08 KB
/
SearchFields.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
package edu.harvard.iq.dataverse.search;
/**
* We define Solr search fields here in one central place so they can be used
* throughout the code but renamed here if need be.
*
* Note that there are many fields in Solr that are *not* here because their
* values come from the database. For example "authorName" comes from the
* database. We update the Solr schema.xml file by merging the output of `curl
* http://localhost:8080/api/admin/index/solr/schema` into the file in the
* source tree when a metadata block update warrants it.
*
* Generally speaking, we want the search fields to be readable. This is a
* challenge for long field names but a power user should be able to type
* "authorAffiliation:Harvard" into the general search box. A regular user is
* much more likely to used Advanced Search to populate that field
* automatically.
*
* Originally, these fields were all snake_case but since the dynamic fields are
* camelCase we might want to standardize on that.
*
* You'll notice that dynamic fields like this are used...
*
* - _s (string)
*
* - _ss (multivalued string)
*
* - _l (long)
*
* - _dt (datetime)
*
* ... and these endings should not be changed unless you plan to convert them
* to non-dynamic (by removing the ending) and specify their "type" in the Solr
* schema.xml.
*
* Most fields we want to be searchable but some are stored with indexed=false
* because we *don't* want them to be searchable and we're just using Solr as a
* convenient key/value store. Why go to the database if you don't have to? For
* a string here or there that needs to be available to both the GUI and the
* Search API, we can just store them in Solr.
*
* For faceting we use a "string" type. If you use something like "text_general"
* the field is tokenized ("Foo Bar" becomes "foo" "bar" which is not what we
* want). See also
* http://stackoverflow.com/questions/16559911/facet-query-will-give-wrong-output-on-dynamicfield-in-solr
*/
public class SearchFields {
/**
* @todo: consider making various dynamic fields (_s) static in schema.xml
* instead. Should they be stored in the database?
*/
// standard fields from example/solr/collection1/conf/schema.xml
// (but we are getting away from these...)
public static final String ID = "id";
public static final String NAME = "name";
/**
* @todo Do we want to support finding dataverses, datasets, and files with
* a query for description:foo? Maybe not, since people will probably just
* use basic search for this. They could also use "dvDescription:foo OR
* dsDescription:foo OR fileDescription:foo" if they *really* only want to
* target the description of all three types at once.
*
* See also https://redmine.hmdc.harvard.edu/issues/3745
*/
public static final String DESCRIPTION = "description";
/**
* Identifiers differ per DvObject: alias for dataverses, globalId for
* datasets, and database id for files.
*/
public static final String IDENTIFIER = "identifier";
/**
* Such as http://dx.doi.org/10.5072/FK2/HXI35W
*
* For files, the URL will be the parent dataset.
*/
public static final String PERSISTENT_URL = "persistentUrl";
public static final String UNF = "unf";
public static final String DATAVERSE_NAME = "dvName";
public static final String DATAVERSE_AFFILIATION = "dvAffiliation";
public static final String DATAVERSE_DESCRIPTION = "dvDescription";
public static final String DATAVERSE_CATEGORY = "dvCategory";
/**
* What is dvSubject_en for? How does it get populated into Solr? The
* behavior changed so that now the subjects of dataverses are based on
* their datasets. Should this be a string so we can facet on it more
* properly? Should all checkboxes on the advanced search page (controlled
* vocabularies) be backed by a string? When we rename this to "foobar" (a
* field Solr doesn't know about) why doesn't Solr complain when we "index
* all"? See also https://github.com/IQSS/dataverse/issues/1681
*/
public static final String DATAVERSE_SUBJECT = "dvSubject";
/**
* A "collapsed" facet (e.g. applies to both dataverses and datasets and is
* merged as a single facet in the GUI) like affiliation that needs to match
* the corresponding dynamic "facet" Solr field at the dataset level to work
* properly. Should we use/expose "_ss" when you click a facet? It needs to
* be different from "subject" which is used for general search but maybe we
* could have a convention like "subjectFacet" for the facets?
*/
public static final String SUBJECT = "subject_ss";
/**
* @todo think about how to tie the fact that this needs to be multivalued
* (_ss) because a multivalued facet (authorAffilition_ss) will be collapsed
* into it at index time. The business logic to determine if a data-driven
* metadata field should be indexed into Solr as a single or multiple value
* lives in the getSolrField() method of DatasetField.java
*
* AFFILIATION is used for the "collapsed" "Affiliation" facet that means
* either "Author Affiliation" or dataverse affiliation. It needs to be a
* string so we can facet on it and it needs to be multivalued because
* "Author Affiliation" can be multivalued.
*/
public static final String AFFILIATION = "affiliation_ss";
public static final String FILE_NAME = "fileName";
public static final String FILE_DESCRIPTION = "fileDescription";
/**
* Can be multivalued and includes both "friendly" and "group" versions:
* "PNG Image", "image"
*/
public static final String FILE_TYPE_SEARCHABLE = "fileType";
/**
* @todo Thie static variable not named properly. We want to expose an
* acutal MIME Type in https://github.com/IQSS/dataverse/issues/1595 . See
* also cleanup ticket at https://github.com/IQSS/dataverse/issues/1314
*
* i.e. "PNG Image"
*/
public static final String FILE_TYPE_FRIENDLY = "fileTypeDisplay";
public static final String FILE_CONTENT_TYPE = "fileContentType";
/**
* Used as a facet for file groups like "image" or "document"
*/
public static final String FILE_TYPE = "fileTypeGroupFacet";
public static final String FILE_SIZE_IN_BYTES = "fileSizeInBytes";
public static final String FILE_MD5 = "fileMd5";
public static final String FILENAME_WITHOUT_EXTENSION = "fileNameWithoutExtension";
public static final String SUBTREE = "subtreePaths";
// i.e. http://localhost:8080/search.xhtml?q=*&fq0=citationdate_dt:[2008-01-01T00%3A00%3A00Z+TO+2011-01-01T00%3A00%3A00Z%2B1YEAR}
// public static final String PRODUCTION_DATE_ORIGINAL = DatasetFieldConstant.productionDate + "_dt";
// public static final String PRODUCTION_DATE_YEAR_ONLY = DatasetFieldConstant.productionDate + "_i";
// public static final String DISTRIBUTION_DATE_ORIGINAL = DatasetFieldConstant.distributionDate + "_dt";
// public static final String DISTRIBUTION_DATE_YEAR_ONLY = DatasetFieldConstant.distributionDate + "_i";
/**
* Solr refers to "relevance" as "score"
*/
public static final String RELEVANCE = "score";
/**
* A dataverse, a dataset, or a file.
*/
public static final String TYPE = "dvObjectType";
public static final String NAME_SORT = "nameSort";
public static final String PUBLICATION_DATE = "publicationDate";
public static final String RELEASE_OR_CREATE_DATE = "dateSort";
/**
* i.e. "Mar 17, 2015"
*/
public static final String RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT = "dateFriendly";
public static final String DEFINITION_POINT = "definitionPointDocId";
public static final String DEFINITION_POINT_DVOBJECT_ID = "definitionPointDvObjectId";
public static final String DISCOVERABLE_BY = "discoverableBy";
/**
* i.e. "Unpublished", "Draft" (multivalued)
*/
public static final String PUBLICATION_STATUS = "publicationStatus";
/**
* @todo reconcile different with Solr schema.xml where type is Long rather
* than String.
*/
public static final String ENTITY_ID = "entityId";
public static final String PARENT_NAME = "parentName";
public static final String PARENT_ID = "parentId";
public static final String PARENT_IDENTIFIER = "parentIdentifier";
public static final String PARENT_CITATION = "parentCitation";
public static final String DATASET_DESCRIPTION = "dsDescriptionValue";
public static final String DATASET_CITATION = "citation";
public static final String DATASET_DEACCESSION_REASON = "deaccessionReason";
/**
* In contrast to PUBLICATION_DATE, this field applies only to datasets for
* more targeted results for just datasets. The format is YYYY (i.e.
* "2015").
*/
public static final String DATASET_PUBLICATION_DATE = "dsPublicationDate";
public static final String DATASET_PERSISTENT_ID = "dsPersistentId";
public static final String DATASET_VERSION_ID = "datasetVersionId";
public static final String VARIABLE_NAME = "variableName";
public static final String VARIABLE_LABEL = "variableLabel";
}