001/*
002 * File   : $Source$
003 * Date   : $Date$
004 * Version: $Revision$
005 *
006 * This library is part of OpenCms -
007 * the Open Source Content Management System
008 *
009 * Copyright (C) 2002 - 2009 Alkacon Software (http://www.alkacon.com)
010 *
011 * This library is free software; you can redistribute it and/or
012 * modify it under the terms of the GNU Lesser General Public
013 * License as published by the Free Software Foundation; either
014 * version 2.1 of the License, or (at your option) any later version.
015 *
016 * This library is distributed in the hope that it will be useful,
017 * but WITHOUT ANY WARRANTY; without even the implied warranty of
018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019 * Lesser General Public License for more details.
020 *
021 * For further information about Alkacon Software, please see the
022 * company website: http://www.alkacon.com
023 *
024 * For further information about OpenCms, please see the
025 * project website: http://www.opencms.org
026 *
027 * You should have received a copy of the GNU Lesser General Public
028 * License along with this library; if not, write to the Free Software
029 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
030 */
031
032package org.opencms.search.fields;
033
034import org.apache.solr.uninverting.UninvertingReader.Type;
035import org.opencms.util.CmsStringUtil;
036
037import java.io.Serializable;
038import java.util.ArrayList;
039import java.util.List;
040import java.util.Map;
041
042/**
043 * A abstract implementation for a search field.<p>
044 *
045 * @since 8.5.0
046 */
047public class CmsSearchField implements Serializable {
048
049    /** Name of the field that contains the (optional) category of the document (hardcoded). */
050    public static final String FIELD_CATEGORY = "category";
051
052    /** Name of the field that usually contains the complete content of the document (optional). */
053    public static final String FIELD_CONTENT = "content";
054
055    /** Name of the field that contains the complete extracted content of the document as serialized object (hardcoded). */
056    public static final String FIELD_CONTENT_BLOB = "contentblob";
057
058    /** Name of the field that contains the locale of the document. */
059    public static final String FIELD_CONTENT_LOCALES = "con_locales";
060
061    /** Name of the field that contains the document content date (hardcoded). */
062    public static final String FIELD_DATE_CONTENT = "contentdate";
063
064    /** Name of the field that contains the document creation date (hardcoded). */
065    public static final String FIELD_DATE_CREATED = "created";
066
067    /** Name of the field that contains the document creation date for fast lookup (hardcoded). */
068    public static final String FIELD_DATE_CREATED_LOOKUP = "created_lookup";
069
070    /** The field name for the expiration date. */
071    public static final String FIELD_DATE_EXPIRED = "expired";
072
073    /** Name of the field that contains the document last modification date (hardcoded). */
074    public static final String FIELD_DATE_LASTMODIFIED = "lastmodified";
075
076    /** Name of the field that contains the document last modification date for fast lookup (hardcoded). */
077    public static final String FIELD_DATE_LASTMODIFIED_LOOKUP = "lastmodified_lookup";
078
079    /** The lookup suffix for date fields. */
080    public static final String FIELD_DATE_LOOKUP_SUFFIX = "_lookup";
081
082    /** The field name for the release date. */
083    public static final String FIELD_DATE_RELEASED = "released";
084
085    /** The dependency type. */
086    public static final String FIELD_DEPENDENCY_TYPE = "dependencyType";
087
088    /** Name of the field that usually contains the value of the "Description" property of the document (optional). */
089    public static final String FIELD_DESCRIPTION = "description";
090
091    /** Name of the dynamic exact field. */
092    public static final String FIELD_DYNAMIC_EXACT = "_exact";
093
094    /** Name of the dynamic property field (searched properties). */
095    public static final String FIELD_DYNAMIC_PROPERTIES = "_prop";
096
097    /** Name of the dynamic property field (non-searched properties). */
098    public static final String FIELD_DYNAMIC_PROPERTIES_DIRECT = "_dprop";
099
100    /** The name of the dynamic field that stores the shortened value of the content field in order to save performance. */
101    public static final String FIELD_EXCERPT = "_excerpt";
102
103    /** Name of the field that contains the filename. */
104    public static final String FIELD_FILENAME = "filename";
105
106    /** Name of the field that contains the documents structure id. */
107    public static final String FIELD_ID = "id";
108
109    /** Name of the field that usually contains the value of the "Keywords" property of the document (optional). */
110    public static final String FIELD_KEYWORDS = "keywords";
111
112    /** The field name for the link. */
113    public static final String FIELD_LINK = "link";
114
115    /**
116     * Name of the field that usually combines all document "meta" information,
117     * that is the values of the "Title", "Keywords" and "Description" properties (optional).
118     */
119    public static final String FIELD_META = "meta";
120
121    /** Name of the field that contains the mime type. */
122    public static final String FIELD_MIMETYPE = "mimetype";
123
124    /** Name of the field that contains all VFS parent folders of a document (hardcoded). */
125    public static final String FIELD_PARENT_FOLDERS = "parent-folders";
126
127    /** Name of the field that contains the document root path in the VFS (hardcoded). */
128    public static final String FIELD_PATH = "path";
129
130    /** The prefix used to store dependency fields. */
131    public static final String FIELD_PREFIX_DEPENDENCY = "dep_";
132
133    /** The prefix for dynamic fields. */
134    public static final String FIELD_PREFIX_DYNAMIC = "*_";
135
136    /** The default text field prefix. */
137    public static final String FIELD_PREFIX_TEXT = "text_";
138
139    /** The default string field postfix. */
140    public static final String FIELD_POSTFIX_STRING = "_s";
141
142    /** The default (single-valued) date field postfix. */
143    public static final String FIELD_POSTFIX_DATE = "_dt";
144
145    /** The default (multi-valued) dates field postfix. */
146    public static final String FIELD_POSTFIX_DATES = "_dts";
147
148    /** The default int field postfix. */
149    public static final String FIELD_POSTFIX_INT = "_i";
150
151    /** The default field postfix for alpha-numeric sorting. */
152    public static final String FIELD_POSTFIX_SORT = "_sort";
153
154    /**
155     * Name of the field that contains the (optional) document priority,
156     * which can be used to boost the document in the result list (hardcoded).
157     */
158    public static final String FIELD_PRIORITY = "priority";
159
160    /** Name of the field that contains the resource locales of the document. */
161    public static final String FIELD_RESOURCE_LOCALES = "res_locales";
162
163    /** The name of the score field. */
164    public static final String FIELD_SCORE = "score";
165
166    /** Name of the field that contains the searched property value of 'search.exclude'. */
167    public static final String FIELD_SEARCH_EXCLUDE = "search_exclude";
168
169    /** Name of the field that usually contains file size. */
170    public static final String FIELD_SIZE = "size";
171
172    /** Name of the field that contains the lower-case title, untokenized, for sorting. */
173    public static final String FIELD_SORT_TITLE = "sort-title";
174
175    /** Name of the field that contains the resource state. */
176    public static final String FIELD_STATE = "state";
177
178    /** Name of the field that contains the file name suffix of the resource. */
179    public static final String FIELD_SUFFIX = "suffix";
180
181    /** Name of the field that contains the general text of a resource and also serves as prefix. */
182    public static final String FIELD_TEXT = "text";
183
184    /**
185     * Name of the field that usually contains the value of the "Title" property of the document
186     * as a keyword used for sorting and also for retrieving the title text (optional).
187     *
188     * Please note: This field should NOT be used for searching. Use {@link #FIELD_TITLE_UNSTORED} instead.<p>
189     */
190    public static final String FIELD_TITLE = "title-key";
191
192    /**
193     * Name of the field that usually contains the value of the "Title" property of the document
194     * in an analyzed form used for searching in the title (optional).
195     */
196    public static final String FIELD_TITLE_UNSTORED = "title";
197
198    // TODO: Comments
199    public static final String FIELD_TIMESTAMP = "timestamp";
200    public static final String FIELD_PATH_HIERARCHY = "path_hierarchy";
201    /** Name of the field that contains the gallery index container information. */
202    public static final String FIELD_CONTAINER_TYPES = "container_types";
203    public static final String FIELD_CATEGORY_EXACT = "category_exact";
204    /** Name of the field that contains the gallery index additional information. */
205    public static final String FIELD_ADDITIONAL_INFO = "additional_info";
206    public static final String FIELD_PLACE = "place";
207    public static final String FIELD_SPELL = "spell";
208    // TODO: concat those field names; "text" + locale, where needed like content fields or exceprt fields
209    public static final String FIELD_TEXT_EN = "text_en";
210    public static final String FIELD_TEXT_DE = "text_de";
211    public static final String FIELD_TEXT_EL = "text_el";
212    public static final String FIELD_TEXT_ES = "text_es";
213    public static final String FIELD_TEXT_FR = "text_fr";
214    public static final String FIELD_TEXT_HU = "text_hu";
215    public static final String FIELD_TEXT_IT = "text_it";
216    public static final String FIELD_SEARCH_CHANNEL = "search_channel";
217
218    /** The field PREFIX of the fields that contain the display title (without locale and postfix "_s"). */
219    public static final String FIELD_DISPTITLE = "disptitle";
220
221    /** The field PREFIX of the fields that contain the display order (without locale and postfix "_i"). */
222    public static final String FIELD_DISPORDER = "disporder";
223
224    /** The field PREFIX where the start date for the single entry of a serial date entry set is stored. */
225    public static final String FIELD_INSTANCEDATE = "instancedate";
226
227    /** The field PREFIX where the end date for the single entry of a serial date entry set is stored. */
228    public static final String FIELD_INSTANCEDATE_END = "instancedateend";
229
230    /** The field PREFIX where the date until which the single entry of a serial date entry should be treated as "current" is stored. */
231    public static final String FIELD_INSTANCEDATE_CURRENT_TILL = "instancedatecurrenttill";
232
233    /** The field where the dates for a serial date are stored. */
234    public static final String FIELD_SERIESDATES = "seriesdates" + FIELD_POSTFIX_DATES;
235
236    /** The field where the end dates for a serial date are stored.
237     *  NOTE: The field is only used during indexing and not stored in the content itself.
238     */
239    public static final String FIELD_SERIESDATES_END = "seriesdatesend" + FIELD_POSTFIX_DATES;
240
241    /** The field where the dates until when the single serial dates are treated as "current" are stored.
242     *  NOTE: The field is only used during indexing and not stored in the content itself.
243     */
244    public static final String FIELD_SERIESDATES_CURRENT_TILL = "seriesdatescurrenttill" + FIELD_POSTFIX_DATES;
245
246    /** The field where the type of the date series is stored. */
247    public static final String FIELD_SERIESDATES_TYPE = "seriesdatestype" + FIELD_POSTFIX_STRING;
248
249    /** Name of the field that contains the type of the document. */
250    public static final String FIELD_TYPE = "type";
251
252    /** Name of the field that contains the user created. */
253    public static final String FIELD_USER_CREATED = "userCreated";
254
255    /** Name of the field that contains the user last modified. */
256    public static final String FIELD_USER_LAST_MODIFIED = "userLastModified";
257
258    /** Name of the field that contains the latest version number of the resource. */
259    public static final String FIELD_VERSION = "version";
260
261    /** Name of the field that contains the unique Solr id. */
262    public static final String FIELD_SOLR_ID = "solr_id";
263
264    /** Serial version UID. */
265    private static final long serialVersionUID = 3185631015824549119L;
266
267    /** A default value for the field in case the content does not provide the value. */
268    private String m_defaultValue;
269
270    /** Indicates if this field should be used for generating the excerpt. */
271    private boolean m_excerpt;
272
273    /** Indicates if the content of this field should be indexed. */
274    private boolean m_indexed;
275
276    /** The search field mappings. */
277    private List<I_CmsSearchFieldMapping> m_mappings;
278
279    /** The name of the field. */
280    private String m_name;
281
282    /** Indicates if the content of this field should be stored. */
283    private boolean m_stored;
284
285    /**
286     * Creates a new search field.<p>
287     */
288    public CmsSearchField() {
289
290        m_mappings = new ArrayList<I_CmsSearchFieldMapping>();
291    }
292
293    /**
294     * Creates a new search field.<p>
295     *
296     * @param name the name of the field, see {@link #setName(String)}
297     * @param defaultValue the default value to use, see {@link #setDefaultValue(String)}
298     *
299     */
300    public CmsSearchField(String name, String defaultValue) {
301
302        this();
303        m_name = name;
304        m_defaultValue = defaultValue;
305    }
306
307    /** To allow sorting on a field the field must be added to the map given to {@link org.apache.solr.uninverting.UninvertingReader#wrap(org.apache.lucene.index.DirectoryReader, Map)}.
308     *  The method adds all default fields.
309     * @param uninvertingMap the map to which the fields are added.
310     */
311    public static void addUninvertingMappings(Map<String, Type> uninvertingMap) {
312
313        uninvertingMap.put(FIELD_CATEGORY, Type.SORTED);
314        uninvertingMap.put(FIELD_CONTENT, Type.SORTED);
315        uninvertingMap.put(FIELD_CONTENT_BLOB, Type.SORTED);
316        uninvertingMap.put(FIELD_CONTENT_LOCALES, Type.SORTED);
317        uninvertingMap.put(FIELD_DATE_CONTENT, Type.SORTED);
318        uninvertingMap.put(FIELD_DATE_CREATED, Type.SORTED);
319        uninvertingMap.put(FIELD_DATE_CREATED_LOOKUP, Type.SORTED);
320        uninvertingMap.put(FIELD_DATE_EXPIRED, Type.SORTED);
321        uninvertingMap.put(FIELD_DATE_LASTMODIFIED, Type.SORTED);
322        uninvertingMap.put(FIELD_DATE_LASTMODIFIED_LOOKUP, Type.SORTED);
323        uninvertingMap.put(FIELD_DATE_LOOKUP_SUFFIX, Type.SORTED);
324        uninvertingMap.put(FIELD_DATE_RELEASED, Type.SORTED);
325        uninvertingMap.put(FIELD_DEPENDENCY_TYPE, Type.SORTED);
326        uninvertingMap.put(FIELD_DESCRIPTION, Type.SORTED);
327        uninvertingMap.put(FIELD_DYNAMIC_EXACT, Type.SORTED);
328        uninvertingMap.put(FIELD_DYNAMIC_PROPERTIES, Type.SORTED);
329        uninvertingMap.put(FIELD_EXCERPT, Type.SORTED);
330        uninvertingMap.put(FIELD_FILENAME, Type.SORTED);
331        uninvertingMap.put(FIELD_ID, Type.SORTED);
332        uninvertingMap.put(FIELD_KEYWORDS, Type.SORTED);
333        uninvertingMap.put(FIELD_LINK, Type.SORTED);
334        uninvertingMap.put(FIELD_META, Type.SORTED);
335        uninvertingMap.put(FIELD_MIMETYPE, Type.SORTED);
336        uninvertingMap.put(FIELD_PARENT_FOLDERS, Type.SORTED);
337        uninvertingMap.put(FIELD_PATH, Type.SORTED);
338        uninvertingMap.put(FIELD_PREFIX_DEPENDENCY, Type.SORTED);
339        uninvertingMap.put(FIELD_PREFIX_DYNAMIC, Type.SORTED);
340        uninvertingMap.put(FIELD_PREFIX_TEXT, Type.SORTED);
341        uninvertingMap.put(FIELD_PRIORITY, Type.SORTED);
342        uninvertingMap.put(FIELD_RESOURCE_LOCALES, Type.SORTED);
343        uninvertingMap.put(FIELD_SCORE, Type.SORTED);
344        uninvertingMap.put(FIELD_SEARCH_EXCLUDE, Type.SORTED);
345        uninvertingMap.put(FIELD_SIZE, Type.SORTED);
346        uninvertingMap.put(FIELD_SORT_TITLE, Type.SORTED);
347        uninvertingMap.put(FIELD_STATE, Type.SORTED);
348        uninvertingMap.put(FIELD_SUFFIX, Type.SORTED);
349        uninvertingMap.put(FIELD_TEXT, Type.SORTED);
350        uninvertingMap.put(FIELD_TITLE, Type.SORTED);
351        uninvertingMap.put(FIELD_TITLE_UNSTORED, Type.SORTED);
352        uninvertingMap.put(FIELD_TYPE, Type.SORTED);
353        uninvertingMap.put(FIELD_USER_CREATED, Type.SORTED);
354        uninvertingMap.put(FIELD_USER_LAST_MODIFIED, Type.SORTED);
355        uninvertingMap.put(FIELD_VERSION, Type.SORTED);
356    }
357
358    /**
359     * Adds a new field mapping to the internal list of mappings.<p>
360     *
361     * @param mapping the mapping to add
362     */
363    public void addMapping(I_CmsSearchFieldMapping mapping) {
364
365        m_mappings.add(mapping);
366    }
367
368    /**
369     * Two fields are equal if the name of the Lucene field is equal.<p>
370     *
371     * @see java.lang.Object#equals(java.lang.Object)
372     */
373    @Override
374    public boolean equals(Object obj) {
375
376        if ((obj instanceof CmsSearchField)) {
377            return CmsStringUtil.isEqual(m_name, ((CmsSearchField)obj).getName());
378        }
379        return false;
380    }
381
382    /**
383     * Returns the default value to use if no content for this field was collected.<p>
384     *
385     * In case no default is configured, <code>null</code> is returned.<p>
386     *
387     * @return the default value to use if no content for this field was collected
388     */
389    public String getDefaultValue() {
390
391        return m_defaultValue;
392    }
393
394    /**
395     * Returns the String value state of this field if it is indexed (and possibly tokenized) in the index.<p>
396     *
397     * <b>IMPORTANT:</b> Not supported by Solr
398     *
399     * @return the String value state of this field if it is indexed (and possibly tokenized) in the index
400     */
401    public String getIndexed() {
402
403        return null;
404    }
405
406    /**
407     * Returns the mappings for this field.<p>
408     *
409     * @return the mappings for this field
410     */
411    public List<I_CmsSearchFieldMapping> getMappings() {
412
413        return m_mappings;
414    }
415
416    /**
417     * Returns the name of this field in the Lucene search index.<p>
418     *
419     * @return the name of this field in the Lucene search index
420     */
421    public String getName() {
422
423        return m_name;
424    }
425
426    /**
427     * The hash code for a field is based only on the field name.<p>
428     *
429     * @see java.lang.Object#hashCode()
430     */
431    @Override
432    public int hashCode() {
433
434        return m_name == null ? 41 : m_name.hashCode();
435    }
436
437    /**
438     * Returns the indexed.<p>
439     *
440     * @return the indexed
441     */
442    public boolean isIndexed() {
443
444        return m_indexed;
445    }
446
447    /**
448     * Returns <code>true</code> if this fields content is used in the search result excerpt.<p>
449     *
450     * @return <code>true</code> if this fields content is used in the search result excerpt
451     *
452     * @see #isStored()
453     */
454    public boolean isInExcerpt() {
455
456        return m_excerpt;
457    }
458
459    /**
460     * Returns <code>true</code> if the content of this field is stored in the Lucene index.<p>
461     *
462     * Please refer to the Lucene documentation about {@link org.apache.lucene.document.Field.Store}
463     * for the concept behind stored and unstored fields.<p>
464     *
465     * @return <code>true</code> if the content of this field is stored in the Lucene index
466     */
467    public boolean isStored() {
468
469        return m_stored;
470    }
471
472    /**
473     * Sets the default value to use if no content for this field was collected.<p>
474     *
475     * @param defaultValue the default value to set
476     */
477    public void setDefaultValue(String defaultValue) {
478
479        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(defaultValue)) {
480            m_defaultValue = defaultValue.trim();
481        } else {
482            m_defaultValue = null;
483        }
484    }
485
486    /**
487     * Controls if the content of this field is indexed (and possibly tokenized) in the Lucene index.<p>
488     *
489     * @param indexed the indexed to set
490     */
491    public void setIndexed(boolean indexed) {
492
493        m_indexed = indexed;
494    }
495
496    /**
497     * Controls if this fields content is used in the search result excerpt.<p>
498     *
499     * @param excerpt if <code>true</code>, then this fields content is used in the search excerpt
500     */
501    public void setInExcerpt(boolean excerpt) {
502
503        m_excerpt = excerpt;
504    }
505
506    /**
507     * Sets the name of this field in the Lucene search index.<p>
508     *
509     * @param fieldName the name to set
510     */
511    public void setName(String fieldName) {
512
513        m_name = fieldName;
514    }
515
516    /**
517     * Controls if the content of this field is stored in the Lucene index.<p>
518     *
519     * Please refer to the Lucene documentation about {@link org.apache.lucene.document.Field.Store}
520     * for the concept behind stored and unstored fields.<p>
521     *
522     * @param stored if <code>true</code>, then the field content is stored
523     */
524    public void setStored(boolean stored) {
525
526        m_stored = stored;
527    }
528
529    /**
530     * @see java.lang.Object#toString()
531     */
532    @Override
533    public String toString() {
534
535        return getName();
536    }
537}