001/* 002 * File : $Source$ 003 * Date : $Date$ 004 * Version: $Revision$ 005 * 006 * This library is part of OpenCms - 007 * the Open Source Content Management System 008 * 009 * Copyright (C) 2002 - 2009 Alkacon Software (http://www.alkacon.com) 010 * 011 * This library is free software; you can redistribute it and/or 012 * modify it under the terms of the GNU Lesser General Public 013 * License as published by the Free Software Foundation; either 014 * version 2.1 of the License, or (at your option) any later version. 015 * 016 * This library is distributed in the hope that it will be useful, 017 * but WITHOUT ANY WARRANTY; without even the implied warranty of 018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 019 * Lesser General Public License for more details. 020 * 021 * For further information about Alkacon Software, please see the 022 * company website: http://www.alkacon.com 023 * 024 * For further information about OpenCms, please see the 025 * project website: http://www.opencms.org 026 * 027 * You should have received a copy of the GNU Lesser General Public 028 * License along with this library; if not, write to the Free Software 029 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 030 */ 031 032package org.opencms.search.fields; 033 034import org.apache.solr.uninverting.UninvertingReader.Type; 035import org.opencms.util.CmsStringUtil; 036 037import java.io.Serializable; 038import java.util.ArrayList; 039import java.util.List; 040import java.util.Map; 041 042/** 043 * A abstract implementation for a search field.<p> 044 * 045 * @since 8.5.0 046 */ 047public class CmsSearchField implements Serializable { 048 049 /** Name of the field that contains the (optional) category of the document (hardcoded). */ 050 public static final String FIELD_CATEGORY = "category"; 051 052 /** Name of the field that usually contains the complete content of the document (optional). */ 053 public static final String FIELD_CONTENT = "content"; 054 055 /** Name of the field that contains the complete extracted content of the document as serialized object (hardcoded). */ 056 public static final String FIELD_CONTENT_BLOB = "contentblob"; 057 058 /** Name of the field that contains the locale of the document. */ 059 public static final String FIELD_CONTENT_LOCALES = "con_locales"; 060 061 /** Name of the field that contains the document content date (hardcoded). */ 062 public static final String FIELD_DATE_CONTENT = "contentdate"; 063 064 /** Name of the field that contains the document creation date (hardcoded). */ 065 public static final String FIELD_DATE_CREATED = "created"; 066 067 /** Name of the field that contains the document creation date for fast lookup (hardcoded). */ 068 public static final String FIELD_DATE_CREATED_LOOKUP = "created_lookup"; 069 070 /** The field name for the expiration date. */ 071 public static final String FIELD_DATE_EXPIRED = "expired"; 072 073 /** Name of the field that contains the document last modification date (hardcoded). */ 074 public static final String FIELD_DATE_LASTMODIFIED = "lastmodified"; 075 076 /** Name of the field that contains the document last modification date for fast lookup (hardcoded). */ 077 public static final String FIELD_DATE_LASTMODIFIED_LOOKUP = "lastmodified_lookup"; 078 079 /** The lookup suffix for date fields. */ 080 public static final String FIELD_DATE_LOOKUP_SUFFIX = "_lookup"; 081 082 /** The field name for the release date. */ 083 public static final String FIELD_DATE_RELEASED = "released"; 084 085 /** The dependency type. */ 086 public static final String FIELD_DEPENDENCY_TYPE = "dependencyType"; 087 088 /** Name of the field that usually contains the value of the "Description" property of the document (optional). */ 089 public static final String FIELD_DESCRIPTION = "description"; 090 091 /** Name of the dynamic exact field. */ 092 public static final String FIELD_DYNAMIC_EXACT = "_exact"; 093 094 /** Name of the dynamic property field (searched properties). */ 095 public static final String FIELD_DYNAMIC_PROPERTIES = "_prop"; 096 097 /** Name of the dynamic property field (non-searched properties). */ 098 public static final String FIELD_DYNAMIC_PROPERTIES_DIRECT = "_dprop"; 099 100 /** The name of the dynamic field that stores the shortened value of the content field in order to save performance. */ 101 public static final String FIELD_EXCERPT = "_excerpt"; 102 103 /** Name of the field that contains the filename. */ 104 public static final String FIELD_FILENAME = "filename"; 105 106 /** Name of the field that contains the documents structure id. */ 107 public static final String FIELD_ID = "id"; 108 109 /** Name of the field that usually contains the value of the "Keywords" property of the document (optional). */ 110 public static final String FIELD_KEYWORDS = "keywords"; 111 112 /** The field name for the link. */ 113 public static final String FIELD_LINK = "link"; 114 115 /** 116 * Name of the field that usually combines all document "meta" information, 117 * that is the values of the "Title", "Keywords" and "Description" properties (optional). 118 */ 119 public static final String FIELD_META = "meta"; 120 121 /** Name of the field that contains the mime type. */ 122 public static final String FIELD_MIMETYPE = "mimetype"; 123 124 /** Name of the field that contains all VFS parent folders of a document (hardcoded). */ 125 public static final String FIELD_PARENT_FOLDERS = "parent-folders"; 126 127 /** Name of the field that contains the document root path in the VFS (hardcoded). */ 128 public static final String FIELD_PATH = "path"; 129 130 /** The prefix used to store dependency fields. */ 131 public static final String FIELD_PREFIX_DEPENDENCY = "dep_"; 132 133 /** The prefix for dynamic fields. */ 134 public static final String FIELD_PREFIX_DYNAMIC = "*_"; 135 136 /** The default text field prefix. */ 137 public static final String FIELD_PREFIX_TEXT = "text_"; 138 139 /** The default string field postfix. */ 140 public static final String FIELD_POSTFIX_STRING = "_s"; 141 142 /** The default (single-valued) date field postfix. */ 143 public static final String FIELD_POSTFIX_DATE = "_dt"; 144 145 /** The default (multi-valued) dates field postfix. */ 146 public static final String FIELD_POSTFIX_DATES = "_dts"; 147 148 /** The default int field postfix. */ 149 public static final String FIELD_POSTFIX_INT = "_i"; 150 151 /** The default field postfix for alpha-numeric sorting. */ 152 public static final String FIELD_POSTFIX_SORT = "_sort"; 153 154 /** 155 * Name of the field that contains the (optional) document priority, 156 * which can be used to boost the document in the result list (hardcoded). 157 */ 158 public static final String FIELD_PRIORITY = "priority"; 159 160 /** Name of the field that contains the resource locales of the document. */ 161 public static final String FIELD_RESOURCE_LOCALES = "res_locales"; 162 163 /** The name of the score field. */ 164 public static final String FIELD_SCORE = "score"; 165 166 /** Name of the field that contains the searched property value of 'search.exclude'. */ 167 public static final String FIELD_SEARCH_EXCLUDE = "search_exclude"; 168 169 /** Name of the field that usually contains file size. */ 170 public static final String FIELD_SIZE = "size"; 171 172 /** Name of the field that contains the lower-case title, untokenized, for sorting. */ 173 public static final String FIELD_SORT_TITLE = "sort-title"; 174 175 /** Name of the field that contains the resource state. */ 176 public static final String FIELD_STATE = "state"; 177 178 /** Name of the field that contains the file name suffix of the resource. */ 179 public static final String FIELD_SUFFIX = "suffix"; 180 181 /** Name of the field that contains the general text of a resource and also serves as prefix. */ 182 public static final String FIELD_TEXT = "text"; 183 184 /** 185 * Name of the field that usually contains the value of the "Title" property of the document 186 * as a keyword used for sorting and also for retrieving the title text (optional). 187 * 188 * Please note: This field should NOT be used for searching. Use {@link #FIELD_TITLE_UNSTORED} instead.<p> 189 */ 190 public static final String FIELD_TITLE = "title-key"; 191 192 /** 193 * Name of the field that usually contains the value of the "Title" property of the document 194 * in an analyzed form used for searching in the title (optional). 195 */ 196 public static final String FIELD_TITLE_UNSTORED = "title"; 197 198 // TODO: Comments 199 public static final String FIELD_TIMESTAMP = "timestamp"; 200 public static final String FIELD_PATH_HIERARCHY = "path_hierarchy"; 201 /** Name of the field that contains the gallery index container information. */ 202 public static final String FIELD_CONTAINER_TYPES = "container_types"; 203 public static final String FIELD_CATEGORY_EXACT = "category_exact"; 204 /** Name of the field that contains the gallery index additional information. */ 205 public static final String FIELD_ADDITIONAL_INFO = "additional_info"; 206 public static final String FIELD_PLACE = "place"; 207 public static final String FIELD_SPELL = "spell"; 208 // TODO: concat those field names; "text" + locale, where needed like content fields or exceprt fields 209 public static final String FIELD_TEXT_EN = "text_en"; 210 public static final String FIELD_TEXT_DE = "text_de"; 211 public static final String FIELD_TEXT_EL = "text_el"; 212 public static final String FIELD_TEXT_ES = "text_es"; 213 public static final String FIELD_TEXT_FR = "text_fr"; 214 public static final String FIELD_TEXT_HU = "text_hu"; 215 public static final String FIELD_TEXT_IT = "text_it"; 216 public static final String FIELD_SEARCH_CHANNEL = "search_channel"; 217 218 /** The field PREFIX of the fields that contain the display title (without locale and postfix "_s"). */ 219 public static final String FIELD_DISPTITLE = "disptitle"; 220 221 /** The field PREFIX of the fields that contain the display order (without locale and postfix "_i"). */ 222 public static final String FIELD_DISPORDER = "disporder"; 223 224 /** The field PREFIX where the start date for the single entry of a serial date entry set is stored. */ 225 public static final String FIELD_INSTANCEDATE = "instancedate"; 226 227 /** The field PREFIX where the end date for the single entry of a serial date entry set is stored. */ 228 public static final String FIELD_INSTANCEDATE_END = "instancedateend"; 229 230 /** The field PREFIX where the date until which the single entry of a serial date entry should be treated as "current" is stored. */ 231 public static final String FIELD_INSTANCEDATE_CURRENT_TILL = "instancedatecurrenttill"; 232 233 /** The field where the dates for a serial date are stored. */ 234 public static final String FIELD_SERIESDATES = "seriesdates" + FIELD_POSTFIX_DATES; 235 236 /** The field where the end dates for a serial date are stored. 237 * NOTE: The field is only used during indexing and not stored in the content itself. 238 */ 239 public static final String FIELD_SERIESDATES_END = "seriesdatesend" + FIELD_POSTFIX_DATES; 240 241 /** The field where the dates until when the single serial dates are treated as "current" are stored. 242 * NOTE: The field is only used during indexing and not stored in the content itself. 243 */ 244 public static final String FIELD_SERIESDATES_CURRENT_TILL = "seriesdatescurrenttill" + FIELD_POSTFIX_DATES; 245 246 /** The field where the type of the date series is stored. */ 247 public static final String FIELD_SERIESDATES_TYPE = "seriesdatestype" + FIELD_POSTFIX_STRING; 248 249 /** Name of the field that contains the type of the document. */ 250 public static final String FIELD_TYPE = "type"; 251 252 /** Name of the field that contains the user created. */ 253 public static final String FIELD_USER_CREATED = "userCreated"; 254 255 /** Name of the field that contains the user last modified. */ 256 public static final String FIELD_USER_LAST_MODIFIED = "userLastModified"; 257 258 /** Name of the field that contains the latest version number of the resource. */ 259 public static final String FIELD_VERSION = "version"; 260 261 /** Name of the field that contains the unique Solr id. */ 262 public static final String FIELD_SOLR_ID = "solr_id"; 263 264 /** Serial version UID. */ 265 private static final long serialVersionUID = 3185631015824549119L; 266 267 /** A default value for the field in case the content does not provide the value. */ 268 private String m_defaultValue; 269 270 /** Indicates if this field should be used for generating the excerpt. */ 271 private boolean m_excerpt; 272 273 /** Indicates if the content of this field should be indexed. */ 274 private boolean m_indexed; 275 276 /** The search field mappings. */ 277 private List<I_CmsSearchFieldMapping> m_mappings; 278 279 /** The name of the field. */ 280 private String m_name; 281 282 /** Indicates if the content of this field should be stored. */ 283 private boolean m_stored; 284 285 /** 286 * Creates a new search field.<p> 287 */ 288 public CmsSearchField() { 289 290 m_mappings = new ArrayList<I_CmsSearchFieldMapping>(); 291 } 292 293 /** 294 * Creates a new search field.<p> 295 * 296 * @param name the name of the field, see {@link #setName(String)} 297 * @param defaultValue the default value to use, see {@link #setDefaultValue(String)} 298 * 299 */ 300 public CmsSearchField(String name, String defaultValue) { 301 302 this(); 303 m_name = name; 304 m_defaultValue = defaultValue; 305 } 306 307 /** To allow sorting on a field the field must be added to the map given to {@link org.apache.solr.uninverting.UninvertingReader#wrap(org.apache.lucene.index.DirectoryReader, Map)}. 308 * The method adds all default fields. 309 * @param uninvertingMap the map to which the fields are added. 310 */ 311 public static void addUninvertingMappings(Map<String, Type> uninvertingMap) { 312 313 uninvertingMap.put(FIELD_CATEGORY, Type.SORTED); 314 uninvertingMap.put(FIELD_CONTENT, Type.SORTED); 315 uninvertingMap.put(FIELD_CONTENT_BLOB, Type.SORTED); 316 uninvertingMap.put(FIELD_CONTENT_LOCALES, Type.SORTED); 317 uninvertingMap.put(FIELD_DATE_CONTENT, Type.SORTED); 318 uninvertingMap.put(FIELD_DATE_CREATED, Type.SORTED); 319 uninvertingMap.put(FIELD_DATE_CREATED_LOOKUP, Type.SORTED); 320 uninvertingMap.put(FIELD_DATE_EXPIRED, Type.SORTED); 321 uninvertingMap.put(FIELD_DATE_LASTMODIFIED, Type.SORTED); 322 uninvertingMap.put(FIELD_DATE_LASTMODIFIED_LOOKUP, Type.SORTED); 323 uninvertingMap.put(FIELD_DATE_LOOKUP_SUFFIX, Type.SORTED); 324 uninvertingMap.put(FIELD_DATE_RELEASED, Type.SORTED); 325 uninvertingMap.put(FIELD_DEPENDENCY_TYPE, Type.SORTED); 326 uninvertingMap.put(FIELD_DESCRIPTION, Type.SORTED); 327 uninvertingMap.put(FIELD_DYNAMIC_EXACT, Type.SORTED); 328 uninvertingMap.put(FIELD_DYNAMIC_PROPERTIES, Type.SORTED); 329 uninvertingMap.put(FIELD_EXCERPT, Type.SORTED); 330 uninvertingMap.put(FIELD_FILENAME, Type.SORTED); 331 uninvertingMap.put(FIELD_ID, Type.SORTED); 332 uninvertingMap.put(FIELD_KEYWORDS, Type.SORTED); 333 uninvertingMap.put(FIELD_LINK, Type.SORTED); 334 uninvertingMap.put(FIELD_META, Type.SORTED); 335 uninvertingMap.put(FIELD_MIMETYPE, Type.SORTED); 336 uninvertingMap.put(FIELD_PARENT_FOLDERS, Type.SORTED); 337 uninvertingMap.put(FIELD_PATH, Type.SORTED); 338 uninvertingMap.put(FIELD_PREFIX_DEPENDENCY, Type.SORTED); 339 uninvertingMap.put(FIELD_PREFIX_DYNAMIC, Type.SORTED); 340 uninvertingMap.put(FIELD_PREFIX_TEXT, Type.SORTED); 341 uninvertingMap.put(FIELD_PRIORITY, Type.SORTED); 342 uninvertingMap.put(FIELD_RESOURCE_LOCALES, Type.SORTED); 343 uninvertingMap.put(FIELD_SCORE, Type.SORTED); 344 uninvertingMap.put(FIELD_SEARCH_EXCLUDE, Type.SORTED); 345 uninvertingMap.put(FIELD_SIZE, Type.SORTED); 346 uninvertingMap.put(FIELD_SORT_TITLE, Type.SORTED); 347 uninvertingMap.put(FIELD_STATE, Type.SORTED); 348 uninvertingMap.put(FIELD_SUFFIX, Type.SORTED); 349 uninvertingMap.put(FIELD_TEXT, Type.SORTED); 350 uninvertingMap.put(FIELD_TITLE, Type.SORTED); 351 uninvertingMap.put(FIELD_TITLE_UNSTORED, Type.SORTED); 352 uninvertingMap.put(FIELD_TYPE, Type.SORTED); 353 uninvertingMap.put(FIELD_USER_CREATED, Type.SORTED); 354 uninvertingMap.put(FIELD_USER_LAST_MODIFIED, Type.SORTED); 355 uninvertingMap.put(FIELD_VERSION, Type.SORTED); 356 } 357 358 /** 359 * Adds a new field mapping to the internal list of mappings.<p> 360 * 361 * @param mapping the mapping to add 362 */ 363 public void addMapping(I_CmsSearchFieldMapping mapping) { 364 365 m_mappings.add(mapping); 366 } 367 368 /** 369 * Two fields are equal if the name of the Lucene field is equal.<p> 370 * 371 * @see java.lang.Object#equals(java.lang.Object) 372 */ 373 @Override 374 public boolean equals(Object obj) { 375 376 if ((obj instanceof CmsSearchField)) { 377 return CmsStringUtil.isEqual(m_name, ((CmsSearchField)obj).getName()); 378 } 379 return false; 380 } 381 382 /** 383 * Returns the default value to use if no content for this field was collected.<p> 384 * 385 * In case no default is configured, <code>null</code> is returned.<p> 386 * 387 * @return the default value to use if no content for this field was collected 388 */ 389 public String getDefaultValue() { 390 391 return m_defaultValue; 392 } 393 394 /** 395 * Returns the String value state of this field if it is indexed (and possibly tokenized) in the index.<p> 396 * 397 * <b>IMPORTANT:</b> Not supported by Solr 398 * 399 * @return the String value state of this field if it is indexed (and possibly tokenized) in the index 400 */ 401 public String getIndexed() { 402 403 return null; 404 } 405 406 /** 407 * Returns the mappings for this field.<p> 408 * 409 * @return the mappings for this field 410 */ 411 public List<I_CmsSearchFieldMapping> getMappings() { 412 413 return m_mappings; 414 } 415 416 /** 417 * Returns the name of this field in the Lucene search index.<p> 418 * 419 * @return the name of this field in the Lucene search index 420 */ 421 public String getName() { 422 423 return m_name; 424 } 425 426 /** 427 * The hash code for a field is based only on the field name.<p> 428 * 429 * @see java.lang.Object#hashCode() 430 */ 431 @Override 432 public int hashCode() { 433 434 return m_name == null ? 41 : m_name.hashCode(); 435 } 436 437 /** 438 * Returns the indexed.<p> 439 * 440 * @return the indexed 441 */ 442 public boolean isIndexed() { 443 444 return m_indexed; 445 } 446 447 /** 448 * Returns <code>true</code> if this fields content is used in the search result excerpt.<p> 449 * 450 * @return <code>true</code> if this fields content is used in the search result excerpt 451 * 452 * @see #isStored() 453 */ 454 public boolean isInExcerpt() { 455 456 return m_excerpt; 457 } 458 459 /** 460 * Returns <code>true</code> if the content of this field is stored in the Lucene index.<p> 461 * 462 * Please refer to the Lucene documentation about {@link org.apache.lucene.document.Field.Store} 463 * for the concept behind stored and unstored fields.<p> 464 * 465 * @return <code>true</code> if the content of this field is stored in the Lucene index 466 */ 467 public boolean isStored() { 468 469 return m_stored; 470 } 471 472 /** 473 * Sets the default value to use if no content for this field was collected.<p> 474 * 475 * @param defaultValue the default value to set 476 */ 477 public void setDefaultValue(String defaultValue) { 478 479 if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(defaultValue)) { 480 m_defaultValue = defaultValue.trim(); 481 } else { 482 m_defaultValue = null; 483 } 484 } 485 486 /** 487 * Controls if the content of this field is indexed (and possibly tokenized) in the Lucene index.<p> 488 * 489 * @param indexed the indexed to set 490 */ 491 public void setIndexed(boolean indexed) { 492 493 m_indexed = indexed; 494 } 495 496 /** 497 * Controls if this fields content is used in the search result excerpt.<p> 498 * 499 * @param excerpt if <code>true</code>, then this fields content is used in the search excerpt 500 */ 501 public void setInExcerpt(boolean excerpt) { 502 503 m_excerpt = excerpt; 504 } 505 506 /** 507 * Sets the name of this field in the Lucene search index.<p> 508 * 509 * @param fieldName the name to set 510 */ 511 public void setName(String fieldName) { 512 513 m_name = fieldName; 514 } 515 516 /** 517 * Controls if the content of this field is stored in the Lucene index.<p> 518 * 519 * Please refer to the Lucene documentation about {@link org.apache.lucene.document.Field.Store} 520 * for the concept behind stored and unstored fields.<p> 521 * 522 * @param stored if <code>true</code>, then the field content is stored 523 */ 524 public void setStored(boolean stored) { 525 526 m_stored = stored; 527 } 528 529 /** 530 * @see java.lang.Object#toString() 531 */ 532 @Override 533 public String toString() { 534 535 return getName(); 536 } 537}