001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.search.fields; 029 030import org.apache.lucene.analysis.Analyzer; 031import org.apache.lucene.document.Field; 032import org.apache.lucene.document.FieldType; 033import org.apache.lucene.index.IndexOptions; 034import org.opencms.search.CmsSearchManager; 035import org.opencms.util.CmsStringUtil; 036 037/** 038 * An individual field configuration in a Lucene search index.<p> 039 * 040 * @since 7.0.0 041 */ 042public class CmsLuceneField extends CmsSearchField { 043 044 /** Value of m_displayName if field should not be displayed. */ 045 public static final String IGNORE_DISPLAY_NAME = "-"; 046 047 /** Constant for the "compress" index setting. */ 048 public static final String STR_COMPRESS = "compress"; 049 050 /** Constant for the "no" index setting. */ 051 public static final String STR_NO = "no"; 052 053 /** Constant for the "tokenized" index setting. */ 054 public static final String STR_TOKENIZED = "tokenized"; 055 056 /** Constant for the "untokenized" index setting. */ 057 public static final String STR_UN_TOKENIZED = "untokenized"; 058 059 /** Constant for the "yes" index setting. */ 060 public static final String STR_YES = "yes"; 061 062 /** The serial version UID. */ 063 private static final long serialVersionUID = -4946013624087640706L; 064 065 /** The special analyzer to use for this field. */ 066 private Analyzer m_analyzer; 067 068 /** Indicates if the content of this field is compressed. */ 069 private boolean m_compressed; 070 071 /** Indicates if this field should be displayed. */ 072 private boolean m_displayed; 073 074 /** The display name of the field. */ 075 private String m_displayName; 076 077 /** The display name set from the configuration. */ 078 private String m_displayNameForConfiguration; 079 080 /** Indicates if the content of this field should be tokenized. */ 081 private boolean m_tokenized; 082 083 /** The type used to convert a field to a Solr field. */ 084 private String m_type; 085 086 /** 087 * Creates a new search field configuration.<p> 088 */ 089 public CmsLuceneField() { 090 091 super(); 092 } 093 094 /** 095 * Creates a new search field configuration.<p> 096 * 097 * The field will be tokenized if it is indexed. 098 * The field will not be in the excerpt. 099 * There is no default value.<p> 100 * 101 * @param name the name of the field, see {@link #setName(String)} 102 * @param displayName the display name of this field, see {@link #setDisplayName(String)} 103 * @param isStored controls if the field is stored and in the excerpt, see {@link #setStored(boolean)} 104 * @param isIndexed controls if the field is indexed and tokenized, see {@link #setIndexed(boolean)} 105 */ 106 public CmsLuceneField(String name, String displayName, boolean isStored, boolean isIndexed) { 107 108 this(name, displayName, isStored, isIndexed, isIndexed, false, null); 109 } 110 111 /** 112 * Creates a new search field configuration.<p> 113 * 114 * @param name the name of the field, see {@link #setName(String)} 115 * @param displayName the display name of this field, see {@link #setDisplayName(String)} 116 * @param isStored controls if the field is stored, see {@link #setStored(boolean)} 117 * @param isCompressed controls if the filed is compressed, see {@link #setCompressed(boolean)} 118 * @param isIndexed controls if the field is indexed, see {@link #setIndexed(boolean)} 119 * @param isTokenized controls if the field is tokenized, see {@link #setStored(boolean)} 120 * @param isInExcerpt controls if the field is in the excerpt, see {@link #isInExcerptAndStored()} 121 * @param analyzer the analyzer to use, see {@link #setAnalyzer(Analyzer)} 122 * @param defaultValue the default value for the field, see {@link #setDefaultValue(String)} 123 */ 124 public CmsLuceneField( 125 String name, 126 String displayName, 127 boolean isStored, 128 boolean isCompressed, 129 boolean isIndexed, 130 boolean isTokenized, 131 boolean isInExcerpt, 132 Analyzer analyzer, 133 String defaultValue) { 134 135 super(name, defaultValue); 136 setDisplayName(displayName); 137 setStored(isStored); 138 setCompressed(isCompressed); 139 setIndexed(isIndexed); 140 setTokenized(isTokenized); 141 setInExcerpt(isInExcerpt); 142 setAnalyzer(analyzer); 143 } 144 145 /** 146 * Creates a new search field configuration.<p> 147 * 148 * @param name the name of the field, see {@link #setName(String)} 149 * @param displayName the display name of this field, see {@link #setDisplayName(String)} 150 * @param isStored controls if the field is stored, see {@link #setStored(boolean)} 151 * @param isIndexed controls if the field is indexed, see {@link #setIndexed(boolean)} 152 * @param isTokenized controls if the field is tokenized, see {@link #setStored(boolean)} 153 * @param isInExcerpt controls if the field is in the excerpt, see {@link #isInExcerptAndStored()} 154 * @param defaultValue the default value for the field, see {@link #setDefaultValue(String)} 155 */ 156 public CmsLuceneField( 157 String name, 158 String displayName, 159 boolean isStored, 160 boolean isIndexed, 161 boolean isTokenized, 162 boolean isInExcerpt, 163 String defaultValue) { 164 165 this(name, displayName, isStored, false, isIndexed, isTokenized, isInExcerpt, null, defaultValue); 166 } 167 168 /** 169 * Creates a field from the configuration and the provided content.<p> 170 * 171 * The configured name of the field as provided by {@link #getName()} is used.<p> 172 * 173 * If no valid content is provided (that is the content is either <code>null</code> or 174 * only whitespace), then no field is created and <code>null</code> is returned.<p> 175 * 176 * @param content the content to create the field with 177 * 178 * @return a field created from the configuration and the provided content 179 */ 180 public Field createField(String content) { 181 182 return createField(getName(), content); 183 } 184 185 /** 186 * Creates a field with the given name from the configuration and the provided content.<p> 187 * 188 * If no valid content is provided (that is the content is either <code>null</code> or 189 * only whitespace), then no field is created and <code>null</code> is returned.<p> 190 * 191 * @param name the name of the field to create 192 * @param content the content to create the field with 193 * 194 * @return a field with the given name from the configuration and the provided content 195 */ 196 public Field createField(String name, String content) { 197 198 if (CmsStringUtil.isEmptyOrWhitespaceOnly(content)) { 199 content = getDefaultValue(); 200 } 201 if (content != null) { 202 final FieldType ft = new FieldType(); 203 if (isIndexed()) { 204 if (isTokenizedAndIndexed()) { 205 ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); 206 ft.setTokenized(true); 207 } else { 208 ft.setIndexOptions(IndexOptions.DOCS); 209 ft.setTokenized(false); 210 } 211 } 212 ft.setStored(isStored() || isCompressed()); 213 Field result = new Field(name, content, ft); 214 return result; 215 } 216 return null; 217 } 218 219 /** 220 * Returns the analyzer used for this field.<p> 221 * 222 * @return the analyzer used for this field 223 */ 224 public Analyzer getAnalyzer() { 225 226 return m_analyzer; 227 } 228 229 /** 230 * Returns the display name of the field.<p> 231 * 232 * @return the display name of the field 233 */ 234 public String getDisplayName() { 235 236 if (!isDisplayed()) { 237 return IGNORE_DISPLAY_NAME; 238 } 239 if (m_displayName == null) { 240 return getName(); 241 } else { 242 return m_displayName; 243 } 244 } 245 246 /** 247 * Returns the displayNameForConfiguration.<p> 248 * 249 * @return the displayNameForConfiguration 250 */ 251 public String getDisplayNameForConfiguration() { 252 253 return m_displayNameForConfiguration; 254 } 255 256 /** 257 * Returns the String value state of this field if it is indexed (and possibly tokenized) in the Lucene index.<p> 258 * 259 * @return the String value state of this field if it is indexed (and possibly tokenized) in the Lucene index 260 * 261 * @see #isTokenizedAndIndexed() 262 * @see #isIndexed() 263 */ 264 @Override 265 public String getIndexed() { 266 267 if (isTokenizedAndIndexed()) { 268 return String.valueOf(isTokenizedAndIndexed()); 269 } 270 if (isIndexed()) { 271 return STR_UN_TOKENIZED; 272 } else { 273 return String.valueOf(isIndexed()); 274 } 275 } 276 277 /** 278 * Returns the type.<p> 279 * 280 * @return the type 281 */ 282 public String getType() { 283 284 return m_type; 285 } 286 287 /** 288 * Returns <code>true</code> if the content of this field is compressed.<p> 289 * 290 * If the field is compressed, it must also be stored, this means 291 * {@link #isStored()} will always return <code>true</code> for compressed fields.<p> 292 * 293 * @return <code>true</code> if the content of this field is compressed 294 */ 295 public boolean isCompressed() { 296 297 return m_compressed; 298 } 299 300 /** 301 * Returns true if the field should be displayed.<p> 302 * 303 * @return returns true if the field should be displayed otherwise false 304 */ 305 public boolean isDisplayed() { 306 307 return m_displayed; 308 } 309 310 /** 311 * Returns <code>true</code> if this fields content is used in the search result excerpt.<p> 312 * 313 * A field can only be used in the excerpt if it is stored, see {@link #isStored()}.<p> 314 * 315 * @return <code>true</code> if this fields content is used in the search result excerpt 316 * 317 * @see #isStored() 318 */ 319 public boolean isInExcerptAndStored() { 320 321 return isInExcerpt() && isStored(); 322 } 323 324 /** 325 * Returns <code>true</code> if the content of this field is tokenized in the Lucene index.<p> 326 * 327 * Please refer to the Lucene documentation about the concept behind tokenized and untokenized fields.<p> 328 * 329 * @return <code>true</code> if the content of this field is tokenized in the Lucene index 330 */ 331 public boolean isTokenized() { 332 333 return m_tokenized; 334 } 335 336 /** 337 * Returns <code>true</code> if the content of this field is tokenized in the Lucene index.<p> 338 * 339 * A field can only be tokenized if it is also indexed, see {@link #isIndexed()}.<p> 340 * 341 * Please refer to the Lucene documentation about the concept behind tokenized and untokenized fields.<p> 342 * 343 * @return <code>true</code> if the content of this field is tokenized in the Lucene index 344 * 345 * @see #isStored() 346 * @see #isIndexed() 347 */ 348 public boolean isTokenizedAndIndexed() { 349 350 return m_tokenized && isIndexed(); 351 } 352 353 /** 354 * Closes the analyzer.<p> 355 */ 356 public void closeAnalyzer() { 357 358 if (m_analyzer != null) { 359 m_analyzer.close(); 360 } 361 } 362 363 /** 364 * Sets the analyzer used for this field.<p> 365 * 366 * @param analyzer the analyzer to set 367 */ 368 public void setAnalyzer(Analyzer analyzer) { 369 370 m_analyzer = analyzer; 371 } 372 373 /** 374 * Sets the analyzer used for this field.<p> 375 * 376 * The parameter must be a name of a class the implements the Lucene {@link Analyzer} interface. 377 * 378 * @param analyzerName the analyzer class name to set 379 * 380 * @throws Exception in case of problems creating the analyzer class instance 381 */ 382 public void setAnalyzer(String analyzerName) throws Exception { 383 384 if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(analyzerName)) { 385 setAnalyzer(CmsSearchManager.getAnalyzer(analyzerName)); 386 } 387 } 388 389 /** 390 * Controls if this field value will be stored compressed or not.<p> 391 * 392 * If this is set to <code>true</code>, the value for {@link #isStored()} will also 393 * be set to <code>true</code>, since compressed fields are always stored.<p> 394 * 395 * @param compressed if <code>true</code>, the field value will be stored compressed 396 */ 397 public void setCompressed(boolean compressed) { 398 399 m_compressed = compressed; 400 if (compressed) { 401 setStored(true); 402 } 403 } 404 405 /** 406 * Controls if the field is displayed or not.<p> 407 * 408 * @param displayed if true the field is displayed 409 */ 410 public void setDisplayed(boolean displayed) { 411 412 m_displayed = displayed; 413 } 414 415 /** 416 * Sets the display name. If the given name equals IGNORE_DISPLAY_NAME the field is not displayed.<p> 417 * 418 * @param displayName the display name to set 419 */ 420 public void setDisplayName(String displayName) { 421 422 if (CmsStringUtil.isEmpty(displayName) || (IGNORE_DISPLAY_NAME.equals(displayName))) { 423 m_displayName = null; 424 setDisplayed(false); 425 } else { 426 m_displayName = displayName; 427 m_displayNameForConfiguration = displayName; 428 setDisplayed(true); 429 } 430 } 431 432 /** 433 * Sets the displayNameForConfiguration.<p> 434 * 435 * @param displayNameForConfiguration the displayNameForConfiguration to set 436 */ 437 public void setDisplayNameForConfiguration(String displayNameForConfiguration) { 438 439 m_displayNameForConfiguration = displayNameForConfiguration; 440 setDisplayName(displayNameForConfiguration); 441 } 442 443 /** 444 * Controls if the content of this field is indexed (and possibly tokenized) in the Lucene index from a String parameter.<p> 445 * 446 * This sets the values for {@link #isIndexed()} as well as {@link #isTokenizedAndIndexed()}.<p> 447 * 448 * The parameter can have the following values: 449 * <ul> 450 * <li><b>"true"</b> or <b>"tokenized"</b>: The field is indexed and tokenized. 451 * <li><b>"false"</b> or <b>"no"</b>: The field is not indexed and not tokenized. 452 * <li><b>"untokenized"</b>: The field is indexed but not tokenized. 453 * </ul> 454 * 455 * @param indexed the index setting to use 456 * 457 * @see #setIndexed(boolean) 458 * @see #setTokenized(boolean) 459 */ 460 public void setIndexed(String indexed) { 461 462 boolean isIndexed = false; 463 boolean isTokenized = false; 464 if (indexed != null) { 465 indexed = indexed.trim().toLowerCase(); 466 if (STR_TOKENIZED.equals(indexed)) { 467 isIndexed = true; 468 isTokenized = true; 469 } else if (STR_UN_TOKENIZED.equals(indexed)) { 470 isIndexed = true; 471 } else if (STR_NO.equals(indexed)) { 472 // "no", both values will be false 473 } else { 474 // only "true" or "false" remain 475 isIndexed = Boolean.valueOf(indexed).booleanValue(); 476 isTokenized = isIndexed; 477 } 478 } 479 setIndexed(isIndexed); 480 setTokenized(isTokenized); 481 } 482 483 /** 484 * Controls if this fields content is used in the search result excerpt.<p> 485 * 486 * @param excerpt if <code>"true"</code>, then this fields content is used in the search excerpt 487 * 488 * @see #setInExcerpt(boolean) 489 */ 490 public void setInExcerpt(String excerpt) { 491 492 setInExcerpt(Boolean.valueOf(String.valueOf(excerpt)).booleanValue()); 493 } 494 495 /** 496 * Controls if the content of this field is stored in the Lucene index from a String parameter.<p> 497 * 498 * @param stored if <code>"true"</code>, then the field content is stored 499 * 500 * @see #setStored(boolean) 501 */ 502 public void setStored(String stored) { 503 504 boolean isStored = false; 505 boolean isCompressed = false; 506 if (stored != null) { 507 stored = stored.trim().toLowerCase(); 508 if (STR_COMPRESS.equals(stored)) { 509 isCompressed = true; 510 isStored = true; 511 } else if (STR_YES.equals(stored)) { 512 // "yes", value will be stored but not compressed 513 isStored = true; 514 } else { 515 // only "true" or "false" remain 516 isStored = Boolean.valueOf(stored).booleanValue(); 517 } 518 } 519 setStored(isStored); 520 setCompressed(isCompressed); 521 } 522 523 /** 524 * Controls if the content of this field is tokenized in the Lucene index.<p> 525 * 526 * Please refer to the Lucene documentation about the concept behind tokenized and untokenized fields.<p> 527 * 528 * @param tokenized if <code>true</code>, then the field content is tokenized 529 * 530 * @see #setStored(boolean) 531 */ 532 public void setTokenized(boolean tokenized) { 533 534 m_tokenized = tokenized; 535 } 536 537 /** 538 * Sets the type.<p> 539 * 540 * @param type the type to set 541 */ 542 public void setType(String type) { 543 544 m_type = type; 545 } 546}