001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.search.fields;
029
import java.util.Locale;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.opencms.search.CmsSearchManager;
import org.opencms.util.CmsStringUtil;
036
037/**
038 * An individual field configuration in a Lucene search index.<p>
039 *
040 * @since 7.0.0
041 */
042public class CmsLuceneField extends CmsSearchField {
043
044    /** Value of m_displayName if field should not be displayed. */
045    public static final String IGNORE_DISPLAY_NAME = "-";
046
047    /** Constant for the "compress" index setting. */
048    public static final String STR_COMPRESS = "compress";
049
050    /** Constant for the "no" index setting. */
051    public static final String STR_NO = "no";
052
053    /** Constant for the "tokenized" index setting. */
054    public static final String STR_TOKENIZED = "tokenized";
055
056    /** Constant for the "untokenized" index setting. */
057    public static final String STR_UN_TOKENIZED = "untokenized";
058
059    /** Constant for the "yes" index setting. */
060    public static final String STR_YES = "yes";
061
062    /** The serial version UID. */
063    private static final long serialVersionUID = -4946013624087640706L;
064
065    /** The special analyzer to use for this field. */
066    private Analyzer m_analyzer;
067
068    /** Indicates if the content of this field is compressed. */
069    private boolean m_compressed;
070
071    /** Indicates if this field should be displayed. */
072    private boolean m_displayed;
073
074    /** The display name of the field. */
075    private String m_displayName;
076
077    /** The display name set from the configuration. */
078    private String m_displayNameForConfiguration;
079
080    /** Indicates if the content of this field should be tokenized. */
081    private boolean m_tokenized;
082
083    /** The type used to convert a field to a Solr field. */
084    private String m_type;
085
086    /**
087     * Creates a new search field configuration.<p>
088     */
089    public CmsLuceneField() {
090
091        super();
092    }
093
094    /**
095     * Creates a new search field configuration.<p>
096     *
097     * The field will be tokenized if it is indexed.
098     * The field will not be in the excerpt.
099     * There is no default value.<p>
100     *
101     * @param name the name of the field, see {@link #setName(String)}
102     * @param displayName the display name of this field, see {@link #setDisplayName(String)}
103     * @param isStored controls if the field is stored and in the excerpt, see {@link #setStored(boolean)}
104     * @param isIndexed controls if the field is indexed and tokenized, see {@link #setIndexed(boolean)}
105     */
106    public CmsLuceneField(String name, String displayName, boolean isStored, boolean isIndexed) {
107
108        this(name, displayName, isStored, isIndexed, isIndexed, false, null);
109    }
110
111    /**
112     * Creates a new search field configuration.<p>
113     *
114     * @param name the name of the field, see {@link #setName(String)}
115     * @param displayName the display name of this field, see {@link #setDisplayName(String)}
116     * @param isStored controls if the field is stored, see {@link #setStored(boolean)}
117     * @param isCompressed controls if the filed is compressed, see {@link #setCompressed(boolean)}
118     * @param isIndexed controls if the field is indexed, see {@link #setIndexed(boolean)}
119     * @param isTokenized controls if the field is tokenized, see {@link #setStored(boolean)}
120     * @param isInExcerpt controls if the field is in the excerpt, see {@link #isInExcerptAndStored()}
121     * @param analyzer the analyzer to use, see {@link #setAnalyzer(Analyzer)}
122     * @param defaultValue the default value for the field, see {@link #setDefaultValue(String)}
123     */
124    public CmsLuceneField(
125        String name,
126        String displayName,
127        boolean isStored,
128        boolean isCompressed,
129        boolean isIndexed,
130        boolean isTokenized,
131        boolean isInExcerpt,
132        Analyzer analyzer,
133        String defaultValue) {
134
135        super(name, defaultValue);
136        setDisplayName(displayName);
137        setStored(isStored);
138        setCompressed(isCompressed);
139        setIndexed(isIndexed);
140        setTokenized(isTokenized);
141        setInExcerpt(isInExcerpt);
142        setAnalyzer(analyzer);
143    }
144
145    /**
146     * Creates a new search field configuration.<p>
147     *
148     * @param name the name of the field, see {@link #setName(String)}
149     * @param displayName the display name of this field, see {@link #setDisplayName(String)}
150     * @param isStored controls if the field is stored, see {@link #setStored(boolean)}
151     * @param isIndexed controls if the field is indexed, see {@link #setIndexed(boolean)}
152     * @param isTokenized controls if the field is tokenized, see {@link #setStored(boolean)}
153     * @param isInExcerpt controls if the field is in the excerpt, see {@link #isInExcerptAndStored()}
154     * @param defaultValue the default value for the field, see {@link #setDefaultValue(String)}
155     */
156    public CmsLuceneField(
157        String name,
158        String displayName,
159        boolean isStored,
160        boolean isIndexed,
161        boolean isTokenized,
162        boolean isInExcerpt,
163        String defaultValue) {
164
165        this(name, displayName, isStored, false, isIndexed, isTokenized, isInExcerpt, null, defaultValue);
166    }
167
168    /**
169     * Creates a field from the configuration and the provided content.<p>
170     *
171     * The configured name of the field as provided by {@link #getName()} is used.<p>
172     *
173     * If no valid content is provided (that is the content is either <code>null</code> or
174     * only whitespace), then no field is created and <code>null</code> is returned.<p>
175     *
176     * @param content the content to create the field with
177     *
178     * @return a field created from the configuration and the provided content
179     */
180    public Field createField(String content) {
181
182        return createField(getName(), content);
183    }
184
185    /**
186     * Creates a field with the given name from the configuration and the provided content.<p>
187     *
188     * If no valid content is provided (that is the content is either <code>null</code> or
189     * only whitespace), then no field is created and <code>null</code> is returned.<p>
190     *
191     * @param name the name of the field to create
192     * @param content the content to create the field with
193     *
194     * @return a field with the given name from the configuration and the provided content
195     */
196    public Field createField(String name, String content) {
197
198        if (CmsStringUtil.isEmptyOrWhitespaceOnly(content)) {
199            content = getDefaultValue();
200        }
201        if (content != null) {
202            final FieldType ft = new FieldType();
203            if (isIndexed()) {
204                if (isTokenizedAndIndexed()) {
205                    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
206                    ft.setTokenized(true);
207                } else {
208                    ft.setIndexOptions(IndexOptions.DOCS);
209                    ft.setTokenized(false);
210                }
211            }
212            ft.setStored(isStored() || isCompressed());
213            Field result = new Field(name, content, ft);
214            return result;
215        }
216        return null;
217    }
218
219    /**
220     * Returns the analyzer used for this field.<p>
221     *
222     * @return the analyzer used for this field
223     */
224    public Analyzer getAnalyzer() {
225
226        return m_analyzer;
227    }
228
229    /**
230     * Returns the display name of the field.<p>
231     *
232     * @return the display name of the field
233     */
234    public String getDisplayName() {
235
236        if (!isDisplayed()) {
237            return IGNORE_DISPLAY_NAME;
238        }
239        if (m_displayName == null) {
240            return getName();
241        } else {
242            return m_displayName;
243        }
244    }
245
246    /**
247     * Returns the displayNameForConfiguration.<p>
248     *
249     * @return the displayNameForConfiguration
250     */
251    public String getDisplayNameForConfiguration() {
252
253        return m_displayNameForConfiguration;
254    }
255
256    /**
257     * Returns the String value state of this field if it is indexed (and possibly tokenized) in the Lucene index.<p>
258     *
259     * @return the String value state of this field if it is indexed (and possibly tokenized) in the Lucene index
260     *
261     * @see #isTokenizedAndIndexed()
262     * @see #isIndexed()
263     */
264    @Override
265    public String getIndexed() {
266
267        if (isTokenizedAndIndexed()) {
268            return String.valueOf(isTokenizedAndIndexed());
269        }
270        if (isIndexed()) {
271            return STR_UN_TOKENIZED;
272        } else {
273            return String.valueOf(isIndexed());
274        }
275    }
276
277    /**
278     * Returns the type.<p>
279     *
280     * @return the type
281     */
282    public String getType() {
283
284        return m_type;
285    }
286
287    /**
288     * Returns <code>true</code> if the content of this field is compressed.<p>
289     *
290     * If the field is compressed, it must also be stored, this means
291     * {@link #isStored()} will always return <code>true</code> for compressed fields.<p>
292     *
293     * @return <code>true</code> if the content of this field is compressed
294     */
295    public boolean isCompressed() {
296
297        return m_compressed;
298    }
299
300    /**
301     * Returns true if the field should be displayed.<p>
302     *
303     * @return returns true if the field should be displayed otherwise false
304     */
305    public boolean isDisplayed() {
306
307        return m_displayed;
308    }
309
310    /**
311     * Returns <code>true</code> if this fields content is used in the search result excerpt.<p>
312     *
313     * A field can only be used in the excerpt if it is stored, see {@link #isStored()}.<p>
314     *
315     * @return <code>true</code> if this fields content is used in the search result excerpt
316     *
317     * @see #isStored()
318     */
319    public boolean isInExcerptAndStored() {
320
321        return isInExcerpt() && isStored();
322    }
323
324    /**
325     * Returns <code>true</code> if the content of this field is tokenized in the Lucene index.<p>
326     *
327     * Please refer to the Lucene documentation about the concept behind tokenized and untokenized fields.<p>
328     *
329     * @return <code>true</code> if the content of this field is tokenized in the Lucene index
330     */
331    public boolean isTokenized() {
332
333        return m_tokenized;
334    }
335
336    /**
337     * Returns <code>true</code> if the content of this field is tokenized in the Lucene index.<p>
338     *
339     * A field can only be tokenized if it is also indexed, see {@link #isIndexed()}.<p>
340     *
341     * Please refer to the Lucene documentation about the concept behind tokenized and untokenized fields.<p>
342     *
343     * @return <code>true</code> if the content of this field is tokenized in the Lucene index
344     *
345     * @see #isStored()
346     * @see #isIndexed()
347     */
348    public boolean isTokenizedAndIndexed() {
349
350        return m_tokenized && isIndexed();
351    }
352
353    /**
354     * Closes the analyzer.<p>
355     */
356    public void closeAnalyzer() {
357
358        if (m_analyzer != null) {
359            m_analyzer.close();
360        }
361    }
362
363    /**
364     * Sets the analyzer used for this field.<p>
365     *
366     * @param analyzer the analyzer to set
367     */
368    public void setAnalyzer(Analyzer analyzer) {
369
370        m_analyzer = analyzer;
371    }
372
373    /**
374     * Sets the analyzer used for this field.<p>
375     *
376     * The parameter must be a name of a class the implements the Lucene {@link Analyzer} interface.
377     *
378     * @param analyzerName the analyzer class name to set
379     *
380     * @throws Exception in case of problems creating the analyzer class instance
381     */
382    public void setAnalyzer(String analyzerName) throws Exception {
383
384        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(analyzerName)) {
385            setAnalyzer(CmsSearchManager.getAnalyzer(analyzerName));
386        }
387    }
388
389    /**
390     * Controls if this field value will be stored compressed or not.<p>
391     *
392     * If this is set to <code>true</code>, the value for {@link #isStored()} will also
393     * be set to <code>true</code>, since compressed fields are always stored.<p>
394     *
395     * @param compressed if <code>true</code>, the field value will be stored compressed
396     */
397    public void setCompressed(boolean compressed) {
398
399        m_compressed = compressed;
400        if (compressed) {
401            setStored(true);
402        }
403    }
404
405    /**
406     * Controls if the field is displayed or not.<p>
407     *
408     * @param displayed if true the field is displayed
409     */
410    public void setDisplayed(boolean displayed) {
411
412        m_displayed = displayed;
413    }
414
415    /**
416     * Sets the display name. If the given name equals IGNORE_DISPLAY_NAME the field is not displayed.<p>
417     *
418     * @param displayName the display name to set
419     */
420    public void setDisplayName(String displayName) {
421
422        if (CmsStringUtil.isEmpty(displayName) || (IGNORE_DISPLAY_NAME.equals(displayName))) {
423            m_displayName = null;
424            setDisplayed(false);
425        } else {
426            m_displayName = displayName;
427            m_displayNameForConfiguration = displayName;
428            setDisplayed(true);
429        }
430    }
431
432    /**
433     * Sets the displayNameForConfiguration.<p>
434     *
435     * @param displayNameForConfiguration the displayNameForConfiguration to set
436     */
437    public void setDisplayNameForConfiguration(String displayNameForConfiguration) {
438
439        m_displayNameForConfiguration = displayNameForConfiguration;
440        setDisplayName(displayNameForConfiguration);
441    }
442
443    /**
444     * Controls if the content of this field is indexed (and possibly tokenized) in the Lucene index from a String parameter.<p>
445     *
446     * This sets the values for {@link #isIndexed()} as well as {@link #isTokenizedAndIndexed()}.<p>
447     *
448     * The parameter can have the following values:
449     * <ul>
450     * <li><b>"true"</b> or <b>"tokenized"</b>: The field is indexed and tokenized.
451     * <li><b>"false"</b> or <b>"no"</b>: The field is not indexed and not tokenized.
452     * <li><b>"untokenized"</b>: The field is indexed but not tokenized.
453     * </ul>
454     *
455     * @param indexed the index setting to use
456     *
457     * @see #setIndexed(boolean)
458     * @see #setTokenized(boolean)
459     */
460    public void setIndexed(String indexed) {
461
462        boolean isIndexed = false;
463        boolean isTokenized = false;
464        if (indexed != null) {
465            indexed = indexed.trim().toLowerCase();
466            if (STR_TOKENIZED.equals(indexed)) {
467                isIndexed = true;
468                isTokenized = true;
469            } else if (STR_UN_TOKENIZED.equals(indexed)) {
470                isIndexed = true;
471            } else if (STR_NO.equals(indexed)) {
472                // "no", both values will be false
473            } else {
474                // only "true" or "false" remain
475                isIndexed = Boolean.valueOf(indexed).booleanValue();
476                isTokenized = isIndexed;
477            }
478        }
479        setIndexed(isIndexed);
480        setTokenized(isTokenized);
481    }
482
483    /**
484     * Controls if this fields content is used in the search result excerpt.<p>
485     *
486     * @param excerpt if <code>"true"</code>, then this fields content is used in the search excerpt
487     *
488     * @see #setInExcerpt(boolean)
489     */
490    public void setInExcerpt(String excerpt) {
491
492        setInExcerpt(Boolean.valueOf(String.valueOf(excerpt)).booleanValue());
493    }
494
495    /**
496     * Controls if the content of this field is stored in the Lucene index from a String parameter.<p>
497     *
498     * @param stored if <code>"true"</code>, then the field content is stored
499     *
500     * @see #setStored(boolean)
501     */
502    public void setStored(String stored) {
503
504        boolean isStored = false;
505        boolean isCompressed = false;
506        if (stored != null) {
507            stored = stored.trim().toLowerCase();
508            if (STR_COMPRESS.equals(stored)) {
509                isCompressed = true;
510                isStored = true;
511            } else if (STR_YES.equals(stored)) {
512                // "yes", value will be stored but not compressed
513                isStored = true;
514            } else {
515                // only "true" or "false" remain
516                isStored = Boolean.valueOf(stored).booleanValue();
517            }
518        }
519        setStored(isStored);
520        setCompressed(isCompressed);
521    }
522
523    /**
524     * Controls if the content of this field is tokenized in the Lucene index.<p>
525     *
526     * Please refer to the Lucene documentation about the concept behind tokenized and untokenized fields.<p>
527     *
528     * @param tokenized if <code>true</code>, then the field content is tokenized
529     *
530     * @see #setStored(boolean)
531     */
532    public void setTokenized(boolean tokenized) {
533
534        m_tokenized = tokenized;
535    }
536
537    /**
538     * Sets the type.<p>
539     *
540     * @param type the type to set
541     */
542    public void setType(String type) {
543
544        m_type = type;
545    }
546}