001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.search;
029
030import org.opencms.ade.containerpage.CmsDetailOnlyContainerUtil;
031import org.opencms.configuration.CmsConfigurationException;
032import org.opencms.db.CmsDriverManager;
033import org.opencms.db.CmsPublishedResource;
034import org.opencms.db.CmsResourceState;
035import org.opencms.file.CmsObject;
036import org.opencms.file.CmsProject;
037import org.opencms.file.CmsResource;
038import org.opencms.file.CmsResourceFilter;
039import org.opencms.file.types.CmsResourceTypeXmlContainerPage;
040import org.opencms.file.types.CmsResourceTypeXmlContent;
041import org.opencms.i18n.CmsMessageContainer;
042import org.opencms.loader.CmsLoaderException;
043import org.opencms.main.CmsEvent;
044import org.opencms.main.CmsException;
045import org.opencms.main.CmsIllegalArgumentException;
046import org.opencms.main.CmsIllegalStateException;
047import org.opencms.main.CmsLog;
048import org.opencms.main.I_CmsEventListener;
049import org.opencms.main.OpenCms;
050import org.opencms.main.OpenCmsSolrHandler;
051import org.opencms.relations.CmsRelation;
052import org.opencms.relations.CmsRelationFilter;
053import org.opencms.report.CmsLogReport;
054import org.opencms.report.I_CmsReport;
055import org.opencms.scheduler.I_CmsScheduledJob;
056import org.opencms.search.documents.A_CmsVfsDocument;
057import org.opencms.search.documents.CmsExtractionResultCache;
058import org.opencms.search.documents.I_CmsDocumentFactory;
059import org.opencms.search.documents.I_CmsTermHighlighter;
060import org.opencms.search.fields.CmsLuceneField;
061import org.opencms.search.fields.CmsLuceneFieldConfiguration;
062import org.opencms.search.fields.CmsSearchField;
063import org.opencms.search.fields.CmsSearchFieldConfiguration;
064import org.opencms.search.fields.CmsSearchFieldMapping;
065import org.opencms.search.fields.I_CmsSearchFieldConfiguration;
066import org.opencms.search.solr.CmsSolrConfiguration;
067import org.opencms.search.solr.CmsSolrFieldConfiguration;
068import org.opencms.search.solr.CmsSolrIndex;
069import org.opencms.search.solr.I_CmsSolrIndexWriter;
070import org.opencms.search.solr.spellchecking.CmsSolrSpellchecker;
071import org.opencms.search.solr.spellchecking.CmsSpellcheckDictionaryIndexer;
072import org.opencms.security.CmsRole;
073import org.opencms.security.CmsRoleViolationException;
074import org.opencms.util.A_CmsModeStringEnumeration;
075import org.opencms.util.CmsFileUtil;
076import org.opencms.util.CmsStringUtil;
077import org.opencms.util.CmsUUID;
078import org.opencms.util.CmsWaitHandle;
079
080import java.io.File;
081import java.io.IOException;
082import java.nio.file.FileSystems;
083import java.nio.file.Paths;
084import java.util.ArrayList;
085import java.util.Collection;
086import java.util.Collections;
087import java.util.HashMap;
088import java.util.HashSet;
089import java.util.Iterator;
090import java.util.List;
091import java.util.Locale;
092import java.util.Map;
093import java.util.Set;
094import java.util.TreeMap;
095import java.util.concurrent.locks.ReentrantLock;
096
097import org.apache.commons.logging.Log;
098import org.apache.lucene.analysis.Analyzer;
099import org.apache.lucene.analysis.CharArraySet;
100import org.apache.lucene.analysis.standard.StandardAnalyzer;
101import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
102import org.apache.solr.client.solrj.impl.HttpSolrClient.Builder;
103import org.apache.solr.core.CoreContainer;
104import org.apache.solr.core.CoreDescriptor;
105import org.apache.solr.core.SolrCore;
106
107/**
108 * Implements the general management and configuration of the search and
109 * indexing facilities in OpenCms.<p>
110 *
111 * @since 6.0.0
112 */
113public class CmsSearchManager implements I_CmsScheduledJob, I_CmsEventListener {
114
115    /**
116     *  Enumeration class for force unlock types.<p>
117     */
118    public static final class CmsSearchForceUnlockMode extends A_CmsModeStringEnumeration {
119
120        /** Force unlock type "always". */
121        public static final CmsSearchForceUnlockMode ALWAYS = new CmsSearchForceUnlockMode("always");
122
123        /** Force unlock type "never". */
124        public static final CmsSearchForceUnlockMode NEVER = new CmsSearchForceUnlockMode("never");
125
126        /** Force unlock type "only full". */
127        public static final CmsSearchForceUnlockMode ONLYFULL = new CmsSearchForceUnlockMode("onlyfull");
128
129        /** Serializable version id. */
130        private static final long serialVersionUID = 74746076708908673L;
131
132        /**
133         * Creates a new force unlock type with the given name.<p>
134         *
135         * @param mode the mode id to use
136         */
137        protected CmsSearchForceUnlockMode(String mode) {
138
139            super(mode);
140        }
141
142        /**
143         * Returns the lock type for the given type value.<p>
144         *
145         * @param type the type value to get the lock type for
146         *
147         * @return the lock type for the given type value
148         */
149        public static CmsSearchForceUnlockMode valueOf(String type) {
150
151            if (type.equals(ALWAYS.toString())) {
152                return ALWAYS;
153            } else if (type.equals(NEVER.toString())) {
154                return NEVER;
155            } else {
156                return ONLYFULL;
157            }
158        }
159    }
160
161    /**
162     * Handles offline index generation.<p>
163     */
164    protected class CmsSearchOfflineHandler implements I_CmsEventListener {
165
166        /** Indicates if the event handlers for the offline search have been already registered. */
167        private boolean m_isEventRegistered;
168
169        /** The list of resources to index. */
170        private List<CmsPublishedResource> m_resourcesToIndex;
171
172        /**
173         * Initializes the offline index handler.<p>
174         */
175        protected CmsSearchOfflineHandler() {
176
177            m_resourcesToIndex = new ArrayList<CmsPublishedResource>();
178        }
179
180        /**
181         * Implements the event listener of this class.<p>
182         *
183         * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent)
184         */
185        @SuppressWarnings("unchecked")
186        public void cmsEvent(CmsEvent event) {
187
188            switch (event.getType()) {
189                case I_CmsEventListener.EVENT_PROPERTY_MODIFIED:
190                case I_CmsEventListener.EVENT_RESOURCE_CREATED:
191                case I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED:
192                case I_CmsEventListener.EVENT_RESOURCE_MODIFIED:
193                    Object change = event.getData().get(I_CmsEventListener.KEY_CHANGE);
194                    if ((change != null) && change.equals(new Integer(CmsDriverManager.NOTHING_CHANGED))) {
195                        // skip lock & unlock
196                        return;
197                    }
198                    // skip indexing if flag is set in event
199                    Object skip = event.getData().get(I_CmsEventListener.KEY_SKIPINDEX);
200                    if (skip != null) {
201                        return;
202                    }
203
204                    // a resource has been modified - offline indexes require (re)indexing
205                    List<CmsResource> resources = Collections.singletonList(
206                        (CmsResource)event.getData().get(I_CmsEventListener.KEY_RESOURCE));
207                    reIndexResources(resources);
208                    break;
209                case I_CmsEventListener.EVENT_RESOURCE_DELETED:
210                    List<CmsResource> eventResources = (List<CmsResource>)event.getData().get(
211                        I_CmsEventListener.KEY_RESOURCES);
212                    List<CmsResource> resourcesToDelete = new ArrayList<CmsResource>(eventResources);
213                    for (CmsResource res : resourcesToDelete) {
214                        if (res.getState().isNew()) {
215                            // if the resource is new and a delete action was performed
216                            // --> set the state of the resource to deleted
217                            res.setState(CmsResourceState.STATE_DELETED);
218                        }
219                    }
220                    reIndexResources(resourcesToDelete);
221                    break;
222                case I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED:
223                case I_CmsEventListener.EVENT_RESOURCE_MOVED:
224                case I_CmsEventListener.EVENT_RESOURCE_COPIED:
225                case I_CmsEventListener.EVENT_RESOURCES_MODIFIED:
226                    // a list of resources has been modified - offline indexes require (re)indexing
227                    reIndexResources((List<CmsResource>)event.getData().get(I_CmsEventListener.KEY_RESOURCES));
228                    break;
229                default:
230                    // no operation
231            }
232        }
233
234        /**
235         * Adds a list of {@link CmsPublishedResource} objects to be indexed.<p>
236         *
237         * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to be indexed
238         */
239        protected synchronized void addResourcesToIndex(List<CmsPublishedResource> resourcesToIndex) {
240
241            m_resourcesToIndex.addAll(resourcesToIndex);
242        }
243
244        /**
245         * Returns the list of {@link CmsPublishedResource} objects to index.<p>
246         *
247         * @return the resources to index
248         */
249        protected List<CmsPublishedResource> getResourcesToIndex() {
250
251            List<CmsPublishedResource> result;
252            synchronized (this) {
253                result = m_resourcesToIndex;
254                m_resourcesToIndex = new ArrayList<CmsPublishedResource>();
255            }
256            try {
257                CmsObject cms = m_adminCms;
258                CmsProject offline = getOfflineIndexProject();
259                if (offline != null) {
260                    // switch to the offline project if available
261                    cms = OpenCms.initCmsObject(m_adminCms);
262                    cms.getRequestContext().setCurrentProject(offline);
263                }
264                findRelatedContainerPages(cms, result);
265            } catch (CmsException e) {
266                LOG.error(e.getLocalizedMessage(), e);
267            }
268            return result;
269        }
270
271        /**
272         * Initializes this offline search handler, registering the event handlers if required.<p>
273         */
274        protected void initialize() {
275
276            if (m_offlineIndexes.size() > 0) {
277                // there is at least one offline index configured
278                if ((m_offlineIndexThread == null) || !m_offlineIndexThread.isAlive()) {
279                    // create the offline indexing thread
280                    m_offlineIndexThread = new CmsSearchOfflineIndexThread(this);
281                    // start the offline index thread
282                    m_offlineIndexThread.start();
283                }
284            } else {
285                if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) {
286                    // no offline indexes but thread still running, stop the thread
287                    m_offlineIndexThread.shutDown();
288                    m_offlineIndexThread = null;
289                }
290            }
291            // do this only in case there are offline indexes configured
292            if (!m_isEventRegistered && (m_offlineIndexes.size() > 0)) {
293                m_isEventRegistered = true;
294                // register this object as event listener
295                OpenCms.addCmsEventListener(
296                    this,
297                    new int[] {
298                        I_CmsEventListener.EVENT_PROPERTY_MODIFIED,
299                        I_CmsEventListener.EVENT_RESOURCE_CREATED,
300                        I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED,
301                        I_CmsEventListener.EVENT_RESOURCE_MODIFIED,
302                        I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED,
303                        I_CmsEventListener.EVENT_RESOURCE_MOVED,
304                        I_CmsEventListener.EVENT_RESOURCE_DELETED,
305                        I_CmsEventListener.EVENT_RESOURCE_COPIED,
306                        I_CmsEventListener.EVENT_RESOURCES_MODIFIED});
307            }
308        }
309
310        /**
311         * Updates all offline indexes for the given list of {@link CmsResource} objects.<p>
312         *
313         * @param resources a list of {@link CmsResource} objects to update in the offline indexes
314         */
315        protected synchronized void reIndexResources(List<CmsResource> resources) {
316
317            List<CmsPublishedResource> resourcesToIndex = new ArrayList<CmsPublishedResource>(resources.size());
318            for (CmsResource res : resources) {
319                CmsPublishedResource pubRes = new CmsPublishedResource(res);
320                resourcesToIndex.add(pubRes);
321            }
322            if (resourcesToIndex.size() > 0) {
323                // add the resources found to the offline index thread
324                addResourcesToIndex(resourcesToIndex);
325            }
326        }
327    }
328
329    /**
330     * The offline indexer thread runs periodically and indexes all resources added by the event handler.<p>
331     */
332    protected class CmsSearchOfflineIndexThread extends Thread {
333
334        /** The event handler that triggers this thread. */
335        CmsSearchOfflineHandler m_handler;
336
337        /** Indicates if this thread is still alive. */
338        boolean m_isAlive;
339
340        /** Indicates that an index update thread is currently running. */
341        private boolean m_isUpdating;
342
343        /** If true a manual update (after file upload) was triggered. */
344        private boolean m_updateTriggered;
345
346        /** The wait handle used for signalling when the worker thread has finished. */
347        private CmsWaitHandle m_waitHandle = new CmsWaitHandle();
348
349        /**
350         * Constructor.<p>
351         *
352         * @param handler the offline index event handler
353         */
354        protected CmsSearchOfflineIndexThread(CmsSearchOfflineHandler handler) {
355
356            super("OpenCms: Offline Search Indexer");
357            m_handler = handler;
358        }
359
360        /**
361         * Gets the wait handle used for signalling when the worker thread has finished.
362         *
363         * @return the wait handle
364         **/
365        public CmsWaitHandle getWaitHandle() {
366
367            return m_waitHandle;
368        }
369
370        /**
371         * @see java.lang.Thread#interrupt()
372         */
373        @Override
374        public void interrupt() {
375
376            super.interrupt();
377            m_updateTriggered = true;
378        }
379
380        /**
381         * @see java.lang.Thread#run()
382         */
383        @Override
384        public void run() {
385
386            // create a log report for the output
387            I_CmsReport report = new CmsLogReport(m_adminCms.getRequestContext().getLocale(), CmsSearchManager.class);
388            long offlineUpdateFrequency = getOfflineUpdateFrequency();
389            m_updateTriggered = false;
390            try {
391                while (m_isAlive) {
392                    if (!m_updateTriggered) {
393                        try {
394                            sleep(offlineUpdateFrequency);
395                        } catch (InterruptedException e) {
396                            // continue the thread after interruption
397                            if (!m_isAlive) {
398                                // the thread has been shut down while sleeping
399                                continue;
400                            }
401                            if (offlineUpdateFrequency != getOfflineUpdateFrequency()) {
402                                // offline update frequency change - clear interrupt status
403                                offlineUpdateFrequency = getOfflineUpdateFrequency();
404                            }
405                            LOG.info(e.getLocalizedMessage(), e);
406                        }
407                    }
408                    if (m_isAlive) {
409                        // set update trigger to false since we do the update now
410                        m_updateTriggered = false;
411                        // get list of resource to update
412                        List<CmsPublishedResource> resourcesToIndex = getResourcesToIndex();
413                        if (resourcesToIndex.size() > 0) {
414                            // only start indexing if there is at least one resource
415                            startOfflineUpdateThread(report, resourcesToIndex);
416                        } else {
417                            getWaitHandle().release();
418                        }
419                        // this is just called to clear the interrupt status of the thread
420                        interrupted();
421                    }
422                }
423            } finally {
424                // make sure that live status is reset in case of Exceptions
425                m_isAlive = false;
426            }
427
428        }
429
430        /**
431         * @see java.lang.Thread#start()
432         */
433        @Override
434        public synchronized void start() {
435
436            m_isAlive = true;
437            super.start();
438        }
439
440        /**
441         * Obtains the list of resource to update in the offline index,
442         * then optimizes the list by removing duplicate entries.<p>
443         *
444         * @return the list of resource to update in the offline index
445         */
446        protected List<CmsPublishedResource> getResourcesToIndex() {
447
448            List<CmsPublishedResource> resourcesToIndex = m_handler.getResourcesToIndex();
449            List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>(resourcesToIndex.size());
450
451            // Reverse to always keep the last list entries
452            Collections.reverse(resourcesToIndex);
453            for (CmsPublishedResource pubRes : resourcesToIndex) {
454                boolean addResource = true;
455                for (CmsPublishedResource resRes : result) {
456                    if (pubRes.equals(resRes)
457                        && (pubRes.getState() == resRes.getState())
458                        && (pubRes.getMovedState() == resRes.getMovedState())
459                        && pubRes.getRootPath().equals(resRes.getRootPath())) {
460                        // resource already in the update list
461                        addResource = false;
462                        break;
463                    }
464                }
465                if (addResource) {
466                    result.add(pubRes);
467                }
468
469            }
470            Collections.reverse(result);
471            return changeStateOfMoveOriginsToDeleted(result);
472        }
473
474        /**
475         * Shuts down this offline index thread.<p>
476         */
477        protected void shutDown() {
478
479            m_isAlive = false;
480            interrupt();
481            if (m_isUpdating) {
482                long waitTime = getOfflineUpdateFrequency() / 2;
483                int waitSteps = 0;
484                do {
485                    try {
486                        // wait half the time of the offline index frequency for the thread to finish
487                        Thread.sleep(waitTime);
488                    } catch (InterruptedException e) {
489                        // continue
490                        LOG.info(e.getLocalizedMessage(), e);
491                    }
492                    waitSteps++;
493                    // wait 5 times then stop waiting
494                } while ((waitSteps < 5) && m_isUpdating);
495            }
496        }
497
498        /**
499         * Updates the offline search indexes for the given list of resources.<p>
500         *
501         * @param report the report to write the index information to
502         * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index
503         */
504        protected void startOfflineUpdateThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) {
505
506            CmsSearchOfflineIndexWorkThread thread = new CmsSearchOfflineIndexWorkThread(report, resourcesToIndex);
507            long startTime = System.currentTimeMillis();
508            long waitTime = getOfflineUpdateFrequency() / 2;
509            if (LOG.isDebugEnabled()) {
510                LOG.debug(
511                    Messages.get().getBundle().key(
512                        Messages.LOG_OI_UPDATE_START_1,
513                        Integer.valueOf(resourcesToIndex.size())));
514            }
515
516            m_isUpdating = true;
517            thread.start();
518
519            do {
520                try {
521                    // wait half the time of the offline index frequency for the thread to finish
522                    thread.join(waitTime);
523                } catch (InterruptedException e) {
524                    // continue
525                    LOG.info(e.getLocalizedMessage(), e);
526                }
527                if (thread.isAlive()) {
528                    LOG.warn(
529                        Messages.get().getBundle().key(
530                            Messages.LOG_OI_UPDATE_LONG_2,
531                            Integer.valueOf(resourcesToIndex.size()),
532                            Long.valueOf(System.currentTimeMillis() - startTime)));
533                }
534            } while (thread.isAlive());
535            m_isUpdating = false;
536
537            if (LOG.isDebugEnabled()) {
538                LOG.debug(
539                    Messages.get().getBundle().key(
540                        Messages.LOG_OI_UPDATE_FINISH_2,
541                        Integer.valueOf(resourcesToIndex.size()),
542                        Long.valueOf(System.currentTimeMillis() - startTime)));
543            }
544        }
545
546        /**
547         * Helper method which changes the states of resources which are to be indexed but have the wrong path to 'deleted'.
548         * This is needed to deal with moved resources, since the documents with the old paths must be removed from the index,
549         *
550         * @param resourcesToIndex the resources to index
551         *
552         * @return the resources to index, but resource states are set to 'deleted' for resources with outdated paths
553         */
554        private List<CmsPublishedResource> changeStateOfMoveOriginsToDeleted(
555            List<CmsPublishedResource> resourcesToIndex) {
556
557            Map<CmsUUID, String> lastValidPaths = new HashMap<CmsUUID, String>();
558            for (CmsPublishedResource resource : resourcesToIndex) {
559                if (resource.getState().isDeleted()) {
560                    // we don't want the last path to be from a deleted resource
561                    continue;
562                }
563                lastValidPaths.put(resource.getStructureId(), resource.getRootPath());
564            }
565            List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>();
566            for (CmsPublishedResource resource : resourcesToIndex) {
567                if (resource.getState().isDeleted()) {
568                    result.add(resource);
569                    continue;
570                }
571                String lastValidPath = lastValidPaths.get(resource.getStructureId());
572                if (resource.getRootPath().equals(lastValidPath) || resource.getStructureId().isNullUUID()) {
573                    result.add(resource);
574                } else {
575                    result.add(
576                        new CmsPublishedResource(
577                            resource.getStructureId(),
578                            resource.getResourceId(),
579                            resource.getPublishTag(),
580                            resource.getRootPath(),
581                            resource.getType(),
582                            resource.isFolder(),
583                            CmsResource.STATE_DELETED, // make sure index entry with outdated path is deleted
584                            resource.getSiblingCount()));
585                }
586            }
587            return result;
588        }
589    }
590
591    /**
592     * An offline index worker Thread runs each time for every offline index update action.<p>
593     *
594     * This was decoupled from the main {@link CmsSearchOfflineIndexThread} in order to avoid
595     * problems if a single operation "hangs" the Tread.<p>
596     */
597    protected class CmsSearchOfflineIndexWorkThread extends Thread {
598
599        /** The report to write the index information to. */
600        I_CmsReport m_report;
601
602        /** The list of {@link CmsPublishedResource} objects to index. */
603        List<CmsPublishedResource> m_resourcesToIndex;
604
605        /**
606         * Updates the offline search indexes for the given list of resources.<p>
607         *
608         * @param report the report to write the index information to
609         * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index
610         */
611        protected CmsSearchOfflineIndexWorkThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) {
612
613            super("OpenCms: Offline Search Index Worker");
614            m_report = report;
615            m_resourcesToIndex = resourcesToIndex;
616        }
617
618        /**
619         * @see java.lang.Thread#run()
620         */
621        @Override
622        public void run() {
623
624            updateIndexOffline(m_report, m_resourcesToIndex);
625            if (m_offlineIndexThread != null) {
626                m_offlineIndexThread.getWaitHandle().release();
627            }
628        }
629    }
630
    /** Lock for the search manager; created as a fair lock to preserve the order of threads accessing the search manager. */
    private static final ReentrantLock SEARCH_MANAGER_LOCK = new ReentrantLock(true);

    /** The default value used for generating search result excerpts (1024 chars). */
    public static final int DEFAULT_EXCERPT_LENGTH = 1024;

    /** The default value used for keeping the extraction results in the cache (672 hours = 4 weeks). */
    public static final float DEFAULT_EXTRACTION_CACHE_MAX_AGE = 672.0f;

    /** Default for the maximum number of modifications before a commit in the search index is triggered (500). */
    public static final int DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT = 500;

    /** The default update frequency for offline indexes (15000 msec = 15 sec); note that the misspelled name is part of the public API. */
    public static final int DEFAULT_OFFLINE_UPDATE_FREQNENCY = 15000;

    /** The default maximal wait time for re-indexing after editing a content (30000 msec = 30 sec). */
    public static final int DEFAULT_MAX_INDEX_WAITTIME = 30000;

    /** The default timeout value used for generating a document for the search index (60000 msec = 1 min). */
    public static final int DEFAULT_TIMEOUT = 60000;

    /** Scheduler parameter: Update only a specified list of indexes. */
    public static final String JOB_PARAM_INDEXLIST = "indexList";

    /** Scheduler parameter: Write the output of the update to the logfile. */
    public static final String JOB_PARAM_WRITELOG = "writeLog";

    /** Prefix for Lucene default analyzers package (<code>org.apache.lucene.analysis.core.</code>). */
    public static final String LUCENE_ANALYZER = "org.apache.lucene.analysis.core.";

    /** The log object for this class. */
    protected static final Log LOG = CmsLog.getLog(CmsSearchManager.class);

    /** The administrator OpenCms user context to access OpenCms VFS resources. */
    protected CmsObject m_adminCms;

    /** The list of indexes that are configured for offline index mode. */
    protected List<I_CmsSearchIndex> m_offlineIndexes;

    /** The thread used for offline indexing. */
    protected CmsSearchOfflineIndexThread m_offlineIndexThread;

    /** Configured analyzers for languages using &lt;analyzer&gt;, keyed by locale. */
    private HashMap<Locale, CmsSearchAnalyzer> m_analyzers;

    /** Stores the offline update frequency while indexing is paused. */
    private long m_configuredOfflineIndexingFrequency;

    /** The Solr core container. */
    private CoreContainer m_coreContainer;

    /** A list of document factory configurations. */
    private List<CmsSearchDocumentType> m_documentTypeConfigs;

    /** A map of document factories keyed by their matching Cms resource types and/or mimetypes. */
    private Map<String, I_CmsDocumentFactory> m_documentTypes;

    /** The max age for extraction results to remain in the cache. */
    private float m_extractionCacheMaxAge;

    /** The cache for the extraction results. */
    private CmsExtractionResultCache m_extractionResultCache;

    /** Contains the available field configurations. */
    private Map<String, I_CmsSearchFieldConfiguration> m_fieldConfigurations;

    /** The force unlock type. */
    private CmsSearchForceUnlockMode m_forceUnlockMode;

    /** The class used to highlight the search terms in the excerpt of a search result. */
    private I_CmsTermHighlighter m_highlighter;

    /** A list of search indexes. */
    private List<I_CmsSearchIndex> m_indexes;

    /** Seconds to wait for an index lock (initialized with 10). */
    private int m_indexLockMaxWaitSeconds = 10;

    /** Configured index sources. */
    private Map<String, CmsSearchIndexSource> m_indexSources;

    /** The max. char. length of the excerpt in the search result. */
    private int m_maxExcerptLength;

    /** The maximum number of modifications before a commit in the search index is triggered. */
    private int m_maxModificationsBeforeCommit;

    /** The offline index search handler. */
    private CmsSearchOfflineHandler m_offlineHandler;

    /** The update frequency of the offline indexer in milliseconds. */
    private long m_offlineUpdateFrequency;

    /** The maximal time to wait for re-indexing after a content is edited (in milliseconds). */
    private long m_maxIndexWaitTime;

    /** Path to index files below WEB-INF/. */
    private String m_path;

    /** The Solr configuration. */
    private CmsSolrConfiguration m_solrConfig;

    /** Timeout for abandoning indexing thread. */
    private long m_timeout;
735
736    /**
737     * Default constructor when called as cron job.<p>
738     */
739    public CmsSearchManager() {
740
741        m_documentTypes = new HashMap<String, I_CmsDocumentFactory>();
742        m_documentTypeConfigs = new ArrayList<CmsSearchDocumentType>();
743        m_analyzers = new HashMap<Locale, CmsSearchAnalyzer>();
744        m_indexes = new ArrayList<I_CmsSearchIndex>();
745        m_indexSources = new TreeMap<String, CmsSearchIndexSource>();
746        m_offlineHandler = new CmsSearchOfflineHandler();
747        m_extractionCacheMaxAge = DEFAULT_EXTRACTION_CACHE_MAX_AGE;
748        m_maxExcerptLength = DEFAULT_EXCERPT_LENGTH;
749        m_offlineUpdateFrequency = DEFAULT_OFFLINE_UPDATE_FREQNENCY;
750        m_maxIndexWaitTime = DEFAULT_MAX_INDEX_WAITTIME;
751        m_maxModificationsBeforeCommit = DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT;
752
753        m_fieldConfigurations = new HashMap<String, I_CmsSearchFieldConfiguration>();
754        // make sure we have a "standard" field configuration
755        addFieldConfiguration(CmsLuceneFieldConfiguration.DEFAULT_STANDARD);
756
757        if (CmsLog.INIT.isInfoEnabled()) {
758            CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_START_SEARCH_CONFIG_0));
759        }
760    }
761
762    /**
763     * Returns an analyzer for the given class name.<p>
764     *
765     * @param className the class name of the analyzer
766     *
767     * @return the appropriate lucene analyzer
768     *
769     * @throws Exception if something goes wrong
770     */
771    public static Analyzer getAnalyzer(String className) throws Exception {
772
773        Analyzer analyzer = null;
774        Class<?> analyzerClass;
775        try {
776            analyzerClass = Class.forName(className);
777        } catch (ClassNotFoundException e) {
778            // allow Lucene standard classes to be written in a short form
779            analyzerClass = Class.forName(LUCENE_ANALYZER + className);
780        }
781
782        // since Lucene 3.0 most analyzers need a "version" parameter and don't support an empty constructor
783        if (StandardAnalyzer.class.equals(analyzerClass)) {
784            // the Lucene standard analyzer is used - but without any stopwords.
785            analyzer = new StandardAnalyzer(new CharArraySet(0, false));
786        } else {
787            analyzer = (Analyzer)analyzerClass.newInstance();
788        }
789        return analyzer;
790    }
791
792    /**
793     * Returns the Solr index configured with the parameters name.
794     * The parameters must contain a key/value pair with an existing
795     * Solr index, otherwise <code>null</code> is returned.<p>
796     *
797     * @param cms the current context
798     * @param params the parameter map
799     *
800     * @return the best matching Solr index
801     */
802    public static final CmsSolrIndex getIndexSolr(CmsObject cms, Map<String, String[]> params) {
803
804        String indexName = null;
805        CmsSolrIndex index = null;
806        // try to get the index name from the parameters: 'core' or 'index'
807        if (params != null) {
808            indexName = params.get(OpenCmsSolrHandler.PARAM_CORE) != null
809            ? params.get(OpenCmsSolrHandler.PARAM_CORE)[0]
810            : (params.get(OpenCmsSolrHandler.PARAM_INDEX) != null
811            ? params.get(OpenCmsSolrHandler.PARAM_INDEX)[0]
812            : null);
813        }
814        if (indexName == null) {
815            // if no parameter is specified try to use the default online/offline indexes by context
816            indexName = cms.getRequestContext().getCurrentProject().isOnlineProject()
817            ? CmsSolrIndex.DEFAULT_INDEX_NAME_ONLINE
818            : CmsSolrIndex.DEFAULT_INDEX_NAME_OFFLINE;
819        }
820        // try to get the index
821        index = indexName != null ? OpenCms.getSearchManager().getIndexSolr(indexName) : null;
822        if (index == null) {
823            // if there is exactly one index, a missing core / index parameter doesn't matter, since there is no choice.
824            List<CmsSolrIndex> solrs = OpenCms.getSearchManager().getAllSolrIndexes();
825            if ((solrs != null) && !solrs.isEmpty() && (solrs.size() == 1)) {
826                index = solrs.get(0);
827            }
828        }
829        return index;
830    }
831
832    /**
833     * Returns <code>true</code> if the index for the given name is a Lucene index, <code>false</code> otherwise.<p>
834     *
835     * @param indexName the name of the index to check
836     *
837     * @return <code>true</code> if the index for the given name is a Lucene index
838     */
839    public static boolean isLuceneIndex(String indexName) {
840
841        I_CmsSearchIndex i = OpenCms.getSearchManager().getIndex(indexName);
842        return (i instanceof CmsSearchIndex) && (!(i instanceof CmsSolrIndex));
843    }
844
845    /**
846     * Adds an analyzer.<p>
847     *
848     * @param analyzer an analyzer
849     */
850    public void addAnalyzer(CmsSearchAnalyzer analyzer) {
851
852        m_analyzers.put(analyzer.getLocale(), analyzer);
853
854        if (CmsLog.INIT.isInfoEnabled()) {
855            CmsLog.INIT.info(
856                Messages.get().getBundle().key(
857                    Messages.INIT_ADD_ANALYZER_2,
858                    analyzer.getLocale(),
859                    analyzer.getClassName()));
860        }
861    }
862
863    /**
864     * Adds a document type.<p>
865     *
866     * @param documentType a document type
867     */
868    public void addDocumentTypeConfig(CmsSearchDocumentType documentType) {
869
870        m_documentTypeConfigs.add(documentType);
871
872        if (CmsLog.INIT.isInfoEnabled()) {
873            CmsLog.INIT.info(
874                Messages.get().getBundle().key(
875                    Messages.INIT_SEARCH_DOC_TYPES_2,
876                    documentType.getName(),
877                    documentType.getClassName()));
878        }
879    }
880
881    /**
882     * Adds a search field configuration to the search manager.<p>
883     *
884     * @param fieldConfiguration the search field configuration to add
885     */
886    public void addFieldConfiguration(I_CmsSearchFieldConfiguration fieldConfiguration) {
887
888        m_fieldConfigurations.put(fieldConfiguration.getName(), fieldConfiguration);
889    }
890
891    /**
892     * Adds a search index to the configuration.<p>
893     *
894     * @param searchIndex the search index to add
895     */
896    public void addSearchIndex(I_CmsSearchIndex searchIndex) {
897
898        if (!searchIndex.isInitialized()) {
899            if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_2_INITIALIZING) {
900                try {
901                    searchIndex.initialize();
902                } catch (CmsException e) {
903                    // should never happen
904                    LOG.error(e.getMessage(), e);
905                }
906            }
907        }
908
909        // name: not null or emtpy and unique
910        String name = searchIndex.getName();
911        if (CmsStringUtil.isEmptyOrWhitespaceOnly(name)) {
912            throw new CmsIllegalArgumentException(
913                Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_MISSING_NAME_0));
914        }
915        if (m_indexSources.keySet().contains(name)) {
916            throw new CmsIllegalArgumentException(
917                Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_INVALID_NAME_1, name));
918        }
919
920        m_indexes.add(searchIndex);
921        if (m_adminCms != null) {
922            initOfflineIndexes();
923        }
924
925        if (CmsLog.INIT.isInfoEnabled()) {
926            CmsLog.INIT.info(
927                Messages.get().getBundle().key(
928                    Messages.INIT_ADD_SEARCH_INDEX_2,
929                    searchIndex.getName(),
930                    searchIndex.getProject()));
931        }
932    }
933
934    /**
935     * Adds a search index source configuration.<p>
936     *
937     * @param searchIndexSource a search index source configuration
938     */
939    public void addSearchIndexSource(CmsSearchIndexSource searchIndexSource) {
940
941        m_indexSources.put(searchIndexSource.getName(), searchIndexSource);
942
943        if (CmsLog.INIT.isInfoEnabled()) {
944            CmsLog.INIT.info(
945                Messages.get().getBundle().key(
946                    Messages.INIT_SEARCH_INDEX_SOURCE_2,
947                    searchIndexSource.getName(),
948                    searchIndexSource.getIndexerClassName()));
949        }
950    }
951
952    /**
953     * Implements the event listener of this class.<p>
954     *
955     * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent)
956     */
957    public void cmsEvent(CmsEvent event) {
958
959        switch (event.getType()) {
960            case I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES:
961                List<String> indexNames = null;
962                if ((event.getData() != null)
963                    && CmsStringUtil.isNotEmptyOrWhitespaceOnly(
964                        (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES))) {
965                    indexNames = CmsStringUtil.splitAsList(
966                        (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES),
967                        ",",
968                        true);
969                }
970                try {
971                    if (LOG.isDebugEnabled()) {
972                        LOG.debug(
973                            Messages.get().getBundle().key(
974                                Messages.LOG_EVENT_REBUILD_SEARCHINDEX_1,
975                                indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")),
976                            new Exception());
977                    }
978                    if (indexNames == null) {
979                        rebuildAllIndexes(getEventReport(event));
980                    } else {
981                        rebuildIndexes(indexNames, getEventReport(event));
982                    }
983                } catch (CmsException e) {
984                    if (LOG.isErrorEnabled()) {
985                        LOG.error(
986                            Messages.get().getBundle().key(
987                                Messages.ERR_EVENT_REBUILD_SEARCHINDEX_1,
988                                indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")),
989                            e);
990                    }
991                }
992                break;
993            case I_CmsEventListener.EVENT_CLEAR_CACHES:
994                if (LOG.isDebugEnabled()) {
995                    LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_CLEAR_CACHES_0), new Exception());
996                }
997                break;
998            case I_CmsEventListener.EVENT_PUBLISH_PROJECT:
999                // event data contains a list of the published resources
1000                CmsUUID publishHistoryId = new CmsUUID((String)event.getData().get(I_CmsEventListener.KEY_PUBLISHID));
1001                if (LOG.isDebugEnabled()) {
1002                    LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_PUBLISH_PROJECT_1, publishHistoryId));
1003                }
1004                updateAllIndexes(m_adminCms, publishHistoryId, getEventReport(event));
1005                if (LOG.isDebugEnabled()) {
1006                    LOG.debug(
1007                        Messages.get().getBundle().key(
1008                            Messages.LOG_EVENT_PUBLISH_PROJECT_FINISHED_1,
1009                            publishHistoryId));
1010                }
1011                break;
1012            default:
1013                // no operation
1014        }
1015    }
1016
1017    /**
1018     * Returns all Solr index.<p>
1019     *
1020     * @return all Solr indexes
1021     */
1022    public List<CmsSolrIndex> getAllSolrIndexes() {
1023
1024        List<CmsSolrIndex> result = new ArrayList<CmsSolrIndex>();
1025        for (String indexName : getIndexNames()) {
1026            CmsSolrIndex index = getIndexSolr(indexName);
1027            if (index != null) {
1028                result.add(index);
1029            }
1030        }
1031        return result;
1032    }
1033
1034    /**
1035     * Returns an analyzer for the given language.<p>
1036     *
1037     * The analyzer is selected according to the analyzer configuration.<p>
1038     *
1039     * @param locale the locale to get the analyzer for
1040     * @return the appropriate lucene analyzer
1041     *
1042     * @throws CmsSearchException if something goes wrong
1043     */
1044    public Analyzer getAnalyzer(Locale locale) throws CmsSearchException {
1045
1046        Analyzer analyzer = null;
1047        String className = null;
1048
1049        CmsSearchAnalyzer analyzerConf = m_analyzers.get(locale);
1050        if (analyzerConf == null) {
1051            throw new CmsSearchException(Messages.get().container(Messages.ERR_ANALYZER_NOT_FOUND_1, locale));
1052        }
1053
1054        try {
1055            analyzer = getAnalyzer(analyzerConf.getClassName());
1056        } catch (Exception e) {
1057            throw new CmsSearchException(Messages.get().container(Messages.ERR_LOAD_ANALYZER_1, className), e);
1058        }
1059
1060        return analyzer;
1061    }
1062
1063    /**
1064     * Returns an unmodifiable view of the map that contains the {@link CmsSearchAnalyzer} list.<p>
1065     *
1066     * The keys in the map are {@link Locale} objects, and the values are {@link CmsSearchAnalyzer} objects.
1067     *
1068     * @return an unmodifiable view of the Analyzers Map
1069     */
1070    public Map<Locale, CmsSearchAnalyzer> getAnalyzers() {
1071
1072        return Collections.unmodifiableMap(m_analyzers);
1073    }
1074
1075    /**
1076     * Returns the search analyzer for the given locale.<p>
1077     *
1078     * @param locale the locale to get the analyzer for
1079     *
1080     * @return the search analyzer for the given locale
1081     */
1082    public CmsSearchAnalyzer getCmsSearchAnalyzer(Locale locale) {
1083
1084        return m_analyzers.get(locale);
1085    }
1086
1087    /**
1088     * Returns the name of the directory below WEB-INF/ where the search indexes are stored.<p>
1089     *
1090     * @return the name of the directory below WEB-INF/ where the search indexes are stored
1091     */
1092    public String getDirectory() {
1093
1094        return m_path;
1095    }
1096
1097    /**
1098     * Returns the configured Solr home directory <code>null</code> if not set.<p>
1099     *
1100     * @return the Solr home directory
1101     */
1102    public String getDirectorySolr() {
1103
1104        return m_solrConfig != null ? m_solrConfig.getHome() : null;
1105    }
1106
1107    /**
1108     * Returns a lucene document factory for given resource.<p>
1109     *
1110     * The type of the document factory is selected by the type of the resource
1111     * and the MIME type of the resource content, according to the configuration in <code>opencms-search.xml</code>.<p>
1112     *
1113     * @param resource a cms resource
1114     * @return a lucene document factory or null
1115     */
1116    public I_CmsDocumentFactory getDocumentFactory(CmsResource resource) {
1117
1118        // first get the MIME type of the resource
1119        String mimeType = OpenCms.getResourceManager().getMimeType(resource.getRootPath(), null, "unknown");
1120        String resourceType = null;
1121        try {
1122            resourceType = OpenCms.getResourceManager().getResourceType(resource.getTypeId()).getTypeName();
1123        } catch (CmsLoaderException e) {
1124            // ignore, unknown resource type, resource can not be indexed
1125            LOG.info(e.getLocalizedMessage(), e);
1126        }
1127        return getDocumentFactory(resourceType, mimeType);
1128    }
1129
1130    /**
1131     * Returns a lucene document factory for given resource type and MIME type.<p>
1132     *
1133     * The type of the document factory is selected  according to the configuration
1134     * in <code>opencms-search.xml</code>.<p>
1135     *
1136     * @param resourceType the resource type name
1137     * @param mimeType the MIME type
1138     *
1139     * @return a lucene document factory or null in case no matching factory was found
1140     */
1141    public I_CmsDocumentFactory getDocumentFactory(String resourceType, String mimeType) {
1142
1143        I_CmsDocumentFactory result = null;
1144        if (resourceType != null) {
1145            // create the factory lookup key for the document
1146            String documentTypeKey = A_CmsVfsDocument.getDocumentKey(resourceType, mimeType);
1147            // check if a setting is available for this specific MIME type
1148            result = m_documentTypes.get(documentTypeKey);
1149            if (result == null) {
1150                // no setting is available, try to use a generic setting without MIME type
1151                result = m_documentTypes.get(A_CmsVfsDocument.getDocumentKey(resourceType, null));
1152                // please note: the result may still be null
1153            }
1154        }
1155        return result;
1156    }
1157
1158    /**
1159     * Returns a document type config.<p>
1160     *
1161     * @param name the name of the document type config
1162     * @return the document type config.
1163     */
1164    public CmsSearchDocumentType getDocumentTypeConfig(String name) {
1165
1166        // this is really used only for the search manager GUI,
1167        // so performance is not an issue and no lookup map is generated
1168        for (int i = 0; i < m_documentTypeConfigs.size(); i++) {
1169            CmsSearchDocumentType type = m_documentTypeConfigs.get(i);
1170            if (type.getName().equals(name)) {
1171                return type;
1172            }
1173        }
1174        return null;
1175    }
1176
1177    /**
1178     * Returns an unmodifiable view (read-only) of the DocumentTypeConfigs Map.<p>
1179     *
1180     * @return an unmodifiable view (read-only) of the DocumentTypeConfigs Map
1181     */
1182    public List<CmsSearchDocumentType> getDocumentTypeConfigs() {
1183
1184        return Collections.unmodifiableList(m_documentTypeConfigs);
1185    }
1186
1187    /**
1188     * Returns the maximum age a text extraction result is kept in the cache (in hours).<p>
1189     *
1190     * @return the maximum age a text extraction result is kept in the cache (in hours)
1191     */
1192    public float getExtractionCacheMaxAge() {
1193
1194        return m_extractionCacheMaxAge;
1195    }
1196
1197    /**
1198     * Returns the search field configuration with the given name.<p>
1199     *
1200     * In case no configuration is available with the given name, <code>null</code> is returned.<p>
1201     *
1202     * @param name the name to get the search field configuration for
1203     *
1204     * @return the search field configuration with the given name
1205     */
1206    public I_CmsSearchFieldConfiguration getFieldConfiguration(String name) {
1207
1208        return m_fieldConfigurations.get(name);
1209    }
1210
1211    /**
1212     * Returns the unmodifieable List of configured {@link I_CmsSearchFieldConfiguration} entries.<p>
1213     *
1214     * @return the unmodifieable List of configured {@link I_CmsSearchFieldConfiguration} entries
1215     */
1216    public List<I_CmsSearchFieldConfiguration> getFieldConfigurations() {
1217
1218        List<I_CmsSearchFieldConfiguration> result = new ArrayList<I_CmsSearchFieldConfiguration>(
1219            m_fieldConfigurations.values());
1220        Collections.sort(result);
1221        return Collections.unmodifiableList(result);
1222    }
1223
1224    /**
1225     * Returns the Lucene search field configurations only.<p>
1226     *
1227     * @return the Lucene search field configurations
1228     */
1229    public List<CmsLuceneFieldConfiguration> getFieldConfigurationsLucene() {
1230
1231        List<CmsLuceneFieldConfiguration> result = new ArrayList<CmsLuceneFieldConfiguration>();
1232        for (I_CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) {
1233            if (conf instanceof CmsLuceneFieldConfiguration) {
1234                result.add((CmsLuceneFieldConfiguration)conf);
1235            }
1236        }
1237        Collections.sort(result);
1238        return Collections.unmodifiableList(result);
1239    }
1240
1241    /**
1242     * Returns the Solr search field configurations only.<p>
1243     *
1244     * @return the Solr search field configurations
1245     */
1246    public List<CmsSolrFieldConfiguration> getFieldConfigurationsSolr() {
1247
1248        List<CmsSolrFieldConfiguration> result = new ArrayList<CmsSolrFieldConfiguration>();
1249        for (I_CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) {
1250            if (conf instanceof CmsSolrFieldConfiguration) {
1251                result.add((CmsSolrFieldConfiguration)conf);
1252            }
1253        }
1254        Collections.sort(result);
1255        return Collections.unmodifiableList(result);
1256    }
1257
1258    /**
1259     * Returns the force unlock mode during indexing.<p>
1260     *
1261     * @return the force unlock mode during indexing
1262     */
1263    public CmsSearchForceUnlockMode getForceunlock() {
1264
1265        return m_forceUnlockMode;
1266    }
1267
1268    /**
1269     * Returns the highlighter.<p>
1270     *
1271     * @return the highlighter
1272     */
1273    public I_CmsTermHighlighter getHighlighter() {
1274
1275        return m_highlighter;
1276    }
1277
1278    /**
1279     * Returns the Lucene search index configured with the given name.<p>
1280     * The index must exist, otherwise <code>null</code> is returned.
1281     *
1282     * @param indexName then name of the requested search index
1283     *
1284     * @return the Lucene search index configured with the given name
1285     */
1286    public I_CmsSearchIndex getIndex(String indexName) {
1287
1288        for (I_CmsSearchIndex index : m_indexes) {
1289            if (indexName.equalsIgnoreCase(index.getName())) {
1290                return index;
1291            }
1292        }
1293        return null;
1294    }
1295
1296    /**
1297     * Returns the seconds to wait for an index lock during an update operation.<p>
1298     *
1299     * @return the seconds to wait for an index lock during an update operation
1300     */
1301    public int getIndexLockMaxWaitSeconds() {
1302
1303        return m_indexLockMaxWaitSeconds;
1304    }
1305
1306    /**
1307     * Returns the names of all configured indexes.<p>
1308     *
1309     * @return list of names
1310     */
1311    public List<String> getIndexNames() {
1312
1313        List<String> indexNames = new ArrayList<String>();
1314        for (int i = 0, n = m_indexes.size(); i < n; i++) {
1315            indexNames.add((m_indexes.get(i)).getName());
1316        }
1317
1318        return indexNames;
1319    }
1320
1321    /**
1322     * Returns the Solr index configured with the given name.<p>
1323     * The index must exist, otherwise <code>null</code> is returned.
1324     *
1325     * @param indexName then name of the requested Solr index
1326     * @return the Solr index configured with the given name
1327     */
1328    public CmsSolrIndex getIndexSolr(String indexName) {
1329
1330        I_CmsSearchIndex index = getIndex(indexName);
1331        if (index instanceof CmsSolrIndex) {
1332            return (CmsSolrIndex)index;
1333        }
1334        return null;
1335    }
1336
1337    /**
1338     * Returns a search index source for a specified source name.<p>
1339     *
1340     * @param sourceName the name of the index source
1341     * @return a search index source
1342     */
1343    public CmsSearchIndexSource getIndexSource(String sourceName) {
1344
1345        return m_indexSources.get(sourceName);
1346    }
1347
1348    /**
1349     * Returns the max. excerpt length.<p>
1350     *
1351     * @return the max excerpt length
1352     */
1353    public int getMaxExcerptLength() {
1354
1355        return m_maxExcerptLength;
1356    }
1357
1358    /**
1359     * Returns the maximal time to wait for re-indexing after a content is edited (in milliseconds).<p>
1360     *
1361     * @return the maximal time to wait for re-indexing after a content is edited (in milliseconds)
1362     */
1363    public long getMaxIndexWaitTime() {
1364
1365        return m_maxIndexWaitTime;
1366    }
1367
1368    /**
1369     * Returns the maximum number of modifications before a commit in the search index is triggered.<p>
1370     *
1371     * @return the maximum number of modifications before a commit in the search index is triggered
1372     */
1373    public int getMaxModificationsBeforeCommit() {
1374
1375        return m_maxModificationsBeforeCommit;
1376    }
1377
1378    /**
1379     * Returns the update frequency of the offline indexer in milliseconds.<p>
1380     *
1381     * @return the update frequency of the offline indexer in milliseconds
1382     */
1383    public long getOfflineUpdateFrequency() {
1384
1385        return m_offlineUpdateFrequency;
1386    }
1387
1388    /**
1389     * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p>
1390     *
1391     * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances
1392     */
1393    public List<I_CmsSearchIndex> getSearchIndexes() {
1394
1395        return Collections.unmodifiableList(m_indexes);
1396    }
1397
1398    /**
1399     * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p>
1400     *
1401     * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances
1402     */
1403    public List<I_CmsSearchIndex> getSearchIndexesAll() {
1404
1405        return Collections.unmodifiableList(m_indexes);
1406    }
1407
1408    /**
1409     * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p>
1410     *
1411     * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances
1412     */
1413    public List<CmsSolrIndex> getSearchIndexesSolr() {
1414
1415        List<CmsSolrIndex> indexes = new ArrayList<CmsSolrIndex>();
1416        for (I_CmsSearchIndex index : m_indexes) {
1417            if (index instanceof CmsSolrIndex) {
1418                indexes.add((CmsSolrIndex)index);
1419            }
1420        }
1421        return Collections.unmodifiableList(indexes);
1422    }
1423
1424    /**
1425     * Returns an unmodifiable view (read-only) of the SearchIndexSources Map.<p>
1426     *
1427     * @return an unmodifiable view (read-only) of the SearchIndexSources Map
1428     */
1429    public Map<String, CmsSearchIndexSource> getSearchIndexSources() {
1430
1431        return Collections.unmodifiableMap(m_indexSources);
1432    }
1433
1434    /**
1435     * Return singleton instance of the OpenCms spellchecker.<p>
1436     *
1437     * @return instance of CmsSolrSpellchecker.
1438     */
1439    public CmsSolrSpellchecker getSolrDictionary() {
1440
1441        // get the core container that contains one core for each configured index
1442        if (m_coreContainer == null) {
1443            m_coreContainer = createCoreContainer();
1444        }
1445        return CmsSolrSpellchecker.getInstance(m_coreContainer);
1446    }
1447
1448    /**
1449     * Returns the Solr configuration.<p>
1450     *
1451     * @return the Solr configuration
1452     */
1453    public CmsSolrConfiguration getSolrServerConfiguration() {
1454
1455        return m_solrConfig;
1456    }
1457
1458    /**
1459     * Returns the timeout to abandon threads indexing a resource.<p>
1460     *
1461     * @return the timeout to abandon threads indexing a resource
1462     */
1463    public long getTimeout() {
1464
1465        return m_timeout;
1466    }
1467
1468    /**
1469     * Initializes the search manager.<p>
1470     *
1471     * @param cms the cms object
1472     *
1473     * @throws CmsRoleViolationException in case the given opencms object does not have <code>{@link CmsRole#WORKPLACE_MANAGER}</code> permissions
1474     */
1475    public void initialize(CmsObject cms) throws CmsRoleViolationException {
1476
1477        OpenCms.getRoleManager().checkRole(cms, CmsRole.WORKPLACE_MANAGER);
1478        try {
1479            // store the Admin cms to index Cms resources
1480            m_adminCms = OpenCms.initCmsObject(cms);
1481        } catch (CmsException e) {
1482            // this should never happen
1483            LOG.error(e.getLocalizedMessage(), e);
1484        }
1485        // make sure the site root is the root site
1486        m_adminCms.getRequestContext().setSiteRoot("/");
1487
1488        // create the extraction result cache
1489        m_extractionResultCache = new CmsExtractionResultCache(
1490            OpenCms.getSystemInfo().getAbsoluteRfsPathRelativeToWebInf(getDirectory()),
1491            "/extractCache");
1492        initializeFieldConfigurations();
1493        initializeIndexes();
1494        initOfflineIndexes();
1495
1496        // register this object as event listener
1497        OpenCms.addCmsEventListener(
1498            this,
1499            new int[] {
1500                I_CmsEventListener.EVENT_CLEAR_CACHES,
1501                I_CmsEventListener.EVENT_PUBLISH_PROJECT,
1502                I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES});
1503    }
1504
1505    /**
1506     * Calls {@link I_CmsSearchFieldConfiguration#init()} for all registered field configurations.
1507     */
1508    public void initializeFieldConfigurations() {
1509
1510        for (I_CmsSearchFieldConfiguration config : m_fieldConfigurations.values()) {
1511            config.init();
1512        }
1513
1514    }
1515
1516    /**
1517     * Initializes all configured document types and search indexes.<p>
1518     *
1519     * This methods needs to be called if after a change in the index configuration has been made.
1520     */
1521    public void initializeIndexes() {
1522
1523        initAvailableDocumentTypes();
1524        initSearchIndexes();
1525    }
1526
1527    /**
1528     * Initialize the offline index handler, require after an offline index has been added.<p>
1529     */
1530    public void initOfflineIndexes() {
1531
1532        // check which indexes are configured as offline indexes
1533        List<I_CmsSearchIndex> offlineIndexes = new ArrayList<I_CmsSearchIndex>();
1534        Iterator<I_CmsSearchIndex> i = m_indexes.iterator();
1535        while (i.hasNext()) {
1536            I_CmsSearchIndex index = i.next();
1537            if (I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) {
1538                // this is an offline index
1539                offlineIndexes.add(index);
1540            }
1541        }
1542        m_offlineIndexes = offlineIndexes;
1543        m_offlineHandler.initialize();
1544
1545    }
1546
1547    /**
1548     * Initializes the spell check index.<p>
1549     *
1550     * @param adminCms the ROOT_ADMIN cms context
1551     */
1552    public void initSpellcheckIndex(CmsObject adminCms) {
1553
1554        if (CmsSpellcheckDictionaryIndexer.updatingIndexNecessesary(adminCms)) {
1555            final CmsSolrSpellchecker spellchecker = OpenCms.getSearchManager().getSolrDictionary();
1556            if (spellchecker != null) {
1557
1558                Runnable initRunner = new Runnable() {
1559
1560                    public void run() {
1561
1562                        try {
1563                            spellchecker.parseAndAddDictionaries(adminCms);
1564                        } catch (CmsRoleViolationException e) {
1565                            LOG.error(e.getLocalizedMessage(), e);
1566                        }
1567                    }
1568                };
1569                new Thread(initRunner).start();
1570            }
1571        }
1572    }
1573
1574    /**
1575     * Returns if the offline indexing is paused.<p>
1576     *
1577     * @return <code>true</code> if the offline indexing is paused
1578     */
1579    public boolean isOfflineIndexingPaused() {
1580
1581        return m_offlineUpdateFrequency == Long.MAX_VALUE;
1582    }
1583
1584    /**
1585     * Updates the indexes from as a scheduled job.<p>
1586     *
1587     * @param cms the OpenCms user context to use when reading resources from the VFS
1588     * @param parameters the parameters for the scheduled job
1589     *
1590     * @throws Exception if something goes wrong
1591     *
1592     * @return the String to write in the scheduler log
1593     *
1594     * @see org.opencms.scheduler.I_CmsScheduledJob#launch(CmsObject, Map)
1595     */
1596    public String launch(CmsObject cms, Map<String, String> parameters) throws Exception {
1597
1598        CmsSearchManager manager = OpenCms.getSearchManager();
1599
1600        I_CmsReport report = null;
1601        boolean writeLog = Boolean.valueOf(parameters.get(JOB_PARAM_WRITELOG)).booleanValue();
1602
1603        if (writeLog) {
1604            report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class);
1605        }
1606
1607        List<String> updateList = null;
1608        String indexList = parameters.get(JOB_PARAM_INDEXLIST);
1609        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(indexList)) {
1610            // index list has been provided as job parameter
1611            updateList = new ArrayList<String>();
1612            String[] indexNames = CmsStringUtil.splitAsArray(indexList, '|');
1613            for (int i = 0; i < indexNames.length; i++) {
1614                // check if the index actually exists
1615                if (manager.getIndex(indexNames[i]) != null) {
1616                    updateList.add(indexNames[i]);
1617                } else {
1618                    if (LOG.isWarnEnabled()) {
1619                        LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexNames[i]));
1620                    }
1621                }
1622            }
1623        }
1624
1625        long startTime = System.currentTimeMillis();
1626
1627        if (updateList == null) {
1628            // all indexes need to be updated
1629            manager.rebuildAllIndexes(report);
1630        } else {
1631            // rebuild only the selected indexes
1632            manager.rebuildIndexes(updateList, report);
1633        }
1634
1635        long runTime = System.currentTimeMillis() - startTime;
1636
1637        String finishMessage = Messages.get().getBundle().key(
1638            Messages.LOG_REBUILD_INDEXES_FINISHED_1,
1639            CmsStringUtil.formatRuntime(runTime));
1640
1641        if (LOG.isInfoEnabled()) {
1642            LOG.info(finishMessage);
1643        }
1644        return finishMessage;
1645    }
1646
1647    /**
1648     * Pauses the offline indexing.<p>
1649     * May take some time, because the indexes are updated first.<p>
1650     */
1651    public void pauseOfflineIndexing() {
1652
1653        if (m_offlineUpdateFrequency != Long.MAX_VALUE) {
1654            m_configuredOfflineIndexingFrequency = m_offlineUpdateFrequency;
1655            m_offlineUpdateFrequency = Long.MAX_VALUE;
1656            updateOfflineIndexes(0);
1657        }
1658    }
1659
1660    /**
1661     * Rebuilds (if required creates) all configured indexes.<p>
1662     *
1663     * @param report the report object to write messages (or <code>null</code>)
1664     *
1665     * @throws CmsException if something goes wrong
1666     */
1667    public void rebuildAllIndexes(I_CmsReport report) throws CmsException {
1668
1669        try {
1670            SEARCH_MANAGER_LOCK.lock();
1671
1672            CmsMessageContainer container = null;
1673            for (int i = 0, n = m_indexes.size(); i < n; i++) {
1674                // iterate all configured search indexes
1675                I_CmsSearchIndex searchIndex = m_indexes.get(i);
1676                try {
1677                    // update the index
1678                    updateIndex(searchIndex, report, null);
1679                } catch (CmsException e) {
1680                    container = new CmsMessageContainer(
1681                        Messages.get(),
1682                        Messages.ERR_INDEX_REBUILD_ALL_1,
1683                        new Object[] {searchIndex.getName()});
1684                    LOG.error(
1685                        Messages.get().getBundle().key(Messages.ERR_INDEX_REBUILD_ALL_1, searchIndex.getName()),
1686                        e);
1687                }
1688            }
1689            // clean up the extraction result cache
1690            cleanExtractionCache();
1691            if (container != null) {
1692                // throw stored exception
1693                throw new CmsSearchException(container);
1694            }
1695        } finally {
1696            SEARCH_MANAGER_LOCK.unlock();
1697        }
1698    }
1699
1700    /**
1701     * Rebuilds (if required creates) the index with the given name.<p>
1702     *
1703     * @param indexName the name of the index to rebuild
1704     * @param report the report object to write messages (or <code>null</code>)
1705     *
1706     * @throws CmsException if something goes wrong
1707     */
1708    public void rebuildIndex(String indexName, I_CmsReport report) throws CmsException {
1709
1710        try {
1711            SEARCH_MANAGER_LOCK.lock();
1712            // get the search index by name
1713            I_CmsSearchIndex index = getIndex(indexName);
1714            // update the index
1715            updateIndex(index, report, null);
1716            // clean up the extraction result cache
1717            cleanExtractionCache();
1718        } finally {
1719            SEARCH_MANAGER_LOCK.unlock();
1720        }
1721    }
1722
1723    /**
1724     * Rebuilds (if required creates) the List of indexes with the given name.<p>
1725     *
1726     * @param indexNames the names (String) of the index to rebuild
1727     * @param report the report object to write messages (or <code>null</code>)
1728     *
1729     * @throws CmsException if something goes wrong
1730     */
1731    public void rebuildIndexes(List<String> indexNames, I_CmsReport report) throws CmsException {
1732
1733        try {
1734            SEARCH_MANAGER_LOCK.lock();
1735            Iterator<String> i = indexNames.iterator();
1736            while (i.hasNext()) {
1737                String indexName = i.next();
1738                // get the search index by name
1739                I_CmsSearchIndex index = getIndex(indexName);
1740                if (index != null) {
1741                    // update the index
1742                    updateIndex(index, report, null);
1743                } else {
1744                    if (LOG.isWarnEnabled()) {
1745                        LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName));
1746                    }
1747                }
1748            }
1749            // clean up the extraction result cache
1750            cleanExtractionCache();
1751        } finally {
1752            SEARCH_MANAGER_LOCK.unlock();
1753        }
1754    }
1755
1756    /**
1757     * Registers a new Solr core for the given index.<p>
1758     *
1759     * @param index the index to register a new Solr core for
1760     *
1761     * @throws CmsConfigurationException if no Solr server is configured
1762     */
1763    @SuppressWarnings("resource")
1764    public void registerSolrIndex(CmsSolrIndex index) throws CmsConfigurationException {
1765
1766        if ((m_solrConfig == null) || !m_solrConfig.isEnabled()) {
1767            // No solr server configured
1768            throw new CmsConfigurationException(Messages.get().container(Messages.ERR_SOLR_NOT_ENABLED_0));
1769        }
1770
1771        if (m_solrConfig.getServerUrl() != null) {
1772            // HTTP Server configured
1773            // TODO Implement multi core support for HTTP server
1774            // @see http://lucidworks.lucidimagination.com/display/solr/Configuring+solr.xml
1775            index.setSolrServer(new Builder().withBaseSolrUrl(m_solrConfig.getServerUrl()).build());
1776        }
1777
1778        // get the core container that contains one core for each configured index
1779        if (m_coreContainer == null) {
1780            m_coreContainer = createCoreContainer();
1781        }
1782
1783        // unload the existing core if it exists to avoid problems with forced unlock.
1784        if (m_coreContainer.getAllCoreNames().contains(index.getCoreName())) {
1785            m_coreContainer.unload(index.getCoreName(), false, false, true);
1786        }
1787        // ensure that all locks on the index are gone
1788        ensureIndexIsUnlocked(index.getPath());
1789
1790        // load the core to the container
1791        File dataDir = new File(index.getPath());
1792        if (!dataDir.exists()) {
1793            dataDir.mkdirs();
1794            if (CmsLog.INIT.isInfoEnabled()) {
1795                CmsLog.INIT.info(
1796                    Messages.get().getBundle().key(
1797                        Messages.INIT_SOLR_INDEX_DIR_CREATED_2,
1798                        index.getName(),
1799                        index.getPath()));
1800            }
1801        }
1802        File instanceDir = new File(m_solrConfig.getHome() + FileSystems.getDefault().getSeparator() + index.getName());
1803        if (!instanceDir.exists()) {
1804            instanceDir.mkdirs();
1805            if (CmsLog.INIT.isInfoEnabled()) {
1806                CmsLog.INIT.info(
1807                    Messages.get().getBundle().key(
1808                        Messages.INIT_SOLR_INDEX_DIR_CREATED_2,
1809                        index.getName(),
1810                        index.getPath()));
1811            }
1812        }
1813
1814        // create the core
1815        // TODO: suboptimal - forces always the same schema
1816        SolrCore core = null;
1817        try {
1818            // creation includes registration.
1819            // TODO: this was the old code: core = m_coreContainer.create(descriptor, false);
1820            Map<String, String> properties = new HashMap<String, String>(3);
1821            properties.put(CoreDescriptor.CORE_DATADIR, dataDir.getAbsolutePath());
1822            properties.put(CoreDescriptor.CORE_CONFIGSET, "default");
1823            core = m_coreContainer.create(index.getCoreName(), instanceDir.toPath(), properties, false);
1824        } catch (NullPointerException e) {
1825            if (core != null) {
1826                core.close();
1827            }
1828            throw new CmsConfigurationException(
1829                Messages.get().container(
1830                    Messages.ERR_SOLR_SERVER_NOT_CREATED_3,
1831                    index.getName() + " (" + index.getCoreName() + ")",
1832                    index.getPath(),
1833                    m_solrConfig.getSolrConfigFile().getAbsolutePath()),
1834                e);
1835        }
1836
1837        if (index.isNoSolrServerSet()) {
1838            index.setSolrServer(new EmbeddedSolrServer(m_coreContainer, index.getCoreName()));
1839        }
1840        if (CmsLog.INIT.isInfoEnabled()) {
1841            CmsLog.INIT.info(
1842                Messages.get().getBundle().key(
1843                    Messages.INIT_SOLR_SERVER_CREATED_1,
1844                    index.getName() + " (" + index.getCoreName() + ")"));
1845        }
1846    }
1847
1848    /**
1849     * Removes this field configuration from the OpenCms configuration (if it is not used any more).<p>
1850     *
1851     * @param fieldConfiguration the field configuration to remove from the configuration
1852     *
1853     * @return true if remove was successful, false if preconditions for removal are ok but the given
1854     *         field configuration was unknown to the manager.
1855     *
1856     * @throws CmsIllegalStateException if the given field configuration is still used by at least one
1857     *         <code>{@link I_CmsSearchIndex}</code>.
1858     *
1859     */
1860    public boolean removeSearchFieldConfiguration(I_CmsSearchFieldConfiguration fieldConfiguration)
1861    throws CmsIllegalStateException {
1862
1863        // never remove the standard field configuration
1864        if (fieldConfiguration.getName().equals(CmsSearchFieldConfiguration.STR_STANDARD)) {
1865            throw new CmsIllegalStateException(
1866                Messages.get().container(
1867                    Messages.ERR_INDEX_CONFIGURATION_DELETE_STANDARD_1,
1868                    fieldConfiguration.getName()));
1869        }
1870        // validation if removal will be granted
1871        Iterator<I_CmsSearchIndex> itIndexes = m_indexes.iterator();
1872        I_CmsSearchIndex idx;
1873        // the list for collecting indexes that use the given field configuration
1874        List<I_CmsSearchIndex> referrers = new ArrayList<I_CmsSearchIndex>();
1875        I_CmsSearchFieldConfiguration refFieldConfig;
1876        while (itIndexes.hasNext()) {
1877            idx = itIndexes.next();
1878            refFieldConfig = idx.getFieldConfiguration();
1879            if (refFieldConfig.equals(fieldConfiguration)) {
1880                referrers.add(idx);
1881            }
1882        }
1883        if (referrers.size() > 0) {
1884            throw new CmsIllegalStateException(
1885                Messages.get().container(
1886                    Messages.ERR_INDEX_CONFIGURATION_DELETE_2,
1887                    fieldConfiguration.getName(),
1888                    referrers.toString()));
1889        }
1890
1891        // remove operation (no exception)
1892        return m_fieldConfigurations.remove(fieldConfiguration.getName()) != null;
1893
1894    }
1895
1896    /**
1897     * Removes a search field from the field configuration.<p>
1898     *
1899     * @param fieldConfiguration the field configuration
1900     * @param field field to remove from the field configuration
1901     *
1902     * @return true if remove was successful, false if preconditions for removal are ok but the given
1903     *         field was unknown.
1904     */
1905    public boolean removeSearchFieldConfigurationField(
1906        I_CmsSearchFieldConfiguration fieldConfiguration,
1907        CmsSearchField field) {
1908
1909        if (LOG.isInfoEnabled()) {
1910            LOG.info(
1911                Messages.get().getBundle().key(
1912                    Messages.LOG_REMOVE_FIELDCONFIGURATION_FIELD_INDEX_2,
1913                    field.getName(),
1914                    fieldConfiguration.getName()));
1915        }
1916
1917        return fieldConfiguration.getFields().remove(field);
1918    }
1919
1920    /**
1921     * Removes a search field mapping from the given field.<p>
1922     *
1923     * @param field the field
1924     * @param mapping mapping to remove from the field
1925     *
1926     * @return true if remove was successful, false if preconditions for removal are ok but the given
1927     *         mapping was unknown.
1928     *
1929     * @throws CmsIllegalStateException if the given mapping is the last mapping inside the given field.
1930     */
1931    public boolean removeSearchFieldMapping(CmsLuceneField field, CmsSearchFieldMapping mapping)
1932    throws CmsIllegalStateException {
1933
1934        if (field.getMappings().size() < 2) {
1935            throw new CmsIllegalStateException(
1936                Messages.get().container(
1937                    Messages.ERR_FIELD_MAPPING_DELETE_2,
1938                    mapping.getType().toString(),
1939                    field.getName()));
1940        } else {
1941
1942            if (LOG.isInfoEnabled()) {
1943                LOG.info(
1944                    Messages.get().getBundle().key(
1945                        Messages.LOG_REMOVE_FIELD_MAPPING_INDEX_2,
1946                        mapping.toString(),
1947                        field.getName()));
1948            }
1949            return field.getMappings().remove(mapping);
1950        }
1951    }
1952
1953    /**
1954     * Removes a search index from the configuration.<p>
1955     *
1956     * @param searchIndex the search index to remove
1957     */
1958    public void removeSearchIndex(I_CmsSearchIndex searchIndex) {
1959
1960        // shut down index to remove potential config files of Solr indexes
1961        searchIndex.shutDown();
1962        if (searchIndex instanceof CmsSolrIndex) {
1963            CmsSolrIndex solrIndex = (CmsSolrIndex)searchIndex;
1964            m_coreContainer.unload(solrIndex.getCoreName(), true, true, true);
1965        }
1966        m_indexes.remove(searchIndex);
1967        initOfflineIndexes();
1968
1969        if (LOG.isInfoEnabled()) {
1970            LOG.info(
1971                Messages.get().getBundle().key(
1972                    Messages.LOG_REMOVE_SEARCH_INDEX_2,
1973                    searchIndex.getName(),
1974                    searchIndex.getProject()));
1975        }
1976    }
1977
1978    /**
1979     * Removes all indexes included in the given list (which must contain the name of an index to remove).<p>
1980     *
1981     * @param indexNames the names of the index to remove
1982     */
1983    public void removeSearchIndexes(List<String> indexNames) {
1984
1985        Iterator<String> i = indexNames.iterator();
1986        while (i.hasNext()) {
1987            String indexName = i.next();
1988            // get the search index by name
1989            I_CmsSearchIndex index = getIndex(indexName);
1990            if (index != null) {
1991                // remove the index
1992                removeSearchIndex(index);
1993            } else {
1994                if (LOG.isWarnEnabled()) {
1995                    LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName));
1996                }
1997            }
1998        }
1999    }
2000
2001    /**
2002     * Removes this indexsource from the OpenCms configuration (if it is not used any more).<p>
2003     *
2004     * @param indexsource the indexsource to remove from the configuration
2005     *
2006     * @return true if remove was successful, false if preconditions for removal are ok but the given
2007     *         searchindex was unknown to the manager.
2008     *
2009     * @throws CmsIllegalStateException if the given indexsource is still used by at least one
2010     *         <code>{@link I_CmsSearchIndex}</code>.
2011     *
2012     */
2013    public boolean removeSearchIndexSource(CmsSearchIndexSource indexsource) throws CmsIllegalStateException {
2014
2015        // validation if removal will be granted
2016        Iterator<I_CmsSearchIndex> itIndexes = m_indexes.iterator();
2017        I_CmsSearchIndex idx;
2018        // the list for collecting indexes that use the given index source
2019        List<I_CmsSearchIndex> referrers = new ArrayList<I_CmsSearchIndex>();
2020        // the current list of referred index sources of the iterated index
2021        List<CmsSearchIndexSource> refsources;
2022        while (itIndexes.hasNext()) {
2023            idx = itIndexes.next();
2024            refsources = idx.getSources();
2025            if (refsources != null) {
2026                if (refsources.contains(indexsource)) {
2027                    referrers.add(idx);
2028                }
2029            }
2030        }
2031        if (referrers.size() > 0) {
2032            throw new CmsIllegalStateException(
2033                Messages.get().container(
2034                    Messages.ERR_INDEX_SOURCE_DELETE_2,
2035                    indexsource.getName(),
2036                    referrers.toString()));
2037        }
2038
2039        // remove operation (no exception)
2040        return m_indexSources.remove(indexsource.getName()) != null;
2041
2042    }
2043
2044    /**
2045     * Resumes offline indexing if it was paused.<p>
2046     */
2047    public void resumeOfflineIndexing() {
2048
2049        if (m_offlineUpdateFrequency == Long.MAX_VALUE) {
2050            setOfflineUpdateFrequency(
2051                m_configuredOfflineIndexingFrequency > 0
2052                ? m_configuredOfflineIndexingFrequency
2053                : DEFAULT_OFFLINE_UPDATE_FREQNENCY);
2054        }
2055    }
2056
2057    /**
2058     * Sets the name of the directory below WEB-INF/ where the search indexes are stored.<p>
2059     *
2060     * @param value the name of the directory below WEB-INF/ where the search indexes are stored
2061     */
2062    public void setDirectory(String value) {
2063
2064        m_path = value;
2065    }
2066
2067    /**
2068     * Sets the maximum age a text extraction result is kept in the cache (in hours).<p>
2069     *
2070     * @param extractionCacheMaxAge the maximum age for a text extraction result to set
2071     */
2072    public void setExtractionCacheMaxAge(float extractionCacheMaxAge) {
2073
2074        m_extractionCacheMaxAge = extractionCacheMaxAge;
2075    }
2076
2077    /**
2078     * Sets the maximum age a text extraction result is kept in the cache (in hours) as a String.<p>
2079     *
2080     * @param extractionCacheMaxAge the maximum age for a text extraction result to set
2081     */
2082    public void setExtractionCacheMaxAge(String extractionCacheMaxAge) {
2083
2084        try {
2085            setExtractionCacheMaxAge(Float.parseFloat(extractionCacheMaxAge));
2086        } catch (NumberFormatException e) {
2087            LOG.error(
2088                Messages.get().getBundle().key(
2089                    Messages.LOG_PARSE_EXTRACTION_CACHE_AGE_FAILED_2,
2090                    extractionCacheMaxAge,
2091                    new Float(DEFAULT_EXTRACTION_CACHE_MAX_AGE)),
2092                e);
2093            setExtractionCacheMaxAge(DEFAULT_EXTRACTION_CACHE_MAX_AGE);
2094        }
2095    }
2096
2097    /**
2098     * Sets the unlock mode during indexing.<p>
2099     *
2100     * @param value the value
2101     */
2102    public void setForceunlock(String value) {
2103
2104        m_forceUnlockMode = CmsSearchForceUnlockMode.valueOf(value);
2105    }
2106
2107    /**
2108     * Sets the highlighter.<p>
2109     *
2110     * A highlighter is a class implementing org.opencms.search.documents.I_TermHighlighter.<p>
2111     *
2112     * @param highlighter the package/class name of the highlighter
2113     */
2114    public void setHighlighter(String highlighter) {
2115
2116        try {
2117            m_highlighter = (I_CmsTermHighlighter)Class.forName(highlighter).newInstance();
2118        } catch (Exception e) {
2119            m_highlighter = null;
2120            LOG.error(e.getLocalizedMessage(), e);
2121        }
2122    }
2123
2124    /**
2125     * Sets the seconds to wait for an index lock during an update operation.<p>
2126     *
2127     * @param value the seconds to wait for an index lock during an update operation
2128     */
2129    public void setIndexLockMaxWaitSeconds(int value) {
2130
2131        m_indexLockMaxWaitSeconds = value;
2132    }
2133
2134    /**
2135     * Sets the max. excerpt length.<p>
2136     *
2137     * @param maxExcerptLength the max. excerpt length to set
2138     */
2139    public void setMaxExcerptLength(int maxExcerptLength) {
2140
2141        m_maxExcerptLength = maxExcerptLength;
2142    }
2143
2144    /**
2145     * Sets the max. excerpt length as a String.<p>
2146     *
2147     * @param maxExcerptLength the max. excerpt length to set
2148     */
2149    public void setMaxExcerptLength(String maxExcerptLength) {
2150
2151        try {
2152            setMaxExcerptLength(Integer.parseInt(maxExcerptLength));
2153        } catch (Exception e) {
2154            LOG.error(
2155                Messages.get().getBundle().key(
2156                    Messages.LOG_PARSE_EXCERPT_LENGTH_FAILED_2,
2157                    maxExcerptLength,
2158                    new Integer(DEFAULT_EXCERPT_LENGTH)),
2159                e);
2160            setMaxExcerptLength(DEFAULT_EXCERPT_LENGTH);
2161        }
2162    }
2163
2164    /**
2165     * Sets the maximal wait time for offline index updates after edit operations.<p>
2166     *
2167     * @param maxIndexWaitTime  the maximal wait time to set in milliseconds
2168     */
2169    public void setMaxIndexWaitTime(long maxIndexWaitTime) {
2170
2171        m_maxIndexWaitTime = maxIndexWaitTime;
2172    }
2173
2174    /**
2175     * Sets the maximal wait time for offline index updates after edit operations.<p>
2176     *
2177     * @param maxIndexWaitTime the maximal wait time to set in milliseconds
2178     */
2179    public void setMaxIndexWaitTime(String maxIndexWaitTime) {
2180
2181        try {
2182            setMaxIndexWaitTime(Long.parseLong(maxIndexWaitTime));
2183        } catch (Exception e) {
2184            LOG.error(
2185                Messages.get().getBundle().key(
2186                    Messages.LOG_PARSE_MAX_INDEX_WAITTIME_FAILED_2,
2187                    maxIndexWaitTime,
2188                    new Long(DEFAULT_MAX_INDEX_WAITTIME)),
2189                e);
2190            setMaxIndexWaitTime(DEFAULT_MAX_INDEX_WAITTIME);
2191        }
2192    }
2193
2194    /**
2195     * Sets the maximum number of modifications before a commit in the search index is triggered.<p>
2196     *
2197     * @param maxModificationsBeforeCommit the maximum number of modifications to set
2198     */
2199    public void setMaxModificationsBeforeCommit(int maxModificationsBeforeCommit) {
2200
2201        m_maxModificationsBeforeCommit = maxModificationsBeforeCommit;
2202    }
2203
2204    /**
2205     * Sets the maximum number of modifications before a commit in the search index is triggered as a string.<p>
2206     *
2207     * @param value the maximum number of modifications to set
2208     */
2209    public void setMaxModificationsBeforeCommit(String value) {
2210
2211        try {
2212            setMaxModificationsBeforeCommit(Integer.parseInt(value));
2213        } catch (Exception e) {
2214            LOG.error(
2215                Messages.get().getBundle().key(
2216                    Messages.LOG_PARSE_MAXCOMMIT_FAILED_2,
2217                    value,
2218                    new Integer(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT)),
2219                e);
2220            setMaxModificationsBeforeCommit(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT);
2221        }
2222    }
2223
2224    /**
2225     * Sets the update frequency of the offline indexer in milliseconds.<p>
2226     *
2227     * @param offlineUpdateFrequency the update frequency in milliseconds to set
2228     */
2229    public void setOfflineUpdateFrequency(long offlineUpdateFrequency) {
2230
2231        m_offlineUpdateFrequency = offlineUpdateFrequency;
2232        updateOfflineIndexes(0);
2233    }
2234
2235    /**
2236     * Sets the update frequency of the offline indexer in milliseconds.<p>
2237     *
2238     * @param offlineUpdateFrequency the update frequency in milliseconds to set
2239     */
2240    public void setOfflineUpdateFrequency(String offlineUpdateFrequency) {
2241
2242        try {
2243            setOfflineUpdateFrequency(Long.parseLong(offlineUpdateFrequency));
2244        } catch (Exception e) {
2245            LOG.error(
2246                Messages.get().getBundle().key(
2247                    Messages.LOG_PARSE_OFFLINE_UPDATE_FAILED_2,
2248                    offlineUpdateFrequency,
2249                    new Long(DEFAULT_OFFLINE_UPDATE_FREQNENCY)),
2250                e);
2251            setOfflineUpdateFrequency(DEFAULT_OFFLINE_UPDATE_FREQNENCY);
2252        }
2253    }
2254
2255    /**
2256     * Sets the Solr configuration.<p>
2257     *
2258     * @param config the Solr configuration
2259     */
2260    public void setSolrServerConfiguration(CmsSolrConfiguration config) {
2261
2262        m_solrConfig = config;
2263    }
2264
2265    /**
2266     * Sets the timeout to abandon threads indexing a resource.<p>
2267     *
2268     * @param value the timeout in milliseconds
2269     */
2270    public void setTimeout(long value) {
2271
2272        m_timeout = value;
2273    }
2274
2275    /**
2276     * Sets the timeout to abandon threads indexing a resource as a String.<p>
2277     *
2278     * @param value the timeout in milliseconds
2279     */
2280    public void setTimeout(String value) {
2281
2282        try {
2283            setTimeout(Long.parseLong(value));
2284        } catch (Exception e) {
2285            LOG.error(
2286                Messages.get().getBundle().key(Messages.LOG_PARSE_TIMEOUT_FAILED_2, value, new Long(DEFAULT_TIMEOUT)),
2287                e);
2288            setTimeout(DEFAULT_TIMEOUT);
2289        }
2290    }
2291
2292    /**
2293     * Shuts down the search manager.<p>
2294     *
2295     * This will cause all search indices to be shut down.<p>
2296     */
2297    public void shutDown() {
2298
2299        if (m_offlineIndexThread != null) {
2300            m_offlineIndexThread.shutDown();
2301        }
2302
2303        if (m_offlineHandler != null) {
2304            OpenCms.removeCmsEventListener(m_offlineHandler);
2305        }
2306
2307        Iterator<I_CmsSearchIndex> i = m_indexes.iterator();
2308        while (i.hasNext()) {
2309            I_CmsSearchIndex index = i.next();
2310            index.shutDown();
2311            index = null;
2312        }
2313        m_indexes.clear();
2314
2315        shutDownSolrContainer();
2316
2317        if (CmsLog.INIT.isInfoEnabled()) {
2318            CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SHUTDOWN_MANAGER_0));
2319        }
2320    }
2321
2322    /**
2323     * Updates all offline indexes.<p>
2324     *
2325     * Can be used to force an index update when it's not convenient to wait until the
2326     * offline update interval has eclipsed.<p>
2327     *
2328     * Since the offline indexes still need some time to update the new resources,
2329     * the method waits for at most the configurable <code>maxIndexWaitTime</code>
2330     * to ensure that updating is finished.
2331     *
2332     * @see #updateOfflineIndexes(long)
2333     *
2334     */
2335    public void updateOfflineIndexes() {
2336
2337        updateOfflineIndexes(getMaxIndexWaitTime());
2338    }
2339
2340    /**
2341     * Updates all offline indexes.<p>
2342     *
2343     * Can be used to force an index update when it's not convenient to wait until the
2344     * offline update interval has eclipsed.<p>
2345     *
2346     * Since the offline index will still need some time to update the new resources even if it runs directly,
2347     * a wait time of 2500 or so should be given in order to make sure the index finished updating.
2348     *
2349     * @param waitTime milliseconds to wait after the offline update index was notified of the changes
2350     */
2351    public void updateOfflineIndexes(long waitTime) {
2352
2353        if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) {
2354            // notify existing thread of update frequency change
2355            if (LOG.isDebugEnabled()) {
2356                LOG.debug(Messages.get().getBundle().key(Messages.LOG_OI_UPDATE_INTERRUPT_0));
2357            }
2358            m_offlineIndexThread.interrupt();
2359            if (waitTime > 0) {
2360                m_offlineIndexThread.getWaitHandle().enter(waitTime);
2361            }
2362        }
2363    }
2364
2365    /**
2366     * Cleans up the extraction result cache.<p>
2367     */
2368    protected void cleanExtractionCache() {
2369
2370        // clean up the extraction result cache
2371        m_extractionResultCache.cleanCache(m_extractionCacheMaxAge);
2372    }
2373
2374    /**
2375     * Collects the related containerpages to the resources that have been published.<p>
2376     *
2377     * @param adminCms an OpenCms user context with Admin permissions
2378     * @param updateResources the resources to be re-indexed
2379     *
2380     * @return the updated list of resource to re-index
2381     */
2382    protected List<CmsPublishedResource> findRelatedContainerPages(
2383        CmsObject adminCms,
2384        List<CmsPublishedResource> updateResources) {
2385
2386        Set<CmsResource> elementGroups = new HashSet<CmsResource>();
2387        Set<CmsResource> containerPages = new HashSet<CmsResource>();
2388        int containerPageTypeId = -1;
2389        try {
2390            containerPageTypeId = CmsResourceTypeXmlContainerPage.getContainerPageTypeId();
2391        } catch (CmsLoaderException e) {
2392            // will happen during setup, when container page type is not available yet
2393            LOG.info(e.getLocalizedMessage(), e);
2394        }
2395        if (containerPageTypeId != -1) {
2396            for (CmsPublishedResource pubRes : updateResources) {
2397                try {
2398                    if (OpenCms.getResourceManager().getResourceType(
2399                        pubRes.getType()) instanceof CmsResourceTypeXmlContent) {
2400                        CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(pubRes.getStructureId());
2401                        filter.filterStrong();
2402                        List<CmsRelation> relations = adminCms.readRelations(filter);
2403                        for (CmsRelation relation : relations) {
2404                            CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL);
2405                            if (CmsResourceTypeXmlContainerPage.isContainerPage(res)) {
2406                                containerPages.add(res);
2407                                if (CmsDetailOnlyContainerUtil.isDetailContainersPage(
2408                                    adminCms,
2409                                    adminCms.getSitePath(res))) {
2410                                    addDetailContent(adminCms, containerPages, adminCms.getSitePath(res));
2411                                }
2412                            } else if (OpenCms.getResourceManager().getResourceType(
2413                                res.getTypeId()).getTypeName().equals(
2414                                    CmsResourceTypeXmlContainerPage.GROUP_CONTAINER_TYPE_NAME)) {
2415                                elementGroups.add(res);
2416                            }
2417                        }
2418                    }
2419                    if (containerPageTypeId == pubRes.getType()) {
2420                        addDetailContent(
2421                            adminCms,
2422                            containerPages,
2423                            adminCms.getRequestContext().removeSiteRoot(pubRes.getRootPath()));
2424                    }
2425                } catch (CmsException e) {
2426                    LOG.error(e.getLocalizedMessage(), e);
2427                }
2428            }
2429            for (CmsResource pubRes : elementGroups) {
2430                try {
2431                    CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(pubRes.getStructureId());
2432                    filter.filterStrong();
2433                    List<CmsRelation> relations = adminCms.readRelations(filter);
2434                    for (CmsRelation relation : relations) {
2435                        CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL);
2436                        if (CmsResourceTypeXmlContainerPage.isContainerPage(res)) {
2437                            containerPages.add(res);
2438                            if (CmsDetailOnlyContainerUtil.isDetailContainersPage(
2439                                adminCms,
2440                                adminCms.getSitePath(res))) {
2441                                addDetailContent(adminCms, containerPages, adminCms.getSitePath(res));
2442                            }
2443                        }
2444                    }
2445                } catch (CmsException e) {
2446                    LOG.error(e.getLocalizedMessage(), e);
2447                }
2448            }
2449            // add all found container pages as published resource objects to the list
2450            for (CmsResource page : containerPages) {
2451                CmsPublishedResource pubCont = new CmsPublishedResource(page);
2452                if (!updateResources.contains(pubCont)) {
2453                    // ensure container page is added only once
2454                    updateResources.add(pubCont);
2455                }
2456            }
2457        }
2458        return updateResources;
2459    }
2460
2461    /**
2462     * Returns the set of names of all configured document types.<p>
2463     *
2464     * @return the set of names of all configured document types
2465     */
2466    protected List<String> getDocumentTypes() {
2467
2468        List<String> names = new ArrayList<String>();
2469        for (Iterator<I_CmsDocumentFactory> i = m_documentTypes.values().iterator(); i.hasNext();) {
2470            I_CmsDocumentFactory factory = i.next();
2471            names.add(factory.getName());
2472        }
2473        return names;
2474    }
2475
2476    /**
2477     * Returns the a offline project used for offline indexing.<p>
2478     *
2479     * @return the offline project if available
2480     */
2481    protected CmsProject getOfflineIndexProject() {
2482
2483        CmsProject result = null;
2484        for (I_CmsSearchIndex index : m_offlineIndexes) {
2485            try {
2486                result = m_adminCms.readProject(index.getProject());
2487
2488                if (!result.isOnlineProject()) {
2489                    break;
2490                }
2491            } catch (Exception e) {
2492                // may be a missconfigured index, ignore
2493                LOG.error(e.getLocalizedMessage(), e);
2494            }
2495        }
2496        return result;
2497    }
2498
2499    /**
2500     * Returns a new thread manager for the indexing threads.<p>
2501     *
2502     * @return a new thread manager for the indexing threads
2503     */
2504    protected CmsIndexingThreadManager getThreadManager() {
2505
2506        return new CmsIndexingThreadManager(m_timeout, m_maxModificationsBeforeCommit);
2507    }
2508
2509    /**
2510     * Initializes the available Cms resource types to be indexed.<p>
2511     *
2512     * A map stores document factories keyed by a string representing
2513     * a colon separated list of Cms resource types and/or mimetypes.<p>
2514     *
2515     * The keys of this map are used to trigger a document factory to convert
2516     * a Cms resource into a Lucene index document.<p>
2517     *
2518     * A document factory is a class implementing the interface
2519     * {@link org.opencms.search.documents.I_CmsDocumentFactory}.<p>
2520     */
2521    protected void initAvailableDocumentTypes() {
2522
2523        CmsSearchDocumentType documenttype = null;
2524        String className = null;
2525        String name = null;
2526        I_CmsDocumentFactory documentFactory = null;
2527        List<String> resourceTypes = null;
2528        List<String> mimeTypes = null;
2529        Class<?> c = null;
2530
2531        m_documentTypes = new HashMap<String, I_CmsDocumentFactory>();
2532
2533        for (int i = 0, n = m_documentTypeConfigs.size(); i < n; i++) {
2534
2535            documenttype = m_documentTypeConfigs.get(i);
2536            name = documenttype.getName();
2537
2538            try {
2539                className = documenttype.getClassName();
2540                resourceTypes = documenttype.getResourceTypes();
2541                mimeTypes = documenttype.getMimeTypes();
2542
2543                if (name == null) {
2544                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_NAME_0));
2545                }
2546                if (className == null) {
2547                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_CLASS_DEF_0));
2548                }
2549                if (resourceTypes.size() == 0) {
2550                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_RESOURCETYPE_DEF_0));
2551                }
2552
2553                try {
2554                    c = Class.forName(className);
2555                    documentFactory = (I_CmsDocumentFactory)c.getConstructor(new Class[] {String.class}).newInstance(
2556                        new Object[] {name});
2557                } catch (ClassNotFoundException exc) {
2558                    throw new CmsIndexException(
2559                        Messages.get().container(Messages.ERR_DOCCLASS_NOT_FOUND_1, className),
2560                        exc);
2561                } catch (Exception exc) {
2562                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCCLASS_INIT_1, className), exc);
2563                }
2564
2565                if (documentFactory.isUsingCache()) {
2566                    // init cache if used by the factory
2567                    documentFactory.setCache(m_extractionResultCache);
2568                }
2569
2570                for (Iterator<String> key = documentFactory.getDocumentKeys(
2571                    resourceTypes,
2572                    mimeTypes).iterator(); key.hasNext();) {
2573                    m_documentTypes.put(key.next(), documentFactory);
2574                }
2575
2576            } catch (CmsException e) {
2577                if (LOG.isWarnEnabled()) {
2578                    LOG.warn(Messages.get().getBundle().key(Messages.LOG_DOCTYPE_CONFIG_FAILED_1, name), e);
2579                }
2580            }
2581        }
2582    }
2583
2584    /**
2585     * Initializes the configured search indexes.<p>
2586     *
2587     * This initializes also the list of Cms resources types
2588     * to be indexed by an index source.<p>
2589     */
2590    protected void initSearchIndexes() {
2591
2592        I_CmsSearchIndex index = null;
2593        for (int i = 0, n = m_indexes.size(); i < n; i++) {
2594            index = m_indexes.get(i);
2595            // reset disabled flag
2596            index.setEnabled(true);
2597            // check if the index has been configured correctly
2598            if (index.checkConfiguration(m_adminCms)) {
2599                // the index is configured correctly
2600                try {
2601                    index.initialize();
2602                } catch (Exception e) {
2603                    if (CmsLog.INIT.isWarnEnabled()) {
2604                        // in this case the index will be disabled
2605                        CmsLog.INIT.warn(Messages.get().getBundle().key(Messages.INIT_SEARCH_INIT_FAILED_1, index), e);
2606                    }
2607                }
2608            }
2609            // output a log message if the index was successfully configured or not
2610            if (CmsLog.INIT.isInfoEnabled()) {
2611                if (index.isEnabled()) {
2612                    CmsLog.INIT.info(
2613                        Messages.get().getBundle().key(Messages.INIT_INDEX_CONFIGURED_2, index, index.getProject()));
2614                } else {
2615                    CmsLog.INIT.warn(
2616                        Messages.get().getBundle().key(
2617                            Messages.INIT_INDEX_NOT_CONFIGURED_2,
2618                            index,
2619                            index.getProject()));
2620                }
2621            }
2622        }
2623    }
2624
2625    /**
2626     * Checks, if the index should be rebuilt/updated at all by the search manager.
2627     * @param index the index to check.
2628     * @return a flag, indicating if the index should be rebuilt/updated at all.
2629     */
2630    protected boolean shouldUpdateAtAll(I_CmsSearchIndex index) {
2631
2632        if (I_CmsSearchIndex.REBUILD_MODE_NEVER.equals(index.getRebuildMode())) {
2633            LOG.debug(Messages.get().getBundle().key(Messages.LOG_SKIP_REBUILD_FOR_MODE_NEVER_1, index.getName()));
2634            return false;
2635        } else {
2636            return true;
2637        }
2638
2639    }
2640
2641    /**
2642     * Incrementally updates all indexes that have their rebuild mode set to <code>"auto"</code>
2643     * after resources have been published.<p>
2644     *
2645     * @param adminCms an OpenCms user context with Admin permissions
2646     * @param publishHistoryId the history ID of the published project
2647     * @param report the report to write the output to
2648     */
2649    protected void updateAllIndexes(CmsObject adminCms, CmsUUID publishHistoryId, I_CmsReport report) {
2650
2651        int oldPriority = Thread.currentThread().getPriority();
2652        try {
2653            SEARCH_MANAGER_LOCK.lock();
2654            Thread.currentThread().setPriority(Thread.MIN_PRIORITY);
2655            List<CmsPublishedResource> publishedResources;
2656            try {
2657                // read the list of all published resources
2658                publishedResources = adminCms.readPublishedResources(publishHistoryId);
2659            } catch (CmsException e) {
2660                LOG.error(
2661                    Messages.get().getBundle().key(Messages.LOG_READING_CHANGED_RESOURCES_FAILED_1, publishHistoryId),
2662                    e);
2663                return;
2664            }
2665            Set<CmsUUID> bothNewAndDeleted = getIdsOfPublishResourcesWhichAreBothNewAndDeleted(publishedResources);
2666            // When published resources with both states 'new' and 'deleted' exist in the same publish job history, the resource has been moved
2667
2668            List<CmsPublishedResource> updateResources = new ArrayList<CmsPublishedResource>();
2669            for (CmsPublishedResource res : publishedResources) {
2670                if (res.isFolder() || res.getState().isUnchanged()) {
2671                    // folders and unchanged resources don't need to be indexed after publish
2672                    continue;
2673                }
2674                if (res.getState().isDeleted() || res.getState().isNew() || res.getState().isChanged()) {
2675                    if (updateResources.contains(res)) {
2676                        // resource may have been added as a sibling of another resource
2677                        // in this case we make sure to use the value from the publish list because of the "deleted" flag
2678                        boolean hasMoved = bothNewAndDeleted.contains(res.getStructureId())
2679                            || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_DESTINATION)
2680                            || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_SOURCE);
2681                        // check it this is a moved resource with source / target info, in this case we need both entries
2682                        if (!hasMoved) {
2683                            // if the resource was moved, we must contain both entries
2684                            updateResources.remove(res);
2685                        }
2686                        // "equals()" implementation of published resource checks for id,
2687                        // so the removed value may have a different "deleted" or "modified" status value
2688                        updateResources.add(res);
2689                    } else {
2690                        // resource not yet contained in the list
2691                        updateResources.add(res);
2692                        // check for the siblings (not for deleted resources, these are already gone)
2693                        if (!res.getState().isDeleted() && (res.getSiblingCount() > 1)) {
2694                            // this resource has siblings
2695                            try {
2696                                // read siblings from the online project
2697                                List<CmsResource> siblings = adminCms.readSiblings(
2698                                    res.getRootPath(),
2699                                    CmsResourceFilter.ALL);
2700                                Iterator<CmsResource> itSib = siblings.iterator();
2701                                while (itSib.hasNext()) {
2702                                    // check all siblings
2703                                    CmsResource sibling = itSib.next();
2704                                    CmsPublishedResource sib = new CmsPublishedResource(sibling);
2705                                    if (!updateResources.contains(sib)) {
2706                                        // ensure sibling is added only once
2707                                        updateResources.add(sib);
2708                                    }
2709                                }
2710                            } catch (CmsException e) {
2711                                // ignore, just use the original resource
2712                                if (LOG.isWarnEnabled()) {
2713                                    LOG.warn(
2714                                        Messages.get().getBundle().key(
2715                                            Messages.LOG_UNABLE_TO_READ_SIBLINGS_1,
2716                                            res.getRootPath()),
2717                                        e);
2718                                }
2719                            }
2720                        }
2721                    }
2722                }
2723            }
2724
2725            findRelatedContainerPages(adminCms, updateResources);
2726            if (!updateResources.isEmpty()) {
2727                // sort the resource to update
2728                Collections.sort(updateResources);
2729                // only update the indexes if the list of remaining published resources is not empty
2730                Iterator<I_CmsSearchIndex> i = m_indexes.iterator();
2731                while (i.hasNext()) {
2732                    I_CmsSearchIndex index = i.next();
2733                    if (I_CmsSearchIndex.REBUILD_MODE_AUTO.equals(index.getRebuildMode())) {
2734                        // only update indexes which have the rebuild mode set to "auto"
2735                        try {
2736                            updateIndex(index, report, updateResources);
2737                        } catch (CmsException e) {
2738                            LOG.error(
2739                                Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()),
2740                                e);
2741                        }
2742                    }
2743                }
2744            }
2745            // clean up the extraction result cache
2746            cleanExtractionCache();
2747        } finally {
2748            SEARCH_MANAGER_LOCK.unlock();
2749            Thread.currentThread().setPriority(oldPriority);
2750        }
2751    }
2752
2753    /**
2754     * Updates (if required creates) the index with the given name.<p>
2755     *
2756     * If the optional List of <code>{@link CmsPublishedResource}</code> instances is provided, the index will be
2757     * incrementally updated for these resources only. If this List is <code>null</code> or empty,
2758     * the index will be fully rebuild.<p>
2759     *
2760     * @param index the index to update or rebuild
2761     * @param report the report to write output messages to
2762     * @param resourcesToIndex an (optional) list of <code>{@link CmsPublishedResource}</code> objects to update in the index
2763     *
2764     * @throws CmsException if something goes wrong
2765     */
2766    protected void updateIndex(I_CmsSearchIndex index, I_CmsReport report, List<CmsPublishedResource> resourcesToIndex)
2767    throws CmsException {
2768
2769        if (shouldUpdateAtAll(index)) {
2770            try {
2771                SEARCH_MANAGER_LOCK.lock();
2772
2773                // copy the stored admin context for the indexing
2774                CmsObject cms = OpenCms.initCmsObject(m_adminCms);
2775                // make sure a report is available
2776                if (report == null) {
2777                    report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class);
2778                }
2779
2780                // check if the index has been configured correctly
2781                if (!index.checkConfiguration(cms)) {
2782                    // the index is disabled
2783                    return;
2784                }
2785
2786                // set site root and project for this index
2787                cms.getRequestContext().setSiteRoot("/");
2788                // switch to the index project
2789                cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject()));
2790
2791                if ((resourcesToIndex == null) || resourcesToIndex.isEmpty()) {
2792                    // rebuild the complete index
2793
2794                    updateIndexCompletely(cms, index, report);
2795                } else {
2796                    updateIndexIncremental(cms, index, report, resourcesToIndex);
2797                }
2798            } finally {
2799                SEARCH_MANAGER_LOCK.unlock();
2800            }
2801        }
2802    }
2803
2804    /**
2805     * The method updates all OpenCms documents that are indexed.
2806     * @param cms the OpenCms user context to use for accessing the VFS
2807     * @param index the index to update
2808     * @param report the report to write output messages to
2809     * @throws CmsIndexException thrown if indexing fails for some reason
2810     */
2811    @SuppressWarnings("null")
2812    protected void updateIndexCompletely(CmsObject cms, I_CmsSearchIndex index, I_CmsReport report)
2813    throws CmsIndexException {
2814
2815        // create a new thread manager for the indexing threads
2816        CmsIndexingThreadManager threadManager = getThreadManager();
2817
2818        boolean isOfflineIndex = false;
2819        if (I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) {
2820            // disable offline indexing while the complete index is rebuild
2821            isOfflineIndex = true;
2822            index.setRebuildMode(I_CmsSearchIndex.REBUILD_MODE_MANUAL);
2823            // re-initialize the offline indexes, this will disable this offline index
2824            initOfflineIndexes();
2825        }
2826
2827        I_CmsIndexWriter writer = null;
2828        try {
2829            // create a backup of the existing index
2830            CmsSearchIndex indexInternal = null;
2831            String backup = null;
2832            if (index instanceof CmsSearchIndex) {
2833                indexInternal = (CmsSearchIndex)index;
2834                backup = indexInternal.createIndexBackup();
2835                if (backup != null) {
2836                    indexInternal.indexSearcherOpen(backup);
2837                }
2838            }
2839
2840            // create a new index writer
2841            writer = index.getIndexWriter(report, true);
2842            if (writer instanceof I_CmsSolrIndexWriter) {
2843                try {
2844                    ((I_CmsSolrIndexWriter)writer).deleteAllDocuments();
2845                } catch (IOException e) {
2846                    LOG.error(e.getMessage(), e);
2847                }
2848            }
2849
2850            // output start information on the report
2851            report.println(
2852                Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_BEGIN_1, index.getName()),
2853                I_CmsReport.FORMAT_HEADLINE);
2854
2855            // iterate all configured index sources of this index
2856            Iterator<CmsSearchIndexSource> sources = index.getSources().iterator();
2857            while (sources.hasNext()) {
2858                // get the next index source
2859                CmsSearchIndexSource source = sources.next();
2860                // create the indexer
2861                I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index);
2862                // new index creation, use all resources from the index source
2863                indexer.rebuildIndex(writer, threadManager, source);
2864
2865                // wait for indexing threads to finish
2866                while (threadManager.isRunning()) {
2867                    try {
2868                        Thread.sleep(500);
2869                    } catch (InterruptedException e) {
2870                        // just continue with the loop after interruption
2871                        LOG.info(e.getLocalizedMessage(), e);
2872                    }
2873                }
2874
2875                // commit and optimize the index after each index source has been finished
2876                try {
2877                    writer.commit();
2878                } catch (IOException e) {
2879                    if (LOG.isWarnEnabled()) {
2880                        LOG.warn(
2881                            Messages.get().getBundle().key(
2882                                Messages.LOG_IO_INDEX_WRITER_COMMIT_2,
2883                                index.getName(),
2884                                index.getPath()),
2885                            e);
2886                    }
2887                }
2888                try {
2889                    writer.optimize();
2890                } catch (IOException e) {
2891                    if (LOG.isWarnEnabled()) {
2892                        LOG.warn(
2893                            Messages.get().getBundle().key(
2894                                Messages.LOG_IO_INDEX_WRITER_OPTIMIZE_2,
2895                                index.getName(),
2896                                index.getPath()),
2897                            e);
2898                    }
2899                }
2900            }
2901
2902            // we are sure here that indexInternal is not null
2903            if (backup != null) {
2904                // remove the backup after the files have been re-indexed
2905                indexInternal.indexSearcherClose();
2906                indexInternal.removeIndexBackup(backup);
2907            }
2908
2909            // output finish information on the report
2910            report.println(
2911                Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_END_1, index.getName()),
2912                I_CmsReport.FORMAT_HEADLINE);
2913
2914        } finally {
2915            if (writer != null) {
2916                try {
2917                    writer.close();
2918                } catch (IOException e) {
2919                    if (LOG.isWarnEnabled()) {
2920                        LOG.warn(
2921                            Messages.get().getBundle().key(
2922                                Messages.LOG_IO_INDEX_WRITER_CLOSE_2,
2923                                index.getPath(),
2924                                index.getName()),
2925                            e);
2926                    }
2927                }
2928            }
2929            if (isOfflineIndex) {
2930                // reset the mode of the offline index
2931                index.setRebuildMode(I_CmsSearchIndex.REBUILD_MODE_OFFLINE);
2932                // re-initialize the offline indexes, this will re-enable this index
2933                initOfflineIndexes();
2934            }
2935            // index has changed - initialize the index searcher instance
2936            index.onIndexChanged(true);
2937        }
2938
2939        // show information about indexing runtime
2940        threadManager.reportStatistics(report);
2941    }
2942
2943    /**
2944     * Incrementally updates the given index.<p>
2945     *
2946     * @param cms the OpenCms user context to use for accessing the VFS
2947     * @param index the index to update
2948     * @param report the report to write output messages to
2949     * @param resourcesToIndex a list of <code>{@link CmsPublishedResource}</code> objects to update in the index
2950     *
2951     * @throws CmsException if something goes wrong
2952     */
2953    protected void updateIndexIncremental(
2954        CmsObject cms,
2955        I_CmsSearchIndex index,
2956        I_CmsReport report,
2957        List<CmsPublishedResource> resourcesToIndex)
2958    throws CmsException {
2959
2960        try {
2961            SEARCH_MANAGER_LOCK.lock();
2962
2963            // update the existing index
2964            List<CmsSearchIndexUpdateData> updateCollections = new ArrayList<CmsSearchIndexUpdateData>();
2965
2966            boolean hasResourcesToDelete = false;
2967            boolean hasResourcesToUpdate = false;
2968
2969            // iterate all configured index sources of this index
2970            Iterator<CmsSearchIndexSource> sources = index.getSources().iterator();
2971            while (sources.hasNext()) {
2972                // get the next index source
2973                CmsSearchIndexSource source = sources.next();
2974                // create the indexer
2975                I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index);
2976                // collect the resources to update
2977                CmsSearchIndexUpdateData updateData = indexer.getUpdateData(source, resourcesToIndex);
2978                if (!updateData.isEmpty()) {
2979                    // add the update collection to the internal pipeline
2980                    updateCollections.add(updateData);
2981                    hasResourcesToDelete = hasResourcesToDelete | updateData.hasResourcesToDelete();
2982                    hasResourcesToUpdate = hasResourcesToUpdate | updateData.hasResourceToUpdate();
2983                }
2984            }
2985
2986            // only start index modification if required
2987            if (hasResourcesToDelete || hasResourcesToUpdate) {
2988                // output start information on the report
2989                report.println(
2990                    Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_BEGIN_1, index.getName()),
2991                    I_CmsReport.FORMAT_HEADLINE);
2992
2993                I_CmsIndexWriter writer = null;
2994                try {
2995                    // obtain an index writer that updates the current index
2996                    writer = index.getIndexWriter(report, false);
2997
2998                    if (hasResourcesToDelete) {
2999                        // delete the resource from the index
3000                        Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator();
3001                        while (i.hasNext()) {
3002                            CmsSearchIndexUpdateData updateCollection = i.next();
3003                            if (updateCollection.hasResourcesToDelete()) {
3004                                updateCollection.getIndexer().deleteResources(
3005                                    writer,
3006                                    updateCollection.getResourcesToDelete());
3007                            }
3008                        }
3009                    }
3010
3011                    if (hasResourcesToUpdate) {
3012                        // create a new thread manager
3013                        CmsIndexingThreadManager threadManager = getThreadManager();
3014
3015                        Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator();
3016                        while (i.hasNext()) {
3017                            CmsSearchIndexUpdateData updateCollection = i.next();
3018                            if (updateCollection.hasResourceToUpdate()) {
3019                                updateCollection.getIndexer().updateResources(
3020                                    writer,
3021                                    threadManager,
3022                                    updateCollection.getResourcesToUpdate());
3023                            }
3024                        }
3025
3026                        // wait for indexing threads to finish
3027                        while (threadManager.isRunning()) {
3028                            try {
3029                                Thread.sleep(500);
3030                            } catch (InterruptedException e) {
3031                                // just continue with the loop after interruption
3032                                LOG.info(e.getLocalizedMessage(), e);
3033                            }
3034                        }
3035                    }
3036                } finally {
3037                    // close the index writer
3038                    if (writer != null) {
3039                        try {
3040                            writer.commit();
3041                        } catch (IOException e) {
3042                            LOG.error(
3043                                Messages.get().getBundle().key(
3044                                    Messages.LOG_IO_INDEX_WRITER_COMMIT_2,
3045                                    index.getName(),
3046                                    index.getPath()),
3047                                e);
3048                        }
3049                    }
3050                    // index has changed - initialize the index searcher instance
3051                    index.onIndexChanged(false);
3052                }
3053
3054                // output finish information on the report
3055                report.println(
3056                    Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_END_1, index.getName()),
3057                    I_CmsReport.FORMAT_HEADLINE);
3058            }
3059        } finally {
3060            SEARCH_MANAGER_LOCK.unlock();
3061        }
3062    }
3063
3064    /**
3065     * Updates the offline search indexes for the given list of resources.<p>
3066     *
3067     * @param report the report to write the index information to
3068     * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index
3069     */
3070    protected void updateIndexOffline(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) {
3071
3072        CmsObject cms = m_adminCms;
3073        try {
3074            // copy the administration context for the indexing
3075            cms = OpenCms.initCmsObject(m_adminCms);
3076            // set site root and project for this index
3077            cms.getRequestContext().setSiteRoot("/");
3078        } catch (CmsException e) {
3079            LOG.error(e.getLocalizedMessage(), e);
3080        }
3081
3082        Iterator<I_CmsSearchIndex> j = m_offlineIndexes.iterator();
3083        while (j.hasNext()) {
3084            I_CmsSearchIndex index = j.next();
3085            if (index.getSources() != null) {
3086                try {
3087                    // switch to the index project
3088                    cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject()));
3089                    updateIndexIncremental(cms, index, report, resourcesToIndex);
3090                } catch (CmsException e) {
3091                    LOG.error(Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()), e);
3092                }
3093            }
3094        }
3095    }
3096
3097    /**
     * Checks if the given container page is used for detail containers and adds the related detail content to the resource set.<p>
3099     *
3100     * @param adminCms the cms context
3101     * @param containerPages the containerpages
3102     * @param containerPage the container page site path
3103     */
3104    private void addDetailContent(CmsObject adminCms, Set<CmsResource> containerPages, String containerPage) {
3105
3106        if (CmsDetailOnlyContainerUtil.isDetailContainersPage(adminCms, containerPage)) {
3107
3108            try {
3109                CmsResource detailRes = adminCms.readResource(
3110                    CmsDetailOnlyContainerUtil.getDetailContentPath(containerPage),
3111                    CmsResourceFilter.IGNORE_EXPIRATION);
3112                containerPages.add(detailRes);
3113            } catch (Throwable e) {
3114                if (LOG.isWarnEnabled()) {
3115                    LOG.warn(e.getLocalizedMessage(), e);
3116                }
3117            }
3118        }
3119    }
3120
3121    /**
3122     * Creates the Solr core container.<p>
3123     *
3124     * @return the created core container
3125     */
3126    private CoreContainer createCoreContainer() {
3127
3128        CoreContainer container = null;
3129        try {
3130            // get the core container
3131            // still no core container: create it
3132            container = CoreContainer.createAndLoad(
3133                Paths.get(m_solrConfig.getHome()),
3134                m_solrConfig.getSolrFile().toPath());
3135            if (CmsLog.INIT.isInfoEnabled()) {
3136                CmsLog.INIT.info(
3137                    Messages.get().getBundle().key(
3138                        Messages.INIT_SOLR_CORE_CONTAINER_CREATED_2,
3139                        m_solrConfig.getHome(),
3140                        m_solrConfig.getSolrFile().getName()));
3141            }
3142        } catch (Exception e) {
3143            LOG.error(
3144                Messages.get().getBundle().key(
3145                    Messages.ERR_SOLR_CORE_CONTAINER_NOT_CREATED_1,
3146                    m_solrConfig.getSolrFile().getAbsolutePath()),
3147                e);
3148        }
3149        return container;
3150
3151    }
3152
3153    /**
     * Removes the write.lock files from the "index" and "spellcheck" sub folders of the data directory to ensure the index is unlocked.
3155     * @param dataDir the data directory of the Solr index that should be unlocked.
3156     */
3157    private void ensureIndexIsUnlocked(String dataDir) {
3158
3159        Collection<File> lockFiles = new ArrayList<File>(2);
3160        lockFiles.add(
3161            new File(
3162                CmsFileUtil.addTrailingSeparator(CmsFileUtil.addTrailingSeparator(dataDir) + "index") + "write.lock"));
3163        lockFiles.add(
3164            new File(
3165                CmsFileUtil.addTrailingSeparator(CmsFileUtil.addTrailingSeparator(dataDir) + "spellcheck")
3166                    + "write.lock"));
3167        for (File lockFile : lockFiles) {
3168            if (lockFile.exists()) {
3169                lockFile.delete();
3170                LOG.warn(
3171                    "Forcely unlocking index with data dir \""
3172                        + dataDir
3173                        + "\" by removing file \""
3174                        + lockFile.getAbsolutePath()
3175                        + "\".");
3176            }
3177        }
3178    }
3179
3180    /**
3181     * Returns the report in the given event data, if <code>null</code>
3182     * a new log report is used.<p>
3183     *
3184     * @param event the event to get the report for
3185     *
3186     * @return the report
3187     */
3188    private I_CmsReport getEventReport(CmsEvent event) {
3189
3190        I_CmsReport report = null;
3191        if (event.getData() != null) {
3192            report = (I_CmsReport)event.getData().get(I_CmsEventListener.KEY_REPORT);
3193        }
3194        if (report == null) {
3195            report = new CmsLogReport(Locale.ENGLISH, getClass());
3196        }
3197        return report;
3198    }
3199
3200    /**
3201     * Gets all structure ids for which published resources of both states 'new' and 'deleted' exist in the given list.<p>
3202     *
3203     * @param publishedResources a list of published resources
3204     *
3205     * @return the set of structure ids that satisfy the condition above
3206     */
3207    private Set<CmsUUID> getIdsOfPublishResourcesWhichAreBothNewAndDeleted(
3208        List<CmsPublishedResource> publishedResources) {
3209
3210        Set<CmsUUID> result = new HashSet<CmsUUID>();
3211        Set<CmsUUID> deletedSet = new HashSet<CmsUUID>();
3212        for (CmsPublishedResource pubRes : publishedResources) {
3213            if (pubRes.getState().isNew()) {
3214                result.add(pubRes.getStructureId());
3215            }
3216            if (pubRes.getState().isDeleted()) {
3217                deletedSet.add(pubRes.getStructureId());
3218            }
3219        }
3220        result.retainAll(deletedSet);
3221        return result;
3222    }
3223
3224    /**
3225     * Shuts down the Solr core container.<p>
3226     */
3227    private void shutDownSolrContainer() {
3228
3229        if (m_coreContainer != null) {
3230            for (SolrCore core : m_coreContainer.getCores()) {
3231                // do not unload spellcheck core because otherwise the core.properties file is removed
3232                // even when calling m_coreContainer.unload(core.getName(), false, false, false);
3233                if (!core.getName().equals(CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE)) {
3234                    m_coreContainer.unload(core.getName(), false, false, true);
3235                }
3236            }
3237            m_coreContainer.shutdown();
3238            if (CmsLog.INIT.isInfoEnabled()) {
3239                CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SOLR_SHUTDOWN_SUCCESS_0));
3240            }
3241            m_coreContainer = null;
3242        }
3243    }
3244
3245}