001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH & Co. KG, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.search; 029 030import org.opencms.ade.containerpage.CmsDetailOnlyContainerUtil; 031import org.opencms.configuration.CmsConfigurationException; 032import org.opencms.db.CmsDriverManager; 033import org.opencms.db.CmsPublishedResource; 034import org.opencms.db.CmsResourceState; 035import org.opencms.file.CmsObject; 036import org.opencms.file.CmsProject; 037import org.opencms.file.CmsResource; 038import org.opencms.file.CmsResourceFilter; 039import org.opencms.file.types.CmsResourceTypeXmlContainerPage; 040import org.opencms.file.types.CmsResourceTypeXmlContent; 041import org.opencms.i18n.CmsMessageContainer; 042import org.opencms.loader.CmsLoaderException; 043import org.opencms.main.CmsEvent; 044import org.opencms.main.CmsException; 045import 
org.opencms.main.CmsIllegalArgumentException; 046import org.opencms.main.CmsIllegalStateException; 047import org.opencms.main.CmsLog; 048import org.opencms.main.I_CmsEventListener; 049import org.opencms.main.OpenCms; 050import org.opencms.main.OpenCmsSolrHandler; 051import org.opencms.relations.CmsRelation; 052import org.opencms.relations.CmsRelationFilter; 053import org.opencms.report.CmsLogReport; 054import org.opencms.report.I_CmsReport; 055import org.opencms.scheduler.I_CmsScheduledJob; 056import org.opencms.search.documents.A_CmsVfsDocument; 057import org.opencms.search.documents.CmsExtractionResultCache; 058import org.opencms.search.documents.I_CmsDocumentFactory; 059import org.opencms.search.documents.I_CmsTermHighlighter; 060import org.opencms.search.fields.CmsLuceneField; 061import org.opencms.search.fields.CmsLuceneFieldConfiguration; 062import org.opencms.search.fields.CmsSearchField; 063import org.opencms.search.fields.CmsSearchFieldConfiguration; 064import org.opencms.search.fields.CmsSearchFieldMapping; 065import org.opencms.search.fields.I_CmsSearchFieldConfiguration; 066import org.opencms.search.solr.CmsSolrConfiguration; 067import org.opencms.search.solr.CmsSolrFieldConfiguration; 068import org.opencms.search.solr.CmsSolrIndex; 069import org.opencms.search.solr.I_CmsSolrIndexWriter; 070import org.opencms.search.solr.spellchecking.CmsSolrSpellchecker; 071import org.opencms.search.solr.spellchecking.CmsSpellcheckDictionaryIndexer; 072import org.opencms.security.CmsRole; 073import org.opencms.security.CmsRoleViolationException; 074import org.opencms.util.A_CmsModeStringEnumeration; 075import org.opencms.util.CmsFileUtil; 076import org.opencms.util.CmsStringUtil; 077import org.opencms.util.CmsUUID; 078import org.opencms.util.CmsWaitHandle; 079 080import java.io.File; 081import java.io.IOException; 082import java.nio.file.FileSystems; 083import java.nio.file.Paths; 084import java.util.ArrayList; 085import java.util.Collection; 086import 
java.util.Collections; 087import java.util.HashMap; 088import java.util.HashSet; 089import java.util.Iterator; 090import java.util.List; 091import java.util.Locale; 092import java.util.Map; 093import java.util.Set; 094import java.util.TreeMap; 095import java.util.concurrent.locks.ReentrantLock; 096 097import org.apache.commons.logging.Log; 098import org.apache.lucene.analysis.Analyzer; 099import org.apache.lucene.analysis.CharArraySet; 100import org.apache.lucene.analysis.standard.StandardAnalyzer; 101import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; 102import org.apache.solr.client.solrj.impl.HttpSolrClient.Builder; 103import org.apache.solr.core.CoreContainer; 104import org.apache.solr.core.CoreDescriptor; 105import org.apache.solr.core.SolrCore; 106 107/** 108 * Implements the general management and configuration of the search and 109 * indexing facilities in OpenCms.<p> 110 * 111 * @since 6.0.0 112 */ 113public class CmsSearchManager implements I_CmsScheduledJob, I_CmsEventListener { 114 115 /** 116 * Enumeration class for force unlock types.<p> 117 */ 118 public static final class CmsSearchForceUnlockMode extends A_CmsModeStringEnumeration { 119 120 /** Force unlock type "always". */ 121 public static final CmsSearchForceUnlockMode ALWAYS = new CmsSearchForceUnlockMode("always"); 122 123 /** Force unlock type "never". */ 124 public static final CmsSearchForceUnlockMode NEVER = new CmsSearchForceUnlockMode("never"); 125 126 /** Force unlock type "only full". */ 127 public static final CmsSearchForceUnlockMode ONLYFULL = new CmsSearchForceUnlockMode("onlyfull"); 128 129 /** Serializable version id. 
*/ 130 private static final long serialVersionUID = 74746076708908673L; 131 132 /** 133 * Creates a new force unlock type with the given name.<p> 134 * 135 * @param mode the mode id to use 136 */ 137 protected CmsSearchForceUnlockMode(String mode) { 138 139 super(mode); 140 } 141 142 /** 143 * Returns the lock type for the given type value.<p> 144 * 145 * @param type the type value to get the lock type for 146 * 147 * @return the lock type for the given type value 148 */ 149 public static CmsSearchForceUnlockMode valueOf(String type) { 150 151 if (type.equals(ALWAYS.toString())) { 152 return ALWAYS; 153 } else if (type.equals(NEVER.toString())) { 154 return NEVER; 155 } else { 156 return ONLYFULL; 157 } 158 } 159 } 160 161 /** 162 * Handles offline index generation.<p> 163 */ 164 protected class CmsSearchOfflineHandler implements I_CmsEventListener { 165 166 /** Indicates if the event handlers for the offline search have been already registered. */ 167 private boolean m_isEventRegistered; 168 169 /** The list of resources to index. 
*/ 170 private List<CmsPublishedResource> m_resourcesToIndex; 171 172 /** 173 * Initializes the offline index handler.<p> 174 */ 175 protected CmsSearchOfflineHandler() { 176 177 m_resourcesToIndex = new ArrayList<CmsPublishedResource>(); 178 } 179 180 /** 181 * Implements the event listener of this class.<p> 182 * 183 * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent) 184 */ 185 @SuppressWarnings("unchecked") 186 public void cmsEvent(CmsEvent event) { 187 188 switch (event.getType()) { 189 case I_CmsEventListener.EVENT_PROPERTY_MODIFIED: 190 case I_CmsEventListener.EVENT_RESOURCE_CREATED: 191 case I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED: 192 case I_CmsEventListener.EVENT_RESOURCE_MODIFIED: 193 Object change = event.getData().get(I_CmsEventListener.KEY_CHANGE); 194 if ((change != null) && change.equals(new Integer(CmsDriverManager.NOTHING_CHANGED))) { 195 // skip lock & unlock 196 return; 197 } 198 // skip indexing if flag is set in event 199 Object skip = event.getData().get(I_CmsEventListener.KEY_SKIPINDEX); 200 if (skip != null) { 201 return; 202 } 203 204 // a resource has been modified - offline indexes require (re)indexing 205 List<CmsResource> resources = Collections.singletonList( 206 (CmsResource)event.getData().get(I_CmsEventListener.KEY_RESOURCE)); 207 reIndexResources(resources); 208 break; 209 case I_CmsEventListener.EVENT_RESOURCE_DELETED: 210 List<CmsResource> eventResources = (List<CmsResource>)event.getData().get( 211 I_CmsEventListener.KEY_RESOURCES); 212 List<CmsResource> resourcesToDelete = new ArrayList<CmsResource>(eventResources); 213 for (CmsResource res : resourcesToDelete) { 214 if (res.getState().isNew()) { 215 // if the resource is new and a delete action was performed 216 // --> set the state of the resource to deleted 217 res.setState(CmsResourceState.STATE_DELETED); 218 } 219 } 220 reIndexResources(resourcesToDelete); 221 break; 222 case 
I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED: 223 case I_CmsEventListener.EVENT_RESOURCE_MOVED: 224 case I_CmsEventListener.EVENT_RESOURCE_COPIED: 225 case I_CmsEventListener.EVENT_RESOURCES_MODIFIED: 226 // a list of resources has been modified - offline indexes require (re)indexing 227 reIndexResources((List<CmsResource>)event.getData().get(I_CmsEventListener.KEY_RESOURCES)); 228 break; 229 default: 230 // no operation 231 } 232 } 233 234 /** 235 * Adds a list of {@link CmsPublishedResource} objects to be indexed.<p> 236 * 237 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to be indexed 238 */ 239 protected synchronized void addResourcesToIndex(List<CmsPublishedResource> resourcesToIndex) { 240 241 m_resourcesToIndex.addAll(resourcesToIndex); 242 } 243 244 /** 245 * Returns the list of {@link CmsPublishedResource} objects to index.<p> 246 * 247 * @return the resources to index 248 */ 249 protected List<CmsPublishedResource> getResourcesToIndex() { 250 251 List<CmsPublishedResource> result; 252 synchronized (this) { 253 result = m_resourcesToIndex; 254 m_resourcesToIndex = new ArrayList<CmsPublishedResource>(); 255 } 256 try { 257 CmsObject cms = m_adminCms; 258 CmsProject offline = getOfflineIndexProject(); 259 if (offline != null) { 260 // switch to the offline project if available 261 cms = OpenCms.initCmsObject(m_adminCms); 262 cms.getRequestContext().setCurrentProject(offline); 263 } 264 findRelatedContainerPages(cms, result); 265 } catch (CmsException e) { 266 LOG.error(e.getLocalizedMessage(), e); 267 } 268 return result; 269 } 270 271 /** 272 * Initializes this offline search handler, registering the event handlers if required.<p> 273 */ 274 protected void initialize() { 275 276 if (m_offlineIndexes.size() > 0) { 277 // there is at least one offline index configured 278 if ((m_offlineIndexThread == null) || !m_offlineIndexThread.isAlive()) { 279 // create the offline indexing thread 280 m_offlineIndexThread = new 
CmsSearchOfflineIndexThread(this); 281 // start the offline index thread 282 m_offlineIndexThread.start(); 283 } 284 } else { 285 if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) { 286 // no offline indexes but thread still running, stop the thread 287 m_offlineIndexThread.shutDown(); 288 m_offlineIndexThread = null; 289 } 290 } 291 // do this only in case there are offline indexes configured 292 if (!m_isEventRegistered && (m_offlineIndexes.size() > 0)) { 293 m_isEventRegistered = true; 294 // register this object as event listener 295 OpenCms.addCmsEventListener( 296 this, 297 new int[] { 298 I_CmsEventListener.EVENT_PROPERTY_MODIFIED, 299 I_CmsEventListener.EVENT_RESOURCE_CREATED, 300 I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED, 301 I_CmsEventListener.EVENT_RESOURCE_MODIFIED, 302 I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED, 303 I_CmsEventListener.EVENT_RESOURCE_MOVED, 304 I_CmsEventListener.EVENT_RESOURCE_DELETED, 305 I_CmsEventListener.EVENT_RESOURCE_COPIED, 306 I_CmsEventListener.EVENT_RESOURCES_MODIFIED}); 307 } 308 } 309 310 /** 311 * Updates all offline indexes for the given list of {@link CmsResource} objects.<p> 312 * 313 * @param resources a list of {@link CmsResource} objects to update in the offline indexes 314 */ 315 protected synchronized void reIndexResources(List<CmsResource> resources) { 316 317 List<CmsPublishedResource> resourcesToIndex = new ArrayList<CmsPublishedResource>(resources.size()); 318 for (CmsResource res : resources) { 319 CmsPublishedResource pubRes = new CmsPublishedResource(res); 320 resourcesToIndex.add(pubRes); 321 } 322 if (resourcesToIndex.size() > 0) { 323 // add the resources found to the offline index thread 324 addResourcesToIndex(resourcesToIndex); 325 } 326 } 327 } 328 329 /** 330 * The offline indexer thread runs periodically and indexes all resources added by the event handler.<p> 331 */ 332 protected class CmsSearchOfflineIndexThread extends Thread { 333 334 /** The 
event handler that triggers this thread. */ 335 CmsSearchOfflineHandler m_handler; 336 337 /** Indicates if this thread is still alive. */ 338 boolean m_isAlive; 339 340 /** Indicates that an index update thread is currently running. */ 341 private boolean m_isUpdating; 342 343 /** If true a manual update (after file upload) was triggered. */ 344 private boolean m_updateTriggered; 345 346 /** The wait handle used for signalling when the worker thread has finished. */ 347 private CmsWaitHandle m_waitHandle = new CmsWaitHandle(); 348 349 /** 350 * Constructor.<p> 351 * 352 * @param handler the offline index event handler 353 */ 354 protected CmsSearchOfflineIndexThread(CmsSearchOfflineHandler handler) { 355 356 super("OpenCms: Offline Search Indexer"); 357 m_handler = handler; 358 } 359 360 /** 361 * Gets the wait handle used for signalling when the worker thread has finished. 362 * 363 * @return the wait handle 364 **/ 365 public CmsWaitHandle getWaitHandle() { 366 367 return m_waitHandle; 368 } 369 370 /** 371 * @see java.lang.Thread#interrupt() 372 */ 373 @Override 374 public void interrupt() { 375 376 super.interrupt(); 377 m_updateTriggered = true; 378 } 379 380 /** 381 * @see java.lang.Thread#run() 382 */ 383 @Override 384 public void run() { 385 386 // create a log report for the output 387 I_CmsReport report = new CmsLogReport(m_adminCms.getRequestContext().getLocale(), CmsSearchManager.class); 388 long offlineUpdateFrequency = getOfflineUpdateFrequency(); 389 m_updateTriggered = false; 390 try { 391 while (m_isAlive) { 392 if (!m_updateTriggered) { 393 try { 394 sleep(offlineUpdateFrequency); 395 } catch (InterruptedException e) { 396 // continue the thread after interruption 397 if (!m_isAlive) { 398 // the thread has been shut down while sleeping 399 continue; 400 } 401 if (offlineUpdateFrequency != getOfflineUpdateFrequency()) { 402 // offline update frequency change - clear interrupt status 403 offlineUpdateFrequency = getOfflineUpdateFrequency(); 404 } 
405 LOG.info(e.getLocalizedMessage(), e); 406 } 407 } 408 if (m_isAlive) { 409 // set update trigger to false since we do the update now 410 m_updateTriggered = false; 411 // get list of resource to update 412 List<CmsPublishedResource> resourcesToIndex = getResourcesToIndex(); 413 if (resourcesToIndex.size() > 0) { 414 // only start indexing if there is at least one resource 415 startOfflineUpdateThread(report, resourcesToIndex); 416 } else { 417 getWaitHandle().release(); 418 } 419 // this is just called to clear the interrupt status of the thread 420 interrupted(); 421 } 422 } 423 } finally { 424 // make sure that live status is reset in case of Exceptions 425 m_isAlive = false; 426 } 427 428 } 429 430 /** 431 * @see java.lang.Thread#start() 432 */ 433 @Override 434 public synchronized void start() { 435 436 m_isAlive = true; 437 super.start(); 438 } 439 440 /** 441 * Obtains the list of resource to update in the offline index, 442 * then optimizes the list by removing duplicate entries.<p> 443 * 444 * @return the list of resource to update in the offline index 445 */ 446 protected List<CmsPublishedResource> getResourcesToIndex() { 447 448 List<CmsPublishedResource> resourcesToIndex = m_handler.getResourcesToIndex(); 449 List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>(resourcesToIndex.size()); 450 451 // Reverse to always keep the last list entries 452 Collections.reverse(resourcesToIndex); 453 for (CmsPublishedResource pubRes : resourcesToIndex) { 454 boolean addResource = true; 455 for (CmsPublishedResource resRes : result) { 456 if (pubRes.equals(resRes) 457 && (pubRes.getState() == resRes.getState()) 458 && (pubRes.getMovedState() == resRes.getMovedState()) 459 && pubRes.getRootPath().equals(resRes.getRootPath())) { 460 // resource already in the update list 461 addResource = false; 462 break; 463 } 464 } 465 if (addResource) { 466 result.add(pubRes); 467 } 468 469 } 470 Collections.reverse(result); 471 return 
changeStateOfMoveOriginsToDeleted(result); 472 } 473 474 /** 475 * Shuts down this offline index thread.<p> 476 */ 477 protected void shutDown() { 478 479 m_isAlive = false; 480 interrupt(); 481 if (m_isUpdating) { 482 long waitTime = getOfflineUpdateFrequency() / 2; 483 int waitSteps = 0; 484 do { 485 try { 486 // wait half the time of the offline index frequency for the thread to finish 487 Thread.sleep(waitTime); 488 } catch (InterruptedException e) { 489 // continue 490 LOG.info(e.getLocalizedMessage(), e); 491 } 492 waitSteps++; 493 // wait 5 times then stop waiting 494 } while ((waitSteps < 5) && m_isUpdating); 495 } 496 } 497 498 /** 499 * Updates the offline search indexes for the given list of resources.<p> 500 * 501 * @param report the report to write the index information to 502 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index 503 */ 504 protected void startOfflineUpdateThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) { 505 506 CmsSearchOfflineIndexWorkThread thread = new CmsSearchOfflineIndexWorkThread(report, resourcesToIndex); 507 long startTime = System.currentTimeMillis(); 508 long waitTime = getOfflineUpdateFrequency() / 2; 509 if (LOG.isDebugEnabled()) { 510 LOG.debug( 511 Messages.get().getBundle().key( 512 Messages.LOG_OI_UPDATE_START_1, 513 Integer.valueOf(resourcesToIndex.size()))); 514 } 515 516 m_isUpdating = true; 517 thread.start(); 518 519 do { 520 try { 521 // wait half the time of the offline index frequency for the thread to finish 522 thread.join(waitTime); 523 } catch (InterruptedException e) { 524 // continue 525 LOG.info(e.getLocalizedMessage(), e); 526 } 527 if (thread.isAlive()) { 528 LOG.warn( 529 Messages.get().getBundle().key( 530 Messages.LOG_OI_UPDATE_LONG_2, 531 Integer.valueOf(resourcesToIndex.size()), 532 Long.valueOf(System.currentTimeMillis() - startTime))); 533 } 534 } while (thread.isAlive()); 535 m_isUpdating = false; 536 537 if (LOG.isDebugEnabled()) { 538 
LOG.debug( 539 Messages.get().getBundle().key( 540 Messages.LOG_OI_UPDATE_FINISH_2, 541 Integer.valueOf(resourcesToIndex.size()), 542 Long.valueOf(System.currentTimeMillis() - startTime))); 543 } 544 } 545 546 /** 547 * Helper method which changes the states of resources which are to be indexed but have the wrong path to 'deleted'. 548 * This is needed to deal with moved resources, since the documents with the old paths must be removed from the index, 549 * 550 * @param resourcesToIndex the resources to index 551 * 552 * @return the resources to index, but resource states are set to 'deleted' for resources with outdated paths 553 */ 554 private List<CmsPublishedResource> changeStateOfMoveOriginsToDeleted( 555 List<CmsPublishedResource> resourcesToIndex) { 556 557 Map<CmsUUID, String> lastValidPaths = new HashMap<CmsUUID, String>(); 558 for (CmsPublishedResource resource : resourcesToIndex) { 559 if (resource.getState().isDeleted()) { 560 // we don't want the last path to be from a deleted resource 561 continue; 562 } 563 lastValidPaths.put(resource.getStructureId(), resource.getRootPath()); 564 } 565 List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>(); 566 for (CmsPublishedResource resource : resourcesToIndex) { 567 if (resource.getState().isDeleted()) { 568 result.add(resource); 569 continue; 570 } 571 String lastValidPath = lastValidPaths.get(resource.getStructureId()); 572 if (resource.getRootPath().equals(lastValidPath) || resource.getStructureId().isNullUUID()) { 573 result.add(resource); 574 } else { 575 result.add( 576 new CmsPublishedResource( 577 resource.getStructureId(), 578 resource.getResourceId(), 579 resource.getPublishTag(), 580 resource.getRootPath(), 581 resource.getType(), 582 resource.isFolder(), 583 CmsResource.STATE_DELETED, // make sure index entry with outdated path is deleted 584 resource.getSiblingCount())); 585 } 586 } 587 return result; 588 } 589 } 590 591 /** 592 * An offline index worker Thread runs each time for 
every offline index update action.<p> 593 * 594 * This was decoupled from the main {@link CmsSearchOfflineIndexThread} in order to avoid 595 * problems if a single operation "hangs" the Tread.<p> 596 */ 597 protected class CmsSearchOfflineIndexWorkThread extends Thread { 598 599 /** The report to write the index information to. */ 600 I_CmsReport m_report; 601 602 /** The list of {@link CmsPublishedResource} objects to index. */ 603 List<CmsPublishedResource> m_resourcesToIndex; 604 605 /** 606 * Updates the offline search indexes for the given list of resources.<p> 607 * 608 * @param report the report to write the index information to 609 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index 610 */ 611 protected CmsSearchOfflineIndexWorkThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) { 612 613 super("OpenCms: Offline Search Index Worker"); 614 m_report = report; 615 m_resourcesToIndex = resourcesToIndex; 616 } 617 618 /** 619 * @see java.lang.Thread#run() 620 */ 621 @Override 622 public void run() { 623 624 updateIndexOffline(m_report, m_resourcesToIndex); 625 if (m_offlineIndexThread != null) { 626 m_offlineIndexThread.getWaitHandle().release(); 627 } 628 } 629 } 630 631 /** This needs to be a fair lock to preserve order of threads accessing the search manager. */ 632 private static final ReentrantLock SEARCH_MANAGER_LOCK = new ReentrantLock(true); 633 634 /** The default value used for generating search result excerpts (1024 chars). */ 635 public static final int DEFAULT_EXCERPT_LENGTH = 1024; 636 637 /** The default value used for keeping the extraction results in the cache (672 hours = 4 weeks). */ 638 public static final float DEFAULT_EXTRACTION_CACHE_MAX_AGE = 672.0f; 639 640 /** Default for the maximum number of modifications before a commit in the search index is triggered (500). 
*/ 641 public static final int DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT = 500; 642 643 /** The default update frequency for offline indexes (15000 msec = 15 sec). */ 644 public static final int DEFAULT_OFFLINE_UPDATE_FREQNENCY = 15000; 645 646 /** The default maximal wait time for re-indexing after editing a content. */ 647 public static final int DEFAULT_MAX_INDEX_WAITTIME = 30000; 648 649 /** The default timeout value used for generating a document for the search index (60000 msec = 1 min). */ 650 public static final int DEFAULT_TIMEOUT = 60000; 651 652 /** Scheduler parameter: Update only a specified list of indexes. */ 653 public static final String JOB_PARAM_INDEXLIST = "indexList"; 654 655 /** Scheduler parameter: Write the output of the update to the logfile. */ 656 public static final String JOB_PARAM_WRITELOG = "writeLog"; 657 658 /** Prefix for Lucene default analyzers package (<code>org.apache.lucene.analysis.</code>). */ 659 public static final String LUCENE_ANALYZER = "org.apache.lucene.analysis.core."; 660 661 /** The log object for this class. */ 662 protected static final Log LOG = CmsLog.getLog(CmsSearchManager.class); 663 664 /** The administrator OpenCms user context to access OpenCms VFS resources. */ 665 protected CmsObject m_adminCms; 666 667 /** The list of indexes that are configured for offline index mode. */ 668 protected List<I_CmsSearchIndex> m_offlineIndexes; 669 670 /** The thread used of offline indexing. */ 671 protected CmsSearchOfflineIndexThread m_offlineIndexThread; 672 673 /** Configured analyzers for languages using <analyzer>. */ 674 private HashMap<Locale, CmsSearchAnalyzer> m_analyzers; 675 676 /** Stores the offline update frequency while indexing is paused. */ 677 private long m_configuredOfflineIndexingFrequency; 678 679 /** The Solr core container. */ 680 private CoreContainer m_coreContainer; 681 682 /** A map of document factory configurations. 
*/ 683 private List<CmsSearchDocumentType> m_documentTypeConfigs; 684 685 /** A map of document factories keyed by their matching Cms resource types and/or mimetypes. */ 686 private Map<String, I_CmsDocumentFactory> m_documentTypes; 687 688 /** The max age for extraction results to remain in the cache. */ 689 private float m_extractionCacheMaxAge; 690 691 /** The cache for the extraction results. */ 692 private CmsExtractionResultCache m_extractionResultCache; 693 694 /** Contains the available field configurations. */ 695 private Map<String, I_CmsSearchFieldConfiguration> m_fieldConfigurations; 696 697 /** The force unlock type. */ 698 private CmsSearchForceUnlockMode m_forceUnlockMode; 699 700 /** The class used to highlight the search terms in the excerpt of a search result. */ 701 private I_CmsTermHighlighter m_highlighter; 702 703 /** A list of search indexes. */ 704 private List<I_CmsSearchIndex> m_indexes; 705 706 /** Seconds to wait for an index lock. */ 707 private int m_indexLockMaxWaitSeconds = 10; 708 709 /** Configured index sources. */ 710 private Map<String, CmsSearchIndexSource> m_indexSources; 711 712 /** The max. char. length of the excerpt in the search result. */ 713 private int m_maxExcerptLength; 714 715 /** The maximum number of modifications before a commit in the search index is triggered. */ 716 private int m_maxModificationsBeforeCommit; 717 718 /** The offline index search handler. */ 719 private CmsSearchOfflineHandler m_offlineHandler; 720 721 /** The update frequency of the offline indexer in milliseconds. */ 722 private long m_offlineUpdateFrequency; 723 724 /** The maximal time to wait for re-indexing after a content is edited (in milliseconds). */ 725 private long m_maxIndexWaitTime; 726 727 /** Path to index files below WEB-INF/. */ 728 private String m_path; 729 730 /** The Solr configuration. */ 731 private CmsSolrConfiguration m_solrConfig; 732 733 /** Timeout for abandoning indexing thread. 
*/ 734 private long m_timeout; 735 736 /** 737 * Default constructor when called as cron job.<p> 738 */ 739 public CmsSearchManager() { 740 741 m_documentTypes = new HashMap<String, I_CmsDocumentFactory>(); 742 m_documentTypeConfigs = new ArrayList<CmsSearchDocumentType>(); 743 m_analyzers = new HashMap<Locale, CmsSearchAnalyzer>(); 744 m_indexes = new ArrayList<I_CmsSearchIndex>(); 745 m_indexSources = new TreeMap<String, CmsSearchIndexSource>(); 746 m_offlineHandler = new CmsSearchOfflineHandler(); 747 m_extractionCacheMaxAge = DEFAULT_EXTRACTION_CACHE_MAX_AGE; 748 m_maxExcerptLength = DEFAULT_EXCERPT_LENGTH; 749 m_offlineUpdateFrequency = DEFAULT_OFFLINE_UPDATE_FREQNENCY; 750 m_maxIndexWaitTime = DEFAULT_MAX_INDEX_WAITTIME; 751 m_maxModificationsBeforeCommit = DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT; 752 753 m_fieldConfigurations = new HashMap<String, I_CmsSearchFieldConfiguration>(); 754 // make sure we have a "standard" field configuration 755 addFieldConfiguration(CmsLuceneFieldConfiguration.DEFAULT_STANDARD); 756 757 if (CmsLog.INIT.isInfoEnabled()) { 758 CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_START_SEARCH_CONFIG_0)); 759 } 760 } 761 762 /** 763 * Returns an analyzer for the given class name.<p> 764 * 765 * @param className the class name of the analyzer 766 * 767 * @return the appropriate lucene analyzer 768 * 769 * @throws Exception if something goes wrong 770 */ 771 public static Analyzer getAnalyzer(String className) throws Exception { 772 773 Analyzer analyzer = null; 774 Class<?> analyzerClass; 775 try { 776 analyzerClass = Class.forName(className); 777 } catch (ClassNotFoundException e) { 778 // allow Lucene standard classes to be written in a short form 779 analyzerClass = Class.forName(LUCENE_ANALYZER + className); 780 } 781 782 // since Lucene 3.0 most analyzers need a "version" parameter and don't support an empty constructor 783 if (StandardAnalyzer.class.equals(analyzerClass)) { 784 // the Lucene standard analyzer is used 
- but without any stopwords. 785 analyzer = new StandardAnalyzer(new CharArraySet(0, false)); 786 } else { 787 analyzer = (Analyzer)analyzerClass.newInstance(); 788 } 789 return analyzer; 790 } 791 792 /** 793 * Returns the Solr index configured with the parameters name. 794 * The parameters must contain a key/value pair with an existing 795 * Solr index, otherwise <code>null</code> is returned.<p> 796 * 797 * @param cms the current context 798 * @param params the parameter map 799 * 800 * @return the best matching Solr index 801 */ 802 public static final CmsSolrIndex getIndexSolr(CmsObject cms, Map<String, String[]> params) { 803 804 String indexName = null; 805 CmsSolrIndex index = null; 806 // try to get the index name from the parameters: 'core' or 'index' 807 if (params != null) { 808 indexName = params.get(OpenCmsSolrHandler.PARAM_CORE) != null 809 ? params.get(OpenCmsSolrHandler.PARAM_CORE)[0] 810 : (params.get(OpenCmsSolrHandler.PARAM_INDEX) != null 811 ? params.get(OpenCmsSolrHandler.PARAM_INDEX)[0] 812 : null); 813 } 814 if (indexName == null) { 815 // if no parameter is specified try to use the default online/offline indexes by context 816 indexName = cms.getRequestContext().getCurrentProject().isOnlineProject() 817 ? CmsSolrIndex.DEFAULT_INDEX_NAME_ONLINE 818 : CmsSolrIndex.DEFAULT_INDEX_NAME_OFFLINE; 819 } 820 // try to get the index 821 index = indexName != null ? OpenCms.getSearchManager().getIndexSolr(indexName) : null; 822 if (index == null) { 823 // if there is exactly one index, a missing core / index parameter doesn't matter, since there is no choice. 
824 List<CmsSolrIndex> solrs = OpenCms.getSearchManager().getAllSolrIndexes(); 825 if ((solrs != null) && !solrs.isEmpty() && (solrs.size() == 1)) { 826 index = solrs.get(0); 827 } 828 } 829 return index; 830 } 831 832 /** 833 * Returns <code>true</code> if the index for the given name is a Lucene index, <code>false</code> otherwise.<p> 834 * 835 * @param indexName the name of the index to check 836 * 837 * @return <code>true</code> if the index for the given name is a Lucene index 838 */ 839 public static boolean isLuceneIndex(String indexName) { 840 841 I_CmsSearchIndex i = OpenCms.getSearchManager().getIndex(indexName); 842 return (i instanceof CmsSearchIndex) && (!(i instanceof CmsSolrIndex)); 843 } 844 845 /** 846 * Adds an analyzer.<p> 847 * 848 * @param analyzer an analyzer 849 */ 850 public void addAnalyzer(CmsSearchAnalyzer analyzer) { 851 852 m_analyzers.put(analyzer.getLocale(), analyzer); 853 854 if (CmsLog.INIT.isInfoEnabled()) { 855 CmsLog.INIT.info( 856 Messages.get().getBundle().key( 857 Messages.INIT_ADD_ANALYZER_2, 858 analyzer.getLocale(), 859 analyzer.getClassName())); 860 } 861 } 862 863 /** 864 * Adds a document type.<p> 865 * 866 * @param documentType a document type 867 */ 868 public void addDocumentTypeConfig(CmsSearchDocumentType documentType) { 869 870 m_documentTypeConfigs.add(documentType); 871 872 if (CmsLog.INIT.isInfoEnabled()) { 873 CmsLog.INIT.info( 874 Messages.get().getBundle().key( 875 Messages.INIT_SEARCH_DOC_TYPES_2, 876 documentType.getName(), 877 documentType.getClassName())); 878 } 879 } 880 881 /** 882 * Adds a search field configuration to the search manager.<p> 883 * 884 * @param fieldConfiguration the search field configuration to add 885 */ 886 public void addFieldConfiguration(I_CmsSearchFieldConfiguration fieldConfiguration) { 887 888 m_fieldConfigurations.put(fieldConfiguration.getName(), fieldConfiguration); 889 } 890 891 /** 892 * Adds a search index to the configuration.<p> 893 * 894 * @param searchIndex the 
search index to add 895 */ 896 public void addSearchIndex(I_CmsSearchIndex searchIndex) { 897 898 if (!searchIndex.isInitialized()) { 899 if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_2_INITIALIZING) { 900 try { 901 searchIndex.initialize(); 902 } catch (CmsException e) { 903 // should never happen 904 LOG.error(e.getMessage(), e); 905 } 906 } 907 } 908 909 // name: not null or emtpy and unique 910 String name = searchIndex.getName(); 911 if (CmsStringUtil.isEmptyOrWhitespaceOnly(name)) { 912 throw new CmsIllegalArgumentException( 913 Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_MISSING_NAME_0)); 914 } 915 if (m_indexSources.keySet().contains(name)) { 916 throw new CmsIllegalArgumentException( 917 Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_INVALID_NAME_1, name)); 918 } 919 920 m_indexes.add(searchIndex); 921 if (m_adminCms != null) { 922 initOfflineIndexes(); 923 } 924 925 if (CmsLog.INIT.isInfoEnabled()) { 926 CmsLog.INIT.info( 927 Messages.get().getBundle().key( 928 Messages.INIT_ADD_SEARCH_INDEX_2, 929 searchIndex.getName(), 930 searchIndex.getProject())); 931 } 932 } 933 934 /** 935 * Adds a search index source configuration.<p> 936 * 937 * @param searchIndexSource a search index source configuration 938 */ 939 public void addSearchIndexSource(CmsSearchIndexSource searchIndexSource) { 940 941 m_indexSources.put(searchIndexSource.getName(), searchIndexSource); 942 943 if (CmsLog.INIT.isInfoEnabled()) { 944 CmsLog.INIT.info( 945 Messages.get().getBundle().key( 946 Messages.INIT_SEARCH_INDEX_SOURCE_2, 947 searchIndexSource.getName(), 948 searchIndexSource.getIndexerClassName())); 949 } 950 } 951 952 /** 953 * Implements the event listener of this class.<p> 954 * 955 * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent) 956 */ 957 public void cmsEvent(CmsEvent event) { 958 959 switch (event.getType()) { 960 case I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES: 961 List<String> indexNames = null; 962 if 
((event.getData() != null) 963 && CmsStringUtil.isNotEmptyOrWhitespaceOnly( 964 (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES))) { 965 indexNames = CmsStringUtil.splitAsList( 966 (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES), 967 ",", 968 true); 969 } 970 try { 971 if (LOG.isDebugEnabled()) { 972 LOG.debug( 973 Messages.get().getBundle().key( 974 Messages.LOG_EVENT_REBUILD_SEARCHINDEX_1, 975 indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")), 976 new Exception()); 977 } 978 if (indexNames == null) { 979 rebuildAllIndexes(getEventReport(event)); 980 } else { 981 rebuildIndexes(indexNames, getEventReport(event)); 982 } 983 } catch (CmsException e) { 984 if (LOG.isErrorEnabled()) { 985 LOG.error( 986 Messages.get().getBundle().key( 987 Messages.ERR_EVENT_REBUILD_SEARCHINDEX_1, 988 indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")), 989 e); 990 } 991 } 992 break; 993 case I_CmsEventListener.EVENT_CLEAR_CACHES: 994 if (LOG.isDebugEnabled()) { 995 LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_CLEAR_CACHES_0), new Exception()); 996 } 997 break; 998 case I_CmsEventListener.EVENT_PUBLISH_PROJECT: 999 // event data contains a list of the published resources 1000 CmsUUID publishHistoryId = new CmsUUID((String)event.getData().get(I_CmsEventListener.KEY_PUBLISHID)); 1001 if (LOG.isDebugEnabled()) { 1002 LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_PUBLISH_PROJECT_1, publishHistoryId)); 1003 } 1004 updateAllIndexes(m_adminCms, publishHistoryId, getEventReport(event)); 1005 if (LOG.isDebugEnabled()) { 1006 LOG.debug( 1007 Messages.get().getBundle().key( 1008 Messages.LOG_EVENT_PUBLISH_PROJECT_FINISHED_1, 1009 publishHistoryId)); 1010 } 1011 break; 1012 default: 1013 // no operation 1014 } 1015 } 1016 1017 /** 1018 * Returns all Solr index.<p> 1019 * 1020 * @return all Solr indexes 1021 */ 1022 public List<CmsSolrIndex> getAllSolrIndexes() { 1023 1024 
List<CmsSolrIndex> result = new ArrayList<CmsSolrIndex>(); 1025 for (String indexName : getIndexNames()) { 1026 CmsSolrIndex index = getIndexSolr(indexName); 1027 if (index != null) { 1028 result.add(index); 1029 } 1030 } 1031 return result; 1032 } 1033 1034 /** 1035 * Returns an analyzer for the given language.<p> 1036 * 1037 * The analyzer is selected according to the analyzer configuration.<p> 1038 * 1039 * @param locale the locale to get the analyzer for 1040 * @return the appropriate lucene analyzer 1041 * 1042 * @throws CmsSearchException if something goes wrong 1043 */ 1044 public Analyzer getAnalyzer(Locale locale) throws CmsSearchException { 1045 1046 Analyzer analyzer = null; 1047 String className = null; 1048 1049 CmsSearchAnalyzer analyzerConf = m_analyzers.get(locale); 1050 if (analyzerConf == null) { 1051 throw new CmsSearchException(Messages.get().container(Messages.ERR_ANALYZER_NOT_FOUND_1, locale)); 1052 } 1053 1054 try { 1055 analyzer = getAnalyzer(analyzerConf.getClassName()); 1056 } catch (Exception e) { 1057 throw new CmsSearchException(Messages.get().container(Messages.ERR_LOAD_ANALYZER_1, className), e); 1058 } 1059 1060 return analyzer; 1061 } 1062 1063 /** 1064 * Returns an unmodifiable view of the map that contains the {@link CmsSearchAnalyzer} list.<p> 1065 * 1066 * The keys in the map are {@link Locale} objects, and the values are {@link CmsSearchAnalyzer} objects. 
1067 * 1068 * @return an unmodifiable view of the Analyzers Map 1069 */ 1070 public Map<Locale, CmsSearchAnalyzer> getAnalyzers() { 1071 1072 return Collections.unmodifiableMap(m_analyzers); 1073 } 1074 1075 /** 1076 * Returns the search analyzer for the given locale.<p> 1077 * 1078 * @param locale the locale to get the analyzer for 1079 * 1080 * @return the search analyzer for the given locale 1081 */ 1082 public CmsSearchAnalyzer getCmsSearchAnalyzer(Locale locale) { 1083 1084 return m_analyzers.get(locale); 1085 } 1086 1087 /** 1088 * Returns the name of the directory below WEB-INF/ where the search indexes are stored.<p> 1089 * 1090 * @return the name of the directory below WEB-INF/ where the search indexes are stored 1091 */ 1092 public String getDirectory() { 1093 1094 return m_path; 1095 } 1096 1097 /** 1098 * Returns the configured Solr home directory <code>null</code> if not set.<p> 1099 * 1100 * @return the Solr home directory 1101 */ 1102 public String getDirectorySolr() { 1103 1104 return m_solrConfig != null ? 
m_solrConfig.getHome() : null; 1105 } 1106 1107 /** 1108 * Returns a lucene document factory for given resource.<p> 1109 * 1110 * The type of the document factory is selected by the type of the resource 1111 * and the MIME type of the resource content, according to the configuration in <code>opencms-search.xml</code>.<p> 1112 * 1113 * @param resource a cms resource 1114 * @return a lucene document factory or null 1115 */ 1116 public I_CmsDocumentFactory getDocumentFactory(CmsResource resource) { 1117 1118 // first get the MIME type of the resource 1119 String mimeType = OpenCms.getResourceManager().getMimeType(resource.getRootPath(), null, "unknown"); 1120 String resourceType = null; 1121 try { 1122 resourceType = OpenCms.getResourceManager().getResourceType(resource.getTypeId()).getTypeName(); 1123 } catch (CmsLoaderException e) { 1124 // ignore, unknown resource type, resource can not be indexed 1125 LOG.info(e.getLocalizedMessage(), e); 1126 } 1127 return getDocumentFactory(resourceType, mimeType); 1128 } 1129 1130 /** 1131 * Returns a lucene document factory for given resource type and MIME type.<p> 1132 * 1133 * The type of the document factory is selected according to the configuration 1134 * in <code>opencms-search.xml</code>.<p> 1135 * 1136 * @param resourceType the resource type name 1137 * @param mimeType the MIME type 1138 * 1139 * @return a lucene document factory or null in case no matching factory was found 1140 */ 1141 public I_CmsDocumentFactory getDocumentFactory(String resourceType, String mimeType) { 1142 1143 I_CmsDocumentFactory result = null; 1144 if (resourceType != null) { 1145 // create the factory lookup key for the document 1146 String documentTypeKey = A_CmsVfsDocument.getDocumentKey(resourceType, mimeType); 1147 // check if a setting is available for this specific MIME type 1148 result = m_documentTypes.get(documentTypeKey); 1149 if (result == null) { 1150 // no setting is available, try to use a generic setting without MIME type 1151 
result = m_documentTypes.get(A_CmsVfsDocument.getDocumentKey(resourceType, null)); 1152 // please note: the result may still be null 1153 } 1154 } 1155 return result; 1156 } 1157 1158 /** 1159 * Returns a document type config.<p> 1160 * 1161 * @param name the name of the document type config 1162 * @return the document type config. 1163 */ 1164 public CmsSearchDocumentType getDocumentTypeConfig(String name) { 1165 1166 // this is really used only for the search manager GUI, 1167 // so performance is not an issue and no lookup map is generated 1168 for (int i = 0; i < m_documentTypeConfigs.size(); i++) { 1169 CmsSearchDocumentType type = m_documentTypeConfigs.get(i); 1170 if (type.getName().equals(name)) { 1171 return type; 1172 } 1173 } 1174 return null; 1175 } 1176 1177 /** 1178 * Returns an unmodifiable view (read-only) of the DocumentTypeConfigs Map.<p> 1179 * 1180 * @return an unmodifiable view (read-only) of the DocumentTypeConfigs Map 1181 */ 1182 public List<CmsSearchDocumentType> getDocumentTypeConfigs() { 1183 1184 return Collections.unmodifiableList(m_documentTypeConfigs); 1185 } 1186 1187 /** 1188 * Returns the maximum age a text extraction result is kept in the cache (in hours).<p> 1189 * 1190 * @return the maximum age a text extraction result is kept in the cache (in hours) 1191 */ 1192 public float getExtractionCacheMaxAge() { 1193 1194 return m_extractionCacheMaxAge; 1195 } 1196 1197 /** 1198 * Returns the search field configuration with the given name.<p> 1199 * 1200 * In case no configuration is available with the given name, <code>null</code> is returned.<p> 1201 * 1202 * @param name the name to get the search field configuration for 1203 * 1204 * @return the search field configuration with the given name 1205 */ 1206 public I_CmsSearchFieldConfiguration getFieldConfiguration(String name) { 1207 1208 return m_fieldConfigurations.get(name); 1209 } 1210 1211 /** 1212 * Returns the unmodifieable List of configured {@link 
I_CmsSearchFieldConfiguration} entries.<p> 1213 * 1214 * @return the unmodifieable List of configured {@link I_CmsSearchFieldConfiguration} entries 1215 */ 1216 public List<I_CmsSearchFieldConfiguration> getFieldConfigurations() { 1217 1218 List<I_CmsSearchFieldConfiguration> result = new ArrayList<I_CmsSearchFieldConfiguration>( 1219 m_fieldConfigurations.values()); 1220 Collections.sort(result); 1221 return Collections.unmodifiableList(result); 1222 } 1223 1224 /** 1225 * Returns the Lucene search field configurations only.<p> 1226 * 1227 * @return the Lucene search field configurations 1228 */ 1229 public List<CmsLuceneFieldConfiguration> getFieldConfigurationsLucene() { 1230 1231 List<CmsLuceneFieldConfiguration> result = new ArrayList<CmsLuceneFieldConfiguration>(); 1232 for (I_CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) { 1233 if (conf instanceof CmsLuceneFieldConfiguration) { 1234 result.add((CmsLuceneFieldConfiguration)conf); 1235 } 1236 } 1237 Collections.sort(result); 1238 return Collections.unmodifiableList(result); 1239 } 1240 1241 /** 1242 * Returns the Solr search field configurations only.<p> 1243 * 1244 * @return the Solr search field configurations 1245 */ 1246 public List<CmsSolrFieldConfiguration> getFieldConfigurationsSolr() { 1247 1248 List<CmsSolrFieldConfiguration> result = new ArrayList<CmsSolrFieldConfiguration>(); 1249 for (I_CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) { 1250 if (conf instanceof CmsSolrFieldConfiguration) { 1251 result.add((CmsSolrFieldConfiguration)conf); 1252 } 1253 } 1254 Collections.sort(result); 1255 return Collections.unmodifiableList(result); 1256 } 1257 1258 /** 1259 * Returns the force unlock mode during indexing.<p> 1260 * 1261 * @return the force unlock mode during indexing 1262 */ 1263 public CmsSearchForceUnlockMode getForceunlock() { 1264 1265 return m_forceUnlockMode; 1266 } 1267 1268 /** 1269 * Returns the highlighter.<p> 1270 * 1271 * @return the highlighter 
1272 */ 1273 public I_CmsTermHighlighter getHighlighter() { 1274 1275 return m_highlighter; 1276 } 1277 1278 /** 1279 * Returns the Lucene search index configured with the given name.<p> 1280 * The index must exist, otherwise <code>null</code> is returned. 1281 * 1282 * @param indexName then name of the requested search index 1283 * 1284 * @return the Lucene search index configured with the given name 1285 */ 1286 public I_CmsSearchIndex getIndex(String indexName) { 1287 1288 for (I_CmsSearchIndex index : m_indexes) { 1289 if (indexName.equalsIgnoreCase(index.getName())) { 1290 return index; 1291 } 1292 } 1293 return null; 1294 } 1295 1296 /** 1297 * Returns the seconds to wait for an index lock during an update operation.<p> 1298 * 1299 * @return the seconds to wait for an index lock during an update operation 1300 */ 1301 public int getIndexLockMaxWaitSeconds() { 1302 1303 return m_indexLockMaxWaitSeconds; 1304 } 1305 1306 /** 1307 * Returns the names of all configured indexes.<p> 1308 * 1309 * @return list of names 1310 */ 1311 public List<String> getIndexNames() { 1312 1313 List<String> indexNames = new ArrayList<String>(); 1314 for (int i = 0, n = m_indexes.size(); i < n; i++) { 1315 indexNames.add((m_indexes.get(i)).getName()); 1316 } 1317 1318 return indexNames; 1319 } 1320 1321 /** 1322 * Returns the Solr index configured with the given name.<p> 1323 * The index must exist, otherwise <code>null</code> is returned. 
1324 * 1325 * @param indexName then name of the requested Solr index 1326 * @return the Solr index configured with the given name 1327 */ 1328 public CmsSolrIndex getIndexSolr(String indexName) { 1329 1330 I_CmsSearchIndex index = getIndex(indexName); 1331 if (index instanceof CmsSolrIndex) { 1332 return (CmsSolrIndex)index; 1333 } 1334 return null; 1335 } 1336 1337 /** 1338 * Returns a search index source for a specified source name.<p> 1339 * 1340 * @param sourceName the name of the index source 1341 * @return a search index source 1342 */ 1343 public CmsSearchIndexSource getIndexSource(String sourceName) { 1344 1345 return m_indexSources.get(sourceName); 1346 } 1347 1348 /** 1349 * Returns the max. excerpt length.<p> 1350 * 1351 * @return the max excerpt length 1352 */ 1353 public int getMaxExcerptLength() { 1354 1355 return m_maxExcerptLength; 1356 } 1357 1358 /** 1359 * Returns the maximal time to wait for re-indexing after a content is edited (in milliseconds).<p> 1360 * 1361 * @return the maximal time to wait for re-indexing after a content is edited (in milliseconds) 1362 */ 1363 public long getMaxIndexWaitTime() { 1364 1365 return m_maxIndexWaitTime; 1366 } 1367 1368 /** 1369 * Returns the maximum number of modifications before a commit in the search index is triggered.<p> 1370 * 1371 * @return the maximum number of modifications before a commit in the search index is triggered 1372 */ 1373 public int getMaxModificationsBeforeCommit() { 1374 1375 return m_maxModificationsBeforeCommit; 1376 } 1377 1378 /** 1379 * Returns the update frequency of the offline indexer in milliseconds.<p> 1380 * 1381 * @return the update frequency of the offline indexer in milliseconds 1382 */ 1383 public long getOfflineUpdateFrequency() { 1384 1385 return m_offlineUpdateFrequency; 1386 } 1387 1388 /** 1389 * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p> 1390 * 1391 * @return an unmodifiable list of all configured <code>{@link 
I_CmsSearchIndex}</code> instances 1392 */ 1393 public List<I_CmsSearchIndex> getSearchIndexes() { 1394 1395 return Collections.unmodifiableList(m_indexes); 1396 } 1397 1398 /** 1399 * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p> 1400 * 1401 * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances 1402 */ 1403 public List<I_CmsSearchIndex> getSearchIndexesAll() { 1404 1405 return Collections.unmodifiableList(m_indexes); 1406 } 1407 1408 /** 1409 * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p> 1410 * 1411 * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances 1412 */ 1413 public List<CmsSolrIndex> getSearchIndexesSolr() { 1414 1415 List<CmsSolrIndex> indexes = new ArrayList<CmsSolrIndex>(); 1416 for (I_CmsSearchIndex index : m_indexes) { 1417 if (index instanceof CmsSolrIndex) { 1418 indexes.add((CmsSolrIndex)index); 1419 } 1420 } 1421 return Collections.unmodifiableList(indexes); 1422 } 1423 1424 /** 1425 * Returns an unmodifiable view (read-only) of the SearchIndexSources Map.<p> 1426 * 1427 * @return an unmodifiable view (read-only) of the SearchIndexSources Map 1428 */ 1429 public Map<String, CmsSearchIndexSource> getSearchIndexSources() { 1430 1431 return Collections.unmodifiableMap(m_indexSources); 1432 } 1433 1434 /** 1435 * Return singleton instance of the OpenCms spellchecker.<p> 1436 * 1437 * @return instance of CmsSolrSpellchecker. 
1438 */ 1439 public CmsSolrSpellchecker getSolrDictionary() { 1440 1441 // get the core container that contains one core for each configured index 1442 if (m_coreContainer == null) { 1443 m_coreContainer = createCoreContainer(); 1444 } 1445 return CmsSolrSpellchecker.getInstance(m_coreContainer); 1446 } 1447 1448 /** 1449 * Returns the Solr configuration.<p> 1450 * 1451 * @return the Solr configuration 1452 */ 1453 public CmsSolrConfiguration getSolrServerConfiguration() { 1454 1455 return m_solrConfig; 1456 } 1457 1458 /** 1459 * Returns the timeout to abandon threads indexing a resource.<p> 1460 * 1461 * @return the timeout to abandon threads indexing a resource 1462 */ 1463 public long getTimeout() { 1464 1465 return m_timeout; 1466 } 1467 1468 /** 1469 * Initializes the search manager.<p> 1470 * 1471 * @param cms the cms object 1472 * 1473 * @throws CmsRoleViolationException in case the given opencms object does not have <code>{@link CmsRole#WORKPLACE_MANAGER}</code> permissions 1474 */ 1475 public void initialize(CmsObject cms) throws CmsRoleViolationException { 1476 1477 OpenCms.getRoleManager().checkRole(cms, CmsRole.WORKPLACE_MANAGER); 1478 try { 1479 // store the Admin cms to index Cms resources 1480 m_adminCms = OpenCms.initCmsObject(cms); 1481 } catch (CmsException e) { 1482 // this should never happen 1483 LOG.error(e.getLocalizedMessage(), e); 1484 } 1485 // make sure the site root is the root site 1486 m_adminCms.getRequestContext().setSiteRoot("/"); 1487 1488 // create the extraction result cache 1489 m_extractionResultCache = new CmsExtractionResultCache( 1490 OpenCms.getSystemInfo().getAbsoluteRfsPathRelativeToWebInf(getDirectory()), 1491 "/extractCache"); 1492 initializeFieldConfigurations(); 1493 initializeIndexes(); 1494 initOfflineIndexes(); 1495 1496 // register this object as event listener 1497 OpenCms.addCmsEventListener( 1498 this, 1499 new int[] { 1500 I_CmsEventListener.EVENT_CLEAR_CACHES, 1501 I_CmsEventListener.EVENT_PUBLISH_PROJECT, 
1502 I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES}); 1503 } 1504 1505 /** 1506 * Calls {@link I_CmsSearchFieldConfiguration#init()} for all registered field configurations. 1507 */ 1508 public void initializeFieldConfigurations() { 1509 1510 for (I_CmsSearchFieldConfiguration config : m_fieldConfigurations.values()) { 1511 config.init(); 1512 } 1513 1514 } 1515 1516 /** 1517 * Initializes all configured document types and search indexes.<p> 1518 * 1519 * This methods needs to be called if after a change in the index configuration has been made. 1520 */ 1521 public void initializeIndexes() { 1522 1523 initAvailableDocumentTypes(); 1524 initSearchIndexes(); 1525 } 1526 1527 /** 1528 * Initialize the offline index handler, require after an offline index has been added.<p> 1529 */ 1530 public void initOfflineIndexes() { 1531 1532 // check which indexes are configured as offline indexes 1533 List<I_CmsSearchIndex> offlineIndexes = new ArrayList<I_CmsSearchIndex>(); 1534 Iterator<I_CmsSearchIndex> i = m_indexes.iterator(); 1535 while (i.hasNext()) { 1536 I_CmsSearchIndex index = i.next(); 1537 if (I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) { 1538 // this is an offline index 1539 offlineIndexes.add(index); 1540 } 1541 } 1542 m_offlineIndexes = offlineIndexes; 1543 m_offlineHandler.initialize(); 1544 1545 } 1546 1547 /** 1548 * Initializes the spell check index.<p> 1549 * 1550 * @param adminCms the ROOT_ADMIN cms context 1551 */ 1552 public void initSpellcheckIndex(CmsObject adminCms) { 1553 1554 if (CmsSpellcheckDictionaryIndexer.updatingIndexNecessesary(adminCms)) { 1555 final CmsSolrSpellchecker spellchecker = OpenCms.getSearchManager().getSolrDictionary(); 1556 if (spellchecker != null) { 1557 1558 Runnable initRunner = new Runnable() { 1559 1560 public void run() { 1561 1562 try { 1563 spellchecker.parseAndAddDictionaries(adminCms); 1564 } catch (CmsRoleViolationException e) { 1565 LOG.error(e.getLocalizedMessage(), e); 1566 } 1567 } 1568 
}; 1569 new Thread(initRunner).start(); 1570 } 1571 } 1572 } 1573 1574 /** 1575 * Returns if the offline indexing is paused.<p> 1576 * 1577 * @return <code>true</code> if the offline indexing is paused 1578 */ 1579 public boolean isOfflineIndexingPaused() { 1580 1581 return m_offlineUpdateFrequency == Long.MAX_VALUE; 1582 } 1583 1584 /** 1585 * Updates the indexes from as a scheduled job.<p> 1586 * 1587 * @param cms the OpenCms user context to use when reading resources from the VFS 1588 * @param parameters the parameters for the scheduled job 1589 * 1590 * @throws Exception if something goes wrong 1591 * 1592 * @return the String to write in the scheduler log 1593 * 1594 * @see org.opencms.scheduler.I_CmsScheduledJob#launch(CmsObject, Map) 1595 */ 1596 public String launch(CmsObject cms, Map<String, String> parameters) throws Exception { 1597 1598 CmsSearchManager manager = OpenCms.getSearchManager(); 1599 1600 I_CmsReport report = null; 1601 boolean writeLog = Boolean.valueOf(parameters.get(JOB_PARAM_WRITELOG)).booleanValue(); 1602 1603 if (writeLog) { 1604 report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class); 1605 } 1606 1607 List<String> updateList = null; 1608 String indexList = parameters.get(JOB_PARAM_INDEXLIST); 1609 if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(indexList)) { 1610 // index list has been provided as job parameter 1611 updateList = new ArrayList<String>(); 1612 String[] indexNames = CmsStringUtil.splitAsArray(indexList, '|'); 1613 for (int i = 0; i < indexNames.length; i++) { 1614 // check if the index actually exists 1615 if (manager.getIndex(indexNames[i]) != null) { 1616 updateList.add(indexNames[i]); 1617 } else { 1618 if (LOG.isWarnEnabled()) { 1619 LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexNames[i])); 1620 } 1621 } 1622 } 1623 } 1624 1625 long startTime = System.currentTimeMillis(); 1626 1627 if (updateList == null) { 1628 // all indexes need to be updated 1629 
manager.rebuildAllIndexes(report); 1630 } else { 1631 // rebuild only the selected indexes 1632 manager.rebuildIndexes(updateList, report); 1633 } 1634 1635 long runTime = System.currentTimeMillis() - startTime; 1636 1637 String finishMessage = Messages.get().getBundle().key( 1638 Messages.LOG_REBUILD_INDEXES_FINISHED_1, 1639 CmsStringUtil.formatRuntime(runTime)); 1640 1641 if (LOG.isInfoEnabled()) { 1642 LOG.info(finishMessage); 1643 } 1644 return finishMessage; 1645 } 1646 1647 /** 1648 * Pauses the offline indexing.<p> 1649 * May take some time, because the indexes are updated first.<p> 1650 */ 1651 public void pauseOfflineIndexing() { 1652 1653 if (m_offlineUpdateFrequency != Long.MAX_VALUE) { 1654 m_configuredOfflineIndexingFrequency = m_offlineUpdateFrequency; 1655 m_offlineUpdateFrequency = Long.MAX_VALUE; 1656 updateOfflineIndexes(0); 1657 } 1658 } 1659 1660 /** 1661 * Rebuilds (if required creates) all configured indexes.<p> 1662 * 1663 * @param report the report object to write messages (or <code>null</code>) 1664 * 1665 * @throws CmsException if something goes wrong 1666 */ 1667 public void rebuildAllIndexes(I_CmsReport report) throws CmsException { 1668 1669 try { 1670 SEARCH_MANAGER_LOCK.lock(); 1671 1672 CmsMessageContainer container = null; 1673 for (int i = 0, n = m_indexes.size(); i < n; i++) { 1674 // iterate all configured search indexes 1675 I_CmsSearchIndex searchIndex = m_indexes.get(i); 1676 try { 1677 // update the index 1678 updateIndex(searchIndex, report, null); 1679 } catch (CmsException e) { 1680 container = new CmsMessageContainer( 1681 Messages.get(), 1682 Messages.ERR_INDEX_REBUILD_ALL_1, 1683 new Object[] {searchIndex.getName()}); 1684 LOG.error( 1685 Messages.get().getBundle().key(Messages.ERR_INDEX_REBUILD_ALL_1, searchIndex.getName()), 1686 e); 1687 } 1688 } 1689 // clean up the extraction result cache 1690 cleanExtractionCache(); 1691 if (container != null) { 1692 // throw stored exception 1693 throw new 
CmsSearchException(container); 1694 } 1695 } finally { 1696 SEARCH_MANAGER_LOCK.unlock(); 1697 } 1698 } 1699 1700 /** 1701 * Rebuilds (if required creates) the index with the given name.<p> 1702 * 1703 * @param indexName the name of the index to rebuild 1704 * @param report the report object to write messages (or <code>null</code>) 1705 * 1706 * @throws CmsException if something goes wrong 1707 */ 1708 public void rebuildIndex(String indexName, I_CmsReport report) throws CmsException { 1709 1710 try { 1711 SEARCH_MANAGER_LOCK.lock(); 1712 // get the search index by name 1713 I_CmsSearchIndex index = getIndex(indexName); 1714 // update the index 1715 updateIndex(index, report, null); 1716 // clean up the extraction result cache 1717 cleanExtractionCache(); 1718 } finally { 1719 SEARCH_MANAGER_LOCK.unlock(); 1720 } 1721 } 1722 1723 /** 1724 * Rebuilds (if required creates) the List of indexes with the given name.<p> 1725 * 1726 * @param indexNames the names (String) of the index to rebuild 1727 * @param report the report object to write messages (or <code>null</code>) 1728 * 1729 * @throws CmsException if something goes wrong 1730 */ 1731 public void rebuildIndexes(List<String> indexNames, I_CmsReport report) throws CmsException { 1732 1733 try { 1734 SEARCH_MANAGER_LOCK.lock(); 1735 Iterator<String> i = indexNames.iterator(); 1736 while (i.hasNext()) { 1737 String indexName = i.next(); 1738 // get the search index by name 1739 I_CmsSearchIndex index = getIndex(indexName); 1740 if (index != null) { 1741 // update the index 1742 updateIndex(index, report, null); 1743 } else { 1744 if (LOG.isWarnEnabled()) { 1745 LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName)); 1746 } 1747 } 1748 } 1749 // clean up the extraction result cache 1750 cleanExtractionCache(); 1751 } finally { 1752 SEARCH_MANAGER_LOCK.unlock(); 1753 } 1754 } 1755 1756 /** 1757 * Registers a new Solr core for the given index.<p> 1758 * 1759 * @param index the index to 
register a new Solr core for 1760 * 1761 * @throws CmsConfigurationException if no Solr server is configured 1762 */ 1763 @SuppressWarnings("resource") 1764 public void registerSolrIndex(CmsSolrIndex index) throws CmsConfigurationException { 1765 1766 if ((m_solrConfig == null) || !m_solrConfig.isEnabled()) { 1767 // No solr server configured 1768 throw new CmsConfigurationException(Messages.get().container(Messages.ERR_SOLR_NOT_ENABLED_0)); 1769 } 1770 1771 if (m_solrConfig.getServerUrl() != null) { 1772 // HTTP Server configured 1773 // TODO Implement multi core support for HTTP server 1774 // @see http://lucidworks.lucidimagination.com/display/solr/Configuring+solr.xml 1775 index.setSolrServer(new Builder().withBaseSolrUrl(m_solrConfig.getServerUrl()).build()); 1776 } 1777 1778 // get the core container that contains one core for each configured index 1779 if (m_coreContainer == null) { 1780 m_coreContainer = createCoreContainer(); 1781 } 1782 1783 // unload the existing core if it exists to avoid problems with forced unlock. 
1784 if (m_coreContainer.getAllCoreNames().contains(index.getCoreName())) { 1785 m_coreContainer.unload(index.getCoreName(), false, false, true); 1786 } 1787 // ensure that all locks on the index are gone 1788 ensureIndexIsUnlocked(index.getPath()); 1789 1790 // load the core to the container 1791 File dataDir = new File(index.getPath()); 1792 if (!dataDir.exists()) { 1793 dataDir.mkdirs(); 1794 if (CmsLog.INIT.isInfoEnabled()) { 1795 CmsLog.INIT.info( 1796 Messages.get().getBundle().key( 1797 Messages.INIT_SOLR_INDEX_DIR_CREATED_2, 1798 index.getName(), 1799 index.getPath())); 1800 } 1801 } 1802 File instanceDir = new File(m_solrConfig.getHome() + FileSystems.getDefault().getSeparator() + index.getName()); 1803 if (!instanceDir.exists()) { 1804 instanceDir.mkdirs(); 1805 if (CmsLog.INIT.isInfoEnabled()) { 1806 CmsLog.INIT.info( 1807 Messages.get().getBundle().key( 1808 Messages.INIT_SOLR_INDEX_DIR_CREATED_2, 1809 index.getName(), 1810 index.getPath())); 1811 } 1812 } 1813 1814 // create the core 1815 // TODO: suboptimal - forces always the same schema 1816 SolrCore core = null; 1817 try { 1818 // creation includes registration. 
1819 // TODO: this was the old code: core = m_coreContainer.create(descriptor, false); 1820 Map<String, String> properties = new HashMap<String, String>(3); 1821 properties.put(CoreDescriptor.CORE_DATADIR, dataDir.getAbsolutePath()); 1822 properties.put(CoreDescriptor.CORE_CONFIGSET, "default"); 1823 core = m_coreContainer.create(index.getCoreName(), instanceDir.toPath(), properties, false); 1824 } catch (NullPointerException e) { 1825 if (core != null) { 1826 core.close(); 1827 } 1828 throw new CmsConfigurationException( 1829 Messages.get().container( 1830 Messages.ERR_SOLR_SERVER_NOT_CREATED_3, 1831 index.getName() + " (" + index.getCoreName() + ")", 1832 index.getPath(), 1833 m_solrConfig.getSolrConfigFile().getAbsolutePath()), 1834 e); 1835 } 1836 1837 if (index.isNoSolrServerSet()) { 1838 index.setSolrServer(new EmbeddedSolrServer(m_coreContainer, index.getCoreName())); 1839 } 1840 if (CmsLog.INIT.isInfoEnabled()) { 1841 CmsLog.INIT.info( 1842 Messages.get().getBundle().key( 1843 Messages.INIT_SOLR_SERVER_CREATED_1, 1844 index.getName() + " (" + index.getCoreName() + ")")); 1845 } 1846 } 1847 1848 /** 1849 * Removes this field configuration from the OpenCms configuration (if it is not used any more).<p> 1850 * 1851 * @param fieldConfiguration the field configuration to remove from the configuration 1852 * 1853 * @return true if remove was successful, false if preconditions for removal are ok but the given 1854 * field configuration was unknown to the manager. 1855 * 1856 * @throws CmsIllegalStateException if the given field configuration is still used by at least one 1857 * <code>{@link I_CmsSearchIndex}</code>. 
1858 * 1859 */ 1860 public boolean removeSearchFieldConfiguration(I_CmsSearchFieldConfiguration fieldConfiguration) 1861 throws CmsIllegalStateException { 1862 1863 // never remove the standard field configuration 1864 if (fieldConfiguration.getName().equals(CmsSearchFieldConfiguration.STR_STANDARD)) { 1865 throw new CmsIllegalStateException( 1866 Messages.get().container( 1867 Messages.ERR_INDEX_CONFIGURATION_DELETE_STANDARD_1, 1868 fieldConfiguration.getName())); 1869 } 1870 // validation if removal will be granted 1871 Iterator<I_CmsSearchIndex> itIndexes = m_indexes.iterator(); 1872 I_CmsSearchIndex idx; 1873 // the list for collecting indexes that use the given field configuration 1874 List<I_CmsSearchIndex> referrers = new ArrayList<I_CmsSearchIndex>(); 1875 I_CmsSearchFieldConfiguration refFieldConfig; 1876 while (itIndexes.hasNext()) { 1877 idx = itIndexes.next(); 1878 refFieldConfig = idx.getFieldConfiguration(); 1879 if (refFieldConfig.equals(fieldConfiguration)) { 1880 referrers.add(idx); 1881 } 1882 } 1883 if (referrers.size() > 0) { 1884 throw new CmsIllegalStateException( 1885 Messages.get().container( 1886 Messages.ERR_INDEX_CONFIGURATION_DELETE_2, 1887 fieldConfiguration.getName(), 1888 referrers.toString())); 1889 } 1890 1891 // remove operation (no exception) 1892 return m_fieldConfigurations.remove(fieldConfiguration.getName()) != null; 1893 1894 } 1895 1896 /** 1897 * Removes a search field from the field configuration.<p> 1898 * 1899 * @param fieldConfiguration the field configuration 1900 * @param field field to remove from the field configuration 1901 * 1902 * @return true if remove was successful, false if preconditions for removal are ok but the given 1903 * field was unknown. 
1904 */ 1905 public boolean removeSearchFieldConfigurationField( 1906 I_CmsSearchFieldConfiguration fieldConfiguration, 1907 CmsSearchField field) { 1908 1909 if (LOG.isInfoEnabled()) { 1910 LOG.info( 1911 Messages.get().getBundle().key( 1912 Messages.LOG_REMOVE_FIELDCONFIGURATION_FIELD_INDEX_2, 1913 field.getName(), 1914 fieldConfiguration.getName())); 1915 } 1916 1917 return fieldConfiguration.getFields().remove(field); 1918 } 1919 1920 /** 1921 * Removes a search field mapping from the given field.<p> 1922 * 1923 * @param field the field 1924 * @param mapping mapping to remove from the field 1925 * 1926 * @return true if remove was successful, false if preconditions for removal are ok but the given 1927 * mapping was unknown. 1928 * 1929 * @throws CmsIllegalStateException if the given mapping is the last mapping inside the given field. 1930 */ 1931 public boolean removeSearchFieldMapping(CmsLuceneField field, CmsSearchFieldMapping mapping) 1932 throws CmsIllegalStateException { 1933 1934 if (field.getMappings().size() < 2) { 1935 throw new CmsIllegalStateException( 1936 Messages.get().container( 1937 Messages.ERR_FIELD_MAPPING_DELETE_2, 1938 mapping.getType().toString(), 1939 field.getName())); 1940 } else { 1941 1942 if (LOG.isInfoEnabled()) { 1943 LOG.info( 1944 Messages.get().getBundle().key( 1945 Messages.LOG_REMOVE_FIELD_MAPPING_INDEX_2, 1946 mapping.toString(), 1947 field.getName())); 1948 } 1949 return field.getMappings().remove(mapping); 1950 } 1951 } 1952 1953 /** 1954 * Removes a search index from the configuration.<p> 1955 * 1956 * @param searchIndex the search index to remove 1957 */ 1958 public void removeSearchIndex(I_CmsSearchIndex searchIndex) { 1959 1960 // shut down index to remove potential config files of Solr indexes 1961 searchIndex.shutDown(); 1962 if (searchIndex instanceof CmsSolrIndex) { 1963 CmsSolrIndex solrIndex = (CmsSolrIndex)searchIndex; 1964 m_coreContainer.unload(solrIndex.getCoreName(), true, true, true); 1965 } 1966 
m_indexes.remove(searchIndex); 1967 initOfflineIndexes(); 1968 1969 if (LOG.isInfoEnabled()) { 1970 LOG.info( 1971 Messages.get().getBundle().key( 1972 Messages.LOG_REMOVE_SEARCH_INDEX_2, 1973 searchIndex.getName(), 1974 searchIndex.getProject())); 1975 } 1976 } 1977 1978 /** 1979 * Removes all indexes included in the given list (which must contain the name of an index to remove).<p> 1980 * 1981 * @param indexNames the names of the index to remove 1982 */ 1983 public void removeSearchIndexes(List<String> indexNames) { 1984 1985 Iterator<String> i = indexNames.iterator(); 1986 while (i.hasNext()) { 1987 String indexName = i.next(); 1988 // get the search index by name 1989 I_CmsSearchIndex index = getIndex(indexName); 1990 if (index != null) { 1991 // remove the index 1992 removeSearchIndex(index); 1993 } else { 1994 if (LOG.isWarnEnabled()) { 1995 LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName)); 1996 } 1997 } 1998 } 1999 } 2000 2001 /** 2002 * Removes this indexsource from the OpenCms configuration (if it is not used any more).<p> 2003 * 2004 * @param indexsource the indexsource to remove from the configuration 2005 * 2006 * @return true if remove was successful, false if preconditions for removal are ok but the given 2007 * searchindex was unknown to the manager. 2008 * 2009 * @throws CmsIllegalStateException if the given indexsource is still used by at least one 2010 * <code>{@link I_CmsSearchIndex}</code>. 
2011 * 2012 */ 2013 public boolean removeSearchIndexSource(CmsSearchIndexSource indexsource) throws CmsIllegalStateException { 2014 2015 // validation if removal will be granted 2016 Iterator<I_CmsSearchIndex> itIndexes = m_indexes.iterator(); 2017 I_CmsSearchIndex idx; 2018 // the list for collecting indexes that use the given index source 2019 List<I_CmsSearchIndex> referrers = new ArrayList<I_CmsSearchIndex>(); 2020 // the current list of referred index sources of the iterated index 2021 List<CmsSearchIndexSource> refsources; 2022 while (itIndexes.hasNext()) { 2023 idx = itIndexes.next(); 2024 refsources = idx.getSources(); 2025 if (refsources != null) { 2026 if (refsources.contains(indexsource)) { 2027 referrers.add(idx); 2028 } 2029 } 2030 } 2031 if (referrers.size() > 0) { 2032 throw new CmsIllegalStateException( 2033 Messages.get().container( 2034 Messages.ERR_INDEX_SOURCE_DELETE_2, 2035 indexsource.getName(), 2036 referrers.toString())); 2037 } 2038 2039 // remove operation (no exception) 2040 return m_indexSources.remove(indexsource.getName()) != null; 2041 2042 } 2043 2044 /** 2045 * Resumes offline indexing if it was paused.<p> 2046 */ 2047 public void resumeOfflineIndexing() { 2048 2049 if (m_offlineUpdateFrequency == Long.MAX_VALUE) { 2050 setOfflineUpdateFrequency( 2051 m_configuredOfflineIndexingFrequency > 0 2052 ? 
m_configuredOfflineIndexingFrequency 2053 : DEFAULT_OFFLINE_UPDATE_FREQNENCY); 2054 } 2055 } 2056 2057 /** 2058 * Sets the name of the directory below WEB-INF/ where the search indexes are stored.<p> 2059 * 2060 * @param value the name of the directory below WEB-INF/ where the search indexes are stored 2061 */ 2062 public void setDirectory(String value) { 2063 2064 m_path = value; 2065 } 2066 2067 /** 2068 * Sets the maximum age a text extraction result is kept in the cache (in hours).<p> 2069 * 2070 * @param extractionCacheMaxAge the maximum age for a text extraction result to set 2071 */ 2072 public void setExtractionCacheMaxAge(float extractionCacheMaxAge) { 2073 2074 m_extractionCacheMaxAge = extractionCacheMaxAge; 2075 } 2076 2077 /** 2078 * Sets the maximum age a text extraction result is kept in the cache (in hours) as a String.<p> 2079 * 2080 * @param extractionCacheMaxAge the maximum age for a text extraction result to set 2081 */ 2082 public void setExtractionCacheMaxAge(String extractionCacheMaxAge) { 2083 2084 try { 2085 setExtractionCacheMaxAge(Float.parseFloat(extractionCacheMaxAge)); 2086 } catch (NumberFormatException e) { 2087 LOG.error( 2088 Messages.get().getBundle().key( 2089 Messages.LOG_PARSE_EXTRACTION_CACHE_AGE_FAILED_2, 2090 extractionCacheMaxAge, 2091 new Float(DEFAULT_EXTRACTION_CACHE_MAX_AGE)), 2092 e); 2093 setExtractionCacheMaxAge(DEFAULT_EXTRACTION_CACHE_MAX_AGE); 2094 } 2095 } 2096 2097 /** 2098 * Sets the unlock mode during indexing.<p> 2099 * 2100 * @param value the value 2101 */ 2102 public void setForceunlock(String value) { 2103 2104 m_forceUnlockMode = CmsSearchForceUnlockMode.valueOf(value); 2105 } 2106 2107 /** 2108 * Sets the highlighter.<p> 2109 * 2110 * A highlighter is a class implementing org.opencms.search.documents.I_TermHighlighter.<p> 2111 * 2112 * @param highlighter the package/class name of the highlighter 2113 */ 2114 public void setHighlighter(String highlighter) { 2115 2116 try { 2117 m_highlighter = 
(I_CmsTermHighlighter)Class.forName(highlighter).newInstance(); 2118 } catch (Exception e) { 2119 m_highlighter = null; 2120 LOG.error(e.getLocalizedMessage(), e); 2121 } 2122 } 2123 2124 /** 2125 * Sets the seconds to wait for an index lock during an update operation.<p> 2126 * 2127 * @param value the seconds to wait for an index lock during an update operation 2128 */ 2129 public void setIndexLockMaxWaitSeconds(int value) { 2130 2131 m_indexLockMaxWaitSeconds = value; 2132 } 2133 2134 /** 2135 * Sets the max. excerpt length.<p> 2136 * 2137 * @param maxExcerptLength the max. excerpt length to set 2138 */ 2139 public void setMaxExcerptLength(int maxExcerptLength) { 2140 2141 m_maxExcerptLength = maxExcerptLength; 2142 } 2143 2144 /** 2145 * Sets the max. excerpt length as a String.<p> 2146 * 2147 * @param maxExcerptLength the max. excerpt length to set 2148 */ 2149 public void setMaxExcerptLength(String maxExcerptLength) { 2150 2151 try { 2152 setMaxExcerptLength(Integer.parseInt(maxExcerptLength)); 2153 } catch (Exception e) { 2154 LOG.error( 2155 Messages.get().getBundle().key( 2156 Messages.LOG_PARSE_EXCERPT_LENGTH_FAILED_2, 2157 maxExcerptLength, 2158 new Integer(DEFAULT_EXCERPT_LENGTH)), 2159 e); 2160 setMaxExcerptLength(DEFAULT_EXCERPT_LENGTH); 2161 } 2162 } 2163 2164 /** 2165 * Sets the maximal wait time for offline index updates after edit operations.<p> 2166 * 2167 * @param maxIndexWaitTime the maximal wait time to set in milliseconds 2168 */ 2169 public void setMaxIndexWaitTime(long maxIndexWaitTime) { 2170 2171 m_maxIndexWaitTime = maxIndexWaitTime; 2172 } 2173 2174 /** 2175 * Sets the maximal wait time for offline index updates after edit operations.<p> 2176 * 2177 * @param maxIndexWaitTime the maximal wait time to set in milliseconds 2178 */ 2179 public void setMaxIndexWaitTime(String maxIndexWaitTime) { 2180 2181 try { 2182 setMaxIndexWaitTime(Long.parseLong(maxIndexWaitTime)); 2183 } catch (Exception e) { 2184 LOG.error( 2185 
Messages.get().getBundle().key( 2186 Messages.LOG_PARSE_MAX_INDEX_WAITTIME_FAILED_2, 2187 maxIndexWaitTime, 2188 new Long(DEFAULT_MAX_INDEX_WAITTIME)), 2189 e); 2190 setMaxIndexWaitTime(DEFAULT_MAX_INDEX_WAITTIME); 2191 } 2192 } 2193 2194 /** 2195 * Sets the maximum number of modifications before a commit in the search index is triggered.<p> 2196 * 2197 * @param maxModificationsBeforeCommit the maximum number of modifications to set 2198 */ 2199 public void setMaxModificationsBeforeCommit(int maxModificationsBeforeCommit) { 2200 2201 m_maxModificationsBeforeCommit = maxModificationsBeforeCommit; 2202 } 2203 2204 /** 2205 * Sets the maximum number of modifications before a commit in the search index is triggered as a string.<p> 2206 * 2207 * @param value the maximum number of modifications to set 2208 */ 2209 public void setMaxModificationsBeforeCommit(String value) { 2210 2211 try { 2212 setMaxModificationsBeforeCommit(Integer.parseInt(value)); 2213 } catch (Exception e) { 2214 LOG.error( 2215 Messages.get().getBundle().key( 2216 Messages.LOG_PARSE_MAXCOMMIT_FAILED_2, 2217 value, 2218 new Integer(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT)), 2219 e); 2220 setMaxModificationsBeforeCommit(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT); 2221 } 2222 } 2223 2224 /** 2225 * Sets the update frequency of the offline indexer in milliseconds.<p> 2226 * 2227 * @param offlineUpdateFrequency the update frequency in milliseconds to set 2228 */ 2229 public void setOfflineUpdateFrequency(long offlineUpdateFrequency) { 2230 2231 m_offlineUpdateFrequency = offlineUpdateFrequency; 2232 updateOfflineIndexes(0); 2233 } 2234 2235 /** 2236 * Sets the update frequency of the offline indexer in milliseconds.<p> 2237 * 2238 * @param offlineUpdateFrequency the update frequency in milliseconds to set 2239 */ 2240 public void setOfflineUpdateFrequency(String offlineUpdateFrequency) { 2241 2242 try { 2243 setOfflineUpdateFrequency(Long.parseLong(offlineUpdateFrequency)); 2244 } catch (Exception e) { 
2245 LOG.error( 2246 Messages.get().getBundle().key( 2247 Messages.LOG_PARSE_OFFLINE_UPDATE_FAILED_2, 2248 offlineUpdateFrequency, 2249 new Long(DEFAULT_OFFLINE_UPDATE_FREQNENCY)), 2250 e); 2251 setOfflineUpdateFrequency(DEFAULT_OFFLINE_UPDATE_FREQNENCY); 2252 } 2253 } 2254 2255 /** 2256 * Sets the Solr configuration.<p> 2257 * 2258 * @param config the Solr configuration 2259 */ 2260 public void setSolrServerConfiguration(CmsSolrConfiguration config) { 2261 2262 m_solrConfig = config; 2263 } 2264 2265 /** 2266 * Sets the timeout to abandon threads indexing a resource.<p> 2267 * 2268 * @param value the timeout in milliseconds 2269 */ 2270 public void setTimeout(long value) { 2271 2272 m_timeout = value; 2273 } 2274 2275 /** 2276 * Sets the timeout to abandon threads indexing a resource as a String.<p> 2277 * 2278 * @param value the timeout in milliseconds 2279 */ 2280 public void setTimeout(String value) { 2281 2282 try { 2283 setTimeout(Long.parseLong(value)); 2284 } catch (Exception e) { 2285 LOG.error( 2286 Messages.get().getBundle().key(Messages.LOG_PARSE_TIMEOUT_FAILED_2, value, new Long(DEFAULT_TIMEOUT)), 2287 e); 2288 setTimeout(DEFAULT_TIMEOUT); 2289 } 2290 } 2291 2292 /** 2293 * Shuts down the search manager.<p> 2294 * 2295 * This will cause all search indices to be shut down.<p> 2296 */ 2297 public void shutDown() { 2298 2299 if (m_offlineIndexThread != null) { 2300 m_offlineIndexThread.shutDown(); 2301 } 2302 2303 if (m_offlineHandler != null) { 2304 OpenCms.removeCmsEventListener(m_offlineHandler); 2305 } 2306 2307 Iterator<I_CmsSearchIndex> i = m_indexes.iterator(); 2308 while (i.hasNext()) { 2309 I_CmsSearchIndex index = i.next(); 2310 index.shutDown(); 2311 index = null; 2312 } 2313 m_indexes.clear(); 2314 2315 shutDownSolrContainer(); 2316 2317 if (CmsLog.INIT.isInfoEnabled()) { 2318 CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SHUTDOWN_MANAGER_0)); 2319 } 2320 } 2321 2322 /** 2323 * Updates all offline indexes.<p> 2324 * 2325 * Can 
be used to force an index update when it's not convenient to wait until the 2326 * offline update interval has eclipsed.<p> 2327 * 2328 * Since the offline indexes still need some time to update the new resources, 2329 * the method waits for at most the configurable <code>maxIndexWaitTime</code> 2330 * to ensure that updating is finished. 2331 * 2332 * @see #updateOfflineIndexes(long) 2333 * 2334 */ 2335 public void updateOfflineIndexes() { 2336 2337 updateOfflineIndexes(getMaxIndexWaitTime()); 2338 } 2339 2340 /** 2341 * Updates all offline indexes.<p> 2342 * 2343 * Can be used to force an index update when it's not convenient to wait until the 2344 * offline update interval has eclipsed.<p> 2345 * 2346 * Since the offline index will still need some time to update the new resources even if it runs directly, 2347 * a wait time of 2500 or so should be given in order to make sure the index finished updating. 2348 * 2349 * @param waitTime milliseconds to wait after the offline update index was notified of the changes 2350 */ 2351 public void updateOfflineIndexes(long waitTime) { 2352 2353 if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) { 2354 // notify existing thread of update frequency change 2355 if (LOG.isDebugEnabled()) { 2356 LOG.debug(Messages.get().getBundle().key(Messages.LOG_OI_UPDATE_INTERRUPT_0)); 2357 } 2358 m_offlineIndexThread.interrupt(); 2359 if (waitTime > 0) { 2360 m_offlineIndexThread.getWaitHandle().enter(waitTime); 2361 } 2362 } 2363 } 2364 2365 /** 2366 * Cleans up the extraction result cache.<p> 2367 */ 2368 protected void cleanExtractionCache() { 2369 2370 // clean up the extraction result cache 2371 m_extractionResultCache.cleanCache(m_extractionCacheMaxAge); 2372 } 2373 2374 /** 2375 * Collects the related containerpages to the resources that have been published.<p> 2376 * 2377 * @param adminCms an OpenCms user context with Admin permissions 2378 * @param updateResources the resources to be re-indexed 2379 * 2380 * 
@return the updated list of resource to re-index 2381 */ 2382 protected List<CmsPublishedResource> findRelatedContainerPages( 2383 CmsObject adminCms, 2384 List<CmsPublishedResource> updateResources) { 2385 2386 Set<CmsResource> elementGroups = new HashSet<CmsResource>(); 2387 Set<CmsResource> containerPages = new HashSet<CmsResource>(); 2388 int containerPageTypeId = -1; 2389 try { 2390 containerPageTypeId = CmsResourceTypeXmlContainerPage.getContainerPageTypeId(); 2391 } catch (CmsLoaderException e) { 2392 // will happen during setup, when container page type is not available yet 2393 LOG.info(e.getLocalizedMessage(), e); 2394 } 2395 if (containerPageTypeId != -1) { 2396 for (CmsPublishedResource pubRes : updateResources) { 2397 try { 2398 if (OpenCms.getResourceManager().getResourceType( 2399 pubRes.getType()) instanceof CmsResourceTypeXmlContent) { 2400 CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(pubRes.getStructureId()); 2401 filter.filterStrong(); 2402 List<CmsRelation> relations = adminCms.readRelations(filter); 2403 for (CmsRelation relation : relations) { 2404 CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL); 2405 if (CmsResourceTypeXmlContainerPage.isContainerPage(res)) { 2406 containerPages.add(res); 2407 if (CmsDetailOnlyContainerUtil.isDetailContainersPage( 2408 adminCms, 2409 adminCms.getSitePath(res))) { 2410 addDetailContent(adminCms, containerPages, adminCms.getSitePath(res)); 2411 } 2412 } else if (OpenCms.getResourceManager().getResourceType( 2413 res.getTypeId()).getTypeName().equals( 2414 CmsResourceTypeXmlContainerPage.GROUP_CONTAINER_TYPE_NAME)) { 2415 elementGroups.add(res); 2416 } 2417 } 2418 } 2419 if (containerPageTypeId == pubRes.getType()) { 2420 addDetailContent( 2421 adminCms, 2422 containerPages, 2423 adminCms.getRequestContext().removeSiteRoot(pubRes.getRootPath())); 2424 } 2425 } catch (CmsException e) { 2426 LOG.error(e.getLocalizedMessage(), e); 2427 } 2428 } 2429 for (CmsResource 
pubRes : elementGroups) { 2430 try { 2431 CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(pubRes.getStructureId()); 2432 filter.filterStrong(); 2433 List<CmsRelation> relations = adminCms.readRelations(filter); 2434 for (CmsRelation relation : relations) { 2435 CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL); 2436 if (CmsResourceTypeXmlContainerPage.isContainerPage(res)) { 2437 containerPages.add(res); 2438 if (CmsDetailOnlyContainerUtil.isDetailContainersPage( 2439 adminCms, 2440 adminCms.getSitePath(res))) { 2441 addDetailContent(adminCms, containerPages, adminCms.getSitePath(res)); 2442 } 2443 } 2444 } 2445 } catch (CmsException e) { 2446 LOG.error(e.getLocalizedMessage(), e); 2447 } 2448 } 2449 // add all found container pages as published resource objects to the list 2450 for (CmsResource page : containerPages) { 2451 CmsPublishedResource pubCont = new CmsPublishedResource(page); 2452 if (!updateResources.contains(pubCont)) { 2453 // ensure container page is added only once 2454 updateResources.add(pubCont); 2455 } 2456 } 2457 } 2458 return updateResources; 2459 } 2460 2461 /** 2462 * Returns the set of names of all configured document types.<p> 2463 * 2464 * @return the set of names of all configured document types 2465 */ 2466 protected List<String> getDocumentTypes() { 2467 2468 List<String> names = new ArrayList<String>(); 2469 for (Iterator<I_CmsDocumentFactory> i = m_documentTypes.values().iterator(); i.hasNext();) { 2470 I_CmsDocumentFactory factory = i.next(); 2471 names.add(factory.getName()); 2472 } 2473 return names; 2474 } 2475 2476 /** 2477 * Returns the a offline project used for offline indexing.<p> 2478 * 2479 * @return the offline project if available 2480 */ 2481 protected CmsProject getOfflineIndexProject() { 2482 2483 CmsProject result = null; 2484 for (I_CmsSearchIndex index : m_offlineIndexes) { 2485 try { 2486 result = m_adminCms.readProject(index.getProject()); 2487 2488 if 
(!result.isOnlineProject()) { 2489 break; 2490 } 2491 } catch (Exception e) { 2492 // may be a missconfigured index, ignore 2493 LOG.error(e.getLocalizedMessage(), e); 2494 } 2495 } 2496 return result; 2497 } 2498 2499 /** 2500 * Returns a new thread manager for the indexing threads.<p> 2501 * 2502 * @return a new thread manager for the indexing threads 2503 */ 2504 protected CmsIndexingThreadManager getThreadManager() { 2505 2506 return new CmsIndexingThreadManager(m_timeout, m_maxModificationsBeforeCommit); 2507 } 2508 2509 /** 2510 * Initializes the available Cms resource types to be indexed.<p> 2511 * 2512 * A map stores document factories keyed by a string representing 2513 * a colon separated list of Cms resource types and/or mimetypes.<p> 2514 * 2515 * The keys of this map are used to trigger a document factory to convert 2516 * a Cms resource into a Lucene index document.<p> 2517 * 2518 * A document factory is a class implementing the interface 2519 * {@link org.opencms.search.documents.I_CmsDocumentFactory}.<p> 2520 */ 2521 protected void initAvailableDocumentTypes() { 2522 2523 CmsSearchDocumentType documenttype = null; 2524 String className = null; 2525 String name = null; 2526 I_CmsDocumentFactory documentFactory = null; 2527 List<String> resourceTypes = null; 2528 List<String> mimeTypes = null; 2529 Class<?> c = null; 2530 2531 m_documentTypes = new HashMap<String, I_CmsDocumentFactory>(); 2532 2533 for (int i = 0, n = m_documentTypeConfigs.size(); i < n; i++) { 2534 2535 documenttype = m_documentTypeConfigs.get(i); 2536 name = documenttype.getName(); 2537 2538 try { 2539 className = documenttype.getClassName(); 2540 resourceTypes = documenttype.getResourceTypes(); 2541 mimeTypes = documenttype.getMimeTypes(); 2542 2543 if (name == null) { 2544 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_NAME_0)); 2545 } 2546 if (className == null) { 2547 throw new 
CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_CLASS_DEF_0)); 2548 } 2549 if (resourceTypes.size() == 0) { 2550 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_RESOURCETYPE_DEF_0)); 2551 } 2552 2553 try { 2554 c = Class.forName(className); 2555 documentFactory = (I_CmsDocumentFactory)c.getConstructor(new Class[] {String.class}).newInstance( 2556 new Object[] {name}); 2557 } catch (ClassNotFoundException exc) { 2558 throw new CmsIndexException( 2559 Messages.get().container(Messages.ERR_DOCCLASS_NOT_FOUND_1, className), 2560 exc); 2561 } catch (Exception exc) { 2562 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCCLASS_INIT_1, className), exc); 2563 } 2564 2565 if (documentFactory.isUsingCache()) { 2566 // init cache if used by the factory 2567 documentFactory.setCache(m_extractionResultCache); 2568 } 2569 2570 for (Iterator<String> key = documentFactory.getDocumentKeys( 2571 resourceTypes, 2572 mimeTypes).iterator(); key.hasNext();) { 2573 m_documentTypes.put(key.next(), documentFactory); 2574 } 2575 2576 } catch (CmsException e) { 2577 if (LOG.isWarnEnabled()) { 2578 LOG.warn(Messages.get().getBundle().key(Messages.LOG_DOCTYPE_CONFIG_FAILED_1, name), e); 2579 } 2580 } 2581 } 2582 } 2583 2584 /** 2585 * Initializes the configured search indexes.<p> 2586 * 2587 * This initializes also the list of Cms resources types 2588 * to be indexed by an index source.<p> 2589 */ 2590 protected void initSearchIndexes() { 2591 2592 I_CmsSearchIndex index = null; 2593 for (int i = 0, n = m_indexes.size(); i < n; i++) { 2594 index = m_indexes.get(i); 2595 // reset disabled flag 2596 index.setEnabled(true); 2597 // check if the index has been configured correctly 2598 if (index.checkConfiguration(m_adminCms)) { 2599 // the index is configured correctly 2600 try { 2601 index.initialize(); 2602 } catch (Exception e) { 2603 if (CmsLog.INIT.isWarnEnabled()) { 2604 // in this case the index will be disabled 2605 
CmsLog.INIT.warn(Messages.get().getBundle().key(Messages.INIT_SEARCH_INIT_FAILED_1, index), e); 2606 } 2607 } 2608 } 2609 // output a log message if the index was successfully configured or not 2610 if (CmsLog.INIT.isInfoEnabled()) { 2611 if (index.isEnabled()) { 2612 CmsLog.INIT.info( 2613 Messages.get().getBundle().key(Messages.INIT_INDEX_CONFIGURED_2, index, index.getProject())); 2614 } else { 2615 CmsLog.INIT.warn( 2616 Messages.get().getBundle().key( 2617 Messages.INIT_INDEX_NOT_CONFIGURED_2, 2618 index, 2619 index.getProject())); 2620 } 2621 } 2622 } 2623 } 2624 2625 /** 2626 * Checks, if the index should be rebuilt/updated at all by the search manager. 2627 * @param index the index to check. 2628 * @return a flag, indicating if the index should be rebuilt/updated at all. 2629 */ 2630 protected boolean shouldUpdateAtAll(I_CmsSearchIndex index) { 2631 2632 if (I_CmsSearchIndex.REBUILD_MODE_NEVER.equals(index.getRebuildMode())) { 2633 LOG.debug(Messages.get().getBundle().key(Messages.LOG_SKIP_REBUILD_FOR_MODE_NEVER_1, index.getName())); 2634 return false; 2635 } else { 2636 return true; 2637 } 2638 2639 } 2640 2641 /** 2642 * Incrementally updates all indexes that have their rebuild mode set to <code>"auto"</code> 2643 * after resources have been published.<p> 2644 * 2645 * @param adminCms an OpenCms user context with Admin permissions 2646 * @param publishHistoryId the history ID of the published project 2647 * @param report the report to write the output to 2648 */ 2649 protected void updateAllIndexes(CmsObject adminCms, CmsUUID publishHistoryId, I_CmsReport report) { 2650 2651 int oldPriority = Thread.currentThread().getPriority(); 2652 try { 2653 SEARCH_MANAGER_LOCK.lock(); 2654 Thread.currentThread().setPriority(Thread.MIN_PRIORITY); 2655 List<CmsPublishedResource> publishedResources; 2656 try { 2657 // read the list of all published resources 2658 publishedResources = adminCms.readPublishedResources(publishHistoryId); 2659 } catch (CmsException e) { 2660 
LOG.error( 2661 Messages.get().getBundle().key(Messages.LOG_READING_CHANGED_RESOURCES_FAILED_1, publishHistoryId), 2662 e); 2663 return; 2664 } 2665 Set<CmsUUID> bothNewAndDeleted = getIdsOfPublishResourcesWhichAreBothNewAndDeleted(publishedResources); 2666 // When published resources with both states 'new' and 'deleted' exist in the same publish job history, the resource has been moved 2667 2668 List<CmsPublishedResource> updateResources = new ArrayList<CmsPublishedResource>(); 2669 for (CmsPublishedResource res : publishedResources) { 2670 if (res.isFolder() || res.getState().isUnchanged()) { 2671 // folders and unchanged resources don't need to be indexed after publish 2672 continue; 2673 } 2674 if (res.getState().isDeleted() || res.getState().isNew() || res.getState().isChanged()) { 2675 if (updateResources.contains(res)) { 2676 // resource may have been added as a sibling of another resource 2677 // in this case we make sure to use the value from the publish list because of the "deleted" flag 2678 boolean hasMoved = bothNewAndDeleted.contains(res.getStructureId()) 2679 || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_DESTINATION) 2680 || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_SOURCE); 2681 // check it this is a moved resource with source / target info, in this case we need both entries 2682 if (!hasMoved) { 2683 // if the resource was moved, we must contain both entries 2684 updateResources.remove(res); 2685 } 2686 // "equals()" implementation of published resource checks for id, 2687 // so the removed value may have a different "deleted" or "modified" status value 2688 updateResources.add(res); 2689 } else { 2690 // resource not yet contained in the list 2691 updateResources.add(res); 2692 // check for the siblings (not for deleted resources, these are already gone) 2693 if (!res.getState().isDeleted() && (res.getSiblingCount() > 1)) { 2694 // this resource has siblings 2695 try { 2696 // read siblings from the online project 2697 
List<CmsResource> siblings = adminCms.readSiblings( 2698 res.getRootPath(), 2699 CmsResourceFilter.ALL); 2700 Iterator<CmsResource> itSib = siblings.iterator(); 2701 while (itSib.hasNext()) { 2702 // check all siblings 2703 CmsResource sibling = itSib.next(); 2704 CmsPublishedResource sib = new CmsPublishedResource(sibling); 2705 if (!updateResources.contains(sib)) { 2706 // ensure sibling is added only once 2707 updateResources.add(sib); 2708 } 2709 } 2710 } catch (CmsException e) { 2711 // ignore, just use the original resource 2712 if (LOG.isWarnEnabled()) { 2713 LOG.warn( 2714 Messages.get().getBundle().key( 2715 Messages.LOG_UNABLE_TO_READ_SIBLINGS_1, 2716 res.getRootPath()), 2717 e); 2718 } 2719 } 2720 } 2721 } 2722 } 2723 } 2724 2725 findRelatedContainerPages(adminCms, updateResources); 2726 if (!updateResources.isEmpty()) { 2727 // sort the resource to update 2728 Collections.sort(updateResources); 2729 // only update the indexes if the list of remaining published resources is not empty 2730 Iterator<I_CmsSearchIndex> i = m_indexes.iterator(); 2731 while (i.hasNext()) { 2732 I_CmsSearchIndex index = i.next(); 2733 if (I_CmsSearchIndex.REBUILD_MODE_AUTO.equals(index.getRebuildMode())) { 2734 // only update indexes which have the rebuild mode set to "auto" 2735 try { 2736 updateIndex(index, report, updateResources); 2737 } catch (CmsException e) { 2738 LOG.error( 2739 Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()), 2740 e); 2741 } 2742 } 2743 } 2744 } 2745 // clean up the extraction result cache 2746 cleanExtractionCache(); 2747 } finally { 2748 SEARCH_MANAGER_LOCK.unlock(); 2749 Thread.currentThread().setPriority(oldPriority); 2750 } 2751 } 2752 2753 /** 2754 * Updates (if required creates) the index with the given name.<p> 2755 * 2756 * If the optional List of <code>{@link CmsPublishedResource}</code> instances is provided, the index will be 2757 * incrementally updated for these resources only. 
If this List is <code>null</code> or empty, 2758 * the index will be fully rebuild.<p> 2759 * 2760 * @param index the index to update or rebuild 2761 * @param report the report to write output messages to 2762 * @param resourcesToIndex an (optional) list of <code>{@link CmsPublishedResource}</code> objects to update in the index 2763 * 2764 * @throws CmsException if something goes wrong 2765 */ 2766 protected void updateIndex(I_CmsSearchIndex index, I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) 2767 throws CmsException { 2768 2769 if (shouldUpdateAtAll(index)) { 2770 try { 2771 SEARCH_MANAGER_LOCK.lock(); 2772 2773 // copy the stored admin context for the indexing 2774 CmsObject cms = OpenCms.initCmsObject(m_adminCms); 2775 // make sure a report is available 2776 if (report == null) { 2777 report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class); 2778 } 2779 2780 // check if the index has been configured correctly 2781 if (!index.checkConfiguration(cms)) { 2782 // the index is disabled 2783 return; 2784 } 2785 2786 // set site root and project for this index 2787 cms.getRequestContext().setSiteRoot("/"); 2788 // switch to the index project 2789 cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject())); 2790 2791 if ((resourcesToIndex == null) || resourcesToIndex.isEmpty()) { 2792 // rebuild the complete index 2793 2794 updateIndexCompletely(cms, index, report); 2795 } else { 2796 updateIndexIncremental(cms, index, report, resourcesToIndex); 2797 } 2798 } finally { 2799 SEARCH_MANAGER_LOCK.unlock(); 2800 } 2801 } 2802 } 2803 2804 /** 2805 * The method updates all OpenCms documents that are indexed. 
     * @param cms the OpenCms user context to use for accessing the VFS
     * @param index the index to update
     * @param report the report to write output messages to
     * @throws CmsIndexException thrown if indexing fails for some reason
     */
    @SuppressWarnings("null")
    protected void updateIndexCompletely(CmsObject cms, I_CmsSearchIndex index, I_CmsReport report)
    throws CmsIndexException {

        // create a new thread manager for the indexing threads
        CmsIndexingThreadManager threadManager = getThreadManager();

        boolean isOfflineIndex = false;
        if (I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) {
            // disable offline indexing while the complete index is rebuilt;
            // the original mode is restored in the finally block below
            isOfflineIndex = true;
            index.setRebuildMode(I_CmsSearchIndex.REBUILD_MODE_MANUAL);
            // re-initialize the offline indexes, this will disable this offline index
            initOfflineIndexes();
        }

        I_CmsIndexWriter writer = null;
        try {
            // create a backup of the existing index (only done for CmsSearchIndex instances)
            CmsSearchIndex indexInternal = null;
            String backup = null;
            if (index instanceof CmsSearchIndex) {
                indexInternal = (CmsSearchIndex)index;
                backup = indexInternal.createIndexBackup();
                if (backup != null) {
                    // open the index searcher on the backup
                    // (presumably so searching stays possible during the rebuild - TODO confirm)
                    indexInternal.indexSearcherOpen(backup);
                }
            }

            // create a new index writer
            writer = index.getIndexWriter(report, true);
            if (writer instanceof I_CmsSolrIndexWriter) {
                // Solr writers are explicitly cleared before the rebuild, a failure is only logged
                try {
                    ((I_CmsSolrIndexWriter)writer).deleteAllDocuments();
                } catch (IOException e) {
                    LOG.error(e.getMessage(), e);
                }
            }

            // output start information on the report
            report.println(
                Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_BEGIN_1, index.getName()),
                I_CmsReport.FORMAT_HEADLINE);

            // iterate all configured index sources of this index
            Iterator<CmsSearchIndexSource> sources = index.getSources().iterator();
            while (sources.hasNext()) {
                // get the next index source
                CmsSearchIndexSource source = sources.next();
                // create the indexer
                I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index);
                // new index creation, use all resources from the index source
                indexer.rebuildIndex(writer, threadManager, source);

                // wait for indexing threads to finish, polling the thread manager every 500 ms
                while (threadManager.isRunning()) {
                    try {
                        Thread.sleep(500);
                    } catch (InterruptedException e) {
                        // just continue with the loop after interruption
                        LOG.info(e.getLocalizedMessage(), e);
                    }
                }

                // commit and optimize the index after each index source has been finished,
                // I/O problems are logged as warnings and do not abort the rebuild
                try {
                    writer.commit();
                } catch (IOException e) {
                    if (LOG.isWarnEnabled()) {
                        LOG.warn(
                            Messages.get().getBundle().key(
                                Messages.LOG_IO_INDEX_WRITER_COMMIT_2,
                                index.getName(),
                                index.getPath()),
                            e);
                    }
                }
                try {
                    writer.optimize();
                } catch (IOException e) {
                    if (LOG.isWarnEnabled()) {
                        LOG.warn(
                            Messages.get().getBundle().key(
                                Messages.LOG_IO_INDEX_WRITER_OPTIMIZE_2,
                                index.getName(),
                                index.getPath()),
                            e);
                    }
                }
            }

            // we are sure here that indexInternal is not null:
            // backup is only assigned in the branch above that also assigns indexInternal
            if (backup != null) {
                // remove the backup after the files have been re-indexed
                indexInternal.indexSearcherClose();
                indexInternal.removeIndexBackup(backup);
            }

            // output finish information on the report
            report.println(
                Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_END_1, index.getName()),
                I_CmsReport.FORMAT_HEADLINE);

        } finally {
            // always close the writer, even if the rebuild failed
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    if (LOG.isWarnEnabled()) {
                        // NOTE(review): the arguments are passed as (path, name) here while the commit and
                        // optimize messages above use (name, path) - confirm against the message bundle
                        LOG.warn(
                            Messages.get().getBundle().key(
                                Messages.LOG_IO_INDEX_WRITER_CLOSE_2,
                                index.getPath(),
                                index.getName()),
                            e);
                    }
                }
            }
            if (isOfflineIndex) {
                // reset the mode of the offline index
                index.setRebuildMode(I_CmsSearchIndex.REBUILD_MODE_OFFLINE);
                // re-initialize the offline indexes, this will re-enable this index
                initOfflineIndexes();
            }
            // index has changed - initialize the index searcher instance
            index.onIndexChanged(true);
        }

        // show information about indexing runtime
        threadManager.reportStatistics(report);
    }

    /**
     * Incrementally updates the given index.<p>
     *
     * @param cms the OpenCms user context to use for accessing the VFS
     * @param index the index to update
     * @param report the report to write output messages to
     * @param resourcesToIndex a list of <code>{@link CmsPublishedResource}</code> objects to update in the index
     *
     * @throws CmsException if something goes wrong
     */
    protected void updateIndexIncremental(
        CmsObject cms,
        I_CmsSearchIndex index,
        I_CmsReport report,
        List<CmsPublishedResource> resourcesToIndex)
    throws CmsException {

        try {
            SEARCH_MANAGER_LOCK.lock();

            // update the existing index
            List<CmsSearchIndexUpdateData> updateCollections = new ArrayList<CmsSearchIndexUpdateData>();

            boolean hasResourcesToDelete = false;
            boolean hasResourcesToUpdate = false;

            // iterate all configured index sources of this index
            Iterator<CmsSearchIndexSource> sources = index.getSources().iterator();
            while (sources.hasNext()) {
                // get the next index source
                CmsSearchIndexSource source = sources.next();
                // create the indexer
                I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index);
                // collect the resources to update
                CmsSearchIndexUpdateData updateData = indexer.getUpdateData(source, resourcesToIndex);
                if (!updateData.isEmpty()) {
                    // add the update collection to the internal pipeline
                    updateCollections.add(updateData);
hasResourcesToDelete = hasResourcesToDelete | updateData.hasResourcesToDelete(); 2982 hasResourcesToUpdate = hasResourcesToUpdate | updateData.hasResourceToUpdate(); 2983 } 2984 } 2985 2986 // only start index modification if required 2987 if (hasResourcesToDelete || hasResourcesToUpdate) { 2988 // output start information on the report 2989 report.println( 2990 Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_BEGIN_1, index.getName()), 2991 I_CmsReport.FORMAT_HEADLINE); 2992 2993 I_CmsIndexWriter writer = null; 2994 try { 2995 // obtain an index writer that updates the current index 2996 writer = index.getIndexWriter(report, false); 2997 2998 if (hasResourcesToDelete) { 2999 // delete the resource from the index 3000 Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator(); 3001 while (i.hasNext()) { 3002 CmsSearchIndexUpdateData updateCollection = i.next(); 3003 if (updateCollection.hasResourcesToDelete()) { 3004 updateCollection.getIndexer().deleteResources( 3005 writer, 3006 updateCollection.getResourcesToDelete()); 3007 } 3008 } 3009 } 3010 3011 if (hasResourcesToUpdate) { 3012 // create a new thread manager 3013 CmsIndexingThreadManager threadManager = getThreadManager(); 3014 3015 Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator(); 3016 while (i.hasNext()) { 3017 CmsSearchIndexUpdateData updateCollection = i.next(); 3018 if (updateCollection.hasResourceToUpdate()) { 3019 updateCollection.getIndexer().updateResources( 3020 writer, 3021 threadManager, 3022 updateCollection.getResourcesToUpdate()); 3023 } 3024 } 3025 3026 // wait for indexing threads to finish 3027 while (threadManager.isRunning()) { 3028 try { 3029 Thread.sleep(500); 3030 } catch (InterruptedException e) { 3031 // just continue with the loop after interruption 3032 LOG.info(e.getLocalizedMessage(), e); 3033 } 3034 } 3035 } 3036 } finally { 3037 // close the index writer 3038 if (writer != null) { 3039 try { 3040 writer.commit(); 3041 } catch (IOException 
e) { 3042 LOG.error( 3043 Messages.get().getBundle().key( 3044 Messages.LOG_IO_INDEX_WRITER_COMMIT_2, 3045 index.getName(), 3046 index.getPath()), 3047 e); 3048 } 3049 } 3050 // index has changed - initialize the index searcher instance 3051 index.onIndexChanged(false); 3052 } 3053 3054 // output finish information on the report 3055 report.println( 3056 Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_END_1, index.getName()), 3057 I_CmsReport.FORMAT_HEADLINE); 3058 } 3059 } finally { 3060 SEARCH_MANAGER_LOCK.unlock(); 3061 } 3062 } 3063 3064 /** 3065 * Updates the offline search indexes for the given list of resources.<p> 3066 * 3067 * @param report the report to write the index information to 3068 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index 3069 */ 3070 protected void updateIndexOffline(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) { 3071 3072 CmsObject cms = m_adminCms; 3073 try { 3074 // copy the administration context for the indexing 3075 cms = OpenCms.initCmsObject(m_adminCms); 3076 // set site root and project for this index 3077 cms.getRequestContext().setSiteRoot("/"); 3078 } catch (CmsException e) { 3079 LOG.error(e.getLocalizedMessage(), e); 3080 } 3081 3082 Iterator<I_CmsSearchIndex> j = m_offlineIndexes.iterator(); 3083 while (j.hasNext()) { 3084 I_CmsSearchIndex index = j.next(); 3085 if (index.getSources() != null) { 3086 try { 3087 // switch to the index project 3088 cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject())); 3089 updateIndexIncremental(cms, index, report, resourcesToIndex); 3090 } catch (CmsException e) { 3091 LOG.error(Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()), e); 3092 } 3093 } 3094 } 3095 } 3096 3097 /** 3098 * Checks if the given containerpage is used as a detail containers and adds the related detail content to the resource set.<p> 3099 * 3100 * @param adminCms the cms context 3101 * @param 
containerPages the containerpages 3102 * @param containerPage the container page site path 3103 */ 3104 private void addDetailContent(CmsObject adminCms, Set<CmsResource> containerPages, String containerPage) { 3105 3106 if (CmsDetailOnlyContainerUtil.isDetailContainersPage(adminCms, containerPage)) { 3107 3108 try { 3109 CmsResource detailRes = adminCms.readResource( 3110 CmsDetailOnlyContainerUtil.getDetailContentPath(containerPage), 3111 CmsResourceFilter.IGNORE_EXPIRATION); 3112 containerPages.add(detailRes); 3113 } catch (Throwable e) { 3114 if (LOG.isWarnEnabled()) { 3115 LOG.warn(e.getLocalizedMessage(), e); 3116 } 3117 } 3118 } 3119 } 3120 3121 /** 3122 * Creates the Solr core container.<p> 3123 * 3124 * @return the created core container 3125 */ 3126 private CoreContainer createCoreContainer() { 3127 3128 CoreContainer container = null; 3129 try { 3130 // get the core container 3131 // still no core container: create it 3132 container = CoreContainer.createAndLoad( 3133 Paths.get(m_solrConfig.getHome()), 3134 m_solrConfig.getSolrFile().toPath()); 3135 if (CmsLog.INIT.isInfoEnabled()) { 3136 CmsLog.INIT.info( 3137 Messages.get().getBundle().key( 3138 Messages.INIT_SOLR_CORE_CONTAINER_CREATED_2, 3139 m_solrConfig.getHome(), 3140 m_solrConfig.getSolrFile().getName())); 3141 } 3142 } catch (Exception e) { 3143 LOG.error( 3144 Messages.get().getBundle().key( 3145 Messages.ERR_SOLR_CORE_CONTAINER_NOT_CREATED_1, 3146 m_solrConfig.getSolrFile().getAbsolutePath()), 3147 e); 3148 } 3149 return container; 3150 3151 } 3152 3153 /** 3154 * Remove write.lock file in the data directory to ensure the index is unlocked. 3155 * @param dataDir the data directory of the Solr index that should be unlocked. 
3156 */ 3157 private void ensureIndexIsUnlocked(String dataDir) { 3158 3159 Collection<File> lockFiles = new ArrayList<File>(2); 3160 lockFiles.add( 3161 new File( 3162 CmsFileUtil.addTrailingSeparator(CmsFileUtil.addTrailingSeparator(dataDir) + "index") + "write.lock")); 3163 lockFiles.add( 3164 new File( 3165 CmsFileUtil.addTrailingSeparator(CmsFileUtil.addTrailingSeparator(dataDir) + "spellcheck") 3166 + "write.lock")); 3167 for (File lockFile : lockFiles) { 3168 if (lockFile.exists()) { 3169 lockFile.delete(); 3170 LOG.warn( 3171 "Forcely unlocking index with data dir \"" 3172 + dataDir 3173 + "\" by removing file \"" 3174 + lockFile.getAbsolutePath() 3175 + "\"."); 3176 } 3177 } 3178 } 3179 3180 /** 3181 * Returns the report in the given event data, if <code>null</code> 3182 * a new log report is used.<p> 3183 * 3184 * @param event the event to get the report for 3185 * 3186 * @return the report 3187 */ 3188 private I_CmsReport getEventReport(CmsEvent event) { 3189 3190 I_CmsReport report = null; 3191 if (event.getData() != null) { 3192 report = (I_CmsReport)event.getData().get(I_CmsEventListener.KEY_REPORT); 3193 } 3194 if (report == null) { 3195 report = new CmsLogReport(Locale.ENGLISH, getClass()); 3196 } 3197 return report; 3198 } 3199 3200 /** 3201 * Gets all structure ids for which published resources of both states 'new' and 'deleted' exist in the given list.<p> 3202 * 3203 * @param publishedResources a list of published resources 3204 * 3205 * @return the set of structure ids that satisfy the condition above 3206 */ 3207 private Set<CmsUUID> getIdsOfPublishResourcesWhichAreBothNewAndDeleted( 3208 List<CmsPublishedResource> publishedResources) { 3209 3210 Set<CmsUUID> result = new HashSet<CmsUUID>(); 3211 Set<CmsUUID> deletedSet = new HashSet<CmsUUID>(); 3212 for (CmsPublishedResource pubRes : publishedResources) { 3213 if (pubRes.getState().isNew()) { 3214 result.add(pubRes.getStructureId()); 3215 } 3216 if (pubRes.getState().isDeleted()) { 3217 
deletedSet.add(pubRes.getStructureId()); 3218 } 3219 } 3220 result.retainAll(deletedSet); 3221 return result; 3222 } 3223 3224 /** 3225 * Shuts down the Solr core container.<p> 3226 */ 3227 private void shutDownSolrContainer() { 3228 3229 if (m_coreContainer != null) { 3230 for (SolrCore core : m_coreContainer.getCores()) { 3231 // do not unload spellcheck core because otherwise the core.properties file is removed 3232 // even when calling m_coreContainer.unload(core.getName(), false, false, false); 3233 if (!core.getName().equals(CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE)) { 3234 m_coreContainer.unload(core.getName(), false, false, true); 3235 } 3236 } 3237 m_coreContainer.shutdown(); 3238 if (CmsLog.INIT.isInfoEnabled()) { 3239 CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SOLR_SHUTDOWN_SUCCESS_0)); 3240 } 3241 m_coreContainer = null; 3242 } 3243 } 3244 3245}