001/* 002 * File : $Source$ 003 * Date : $Date$ 004 * Version: $Revision$ 005 * 006 * This library is part of OpenCms - 007 * the Open Source Content Management System 008 * 009 * Copyright (C) 2002 - 2009 Alkacon Software (http://www.alkacon.com) 010 * 011 * This library is free software; you can redistribute it and/or 012 * modify it under the terms of the GNU Lesser General Public 013 * License as published by the Free Software Foundation; either 014 * version 2.1 of the License, or (at your option) any later version. 015 * 016 * This library is distributed in the hope that it will be useful, 017 * but WITHOUT ANY WARRANTY; without even the implied warranty of 018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 019 * Lesser General Public License for more details. 020 * 021 * For further information about Alkacon Software, please see the 022 * company website: http://www.alkacon.com 023 * 024 * For further information about OpenCms, please see the 025 * project website: http://www.opencms.org 026 * 027 * You should have received a copy of the GNU Lesser General Public 028 * License along with this library; if not, write to the Free Software 029 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 030 */ 031 032package org.opencms.search; 033 034import org.opencms.configuration.CmsParameterConfiguration; 035import org.opencms.file.CmsObject; 036import org.opencms.file.CmsResource; 037import org.opencms.file.CmsResourceFilter; 038import org.opencms.file.types.CmsResourceTypeXmlContent; 039import org.opencms.main.CmsException; 040import org.opencms.main.CmsIllegalArgumentException; 041import org.opencms.main.CmsLog; 042import org.opencms.main.OpenCms; 043import org.opencms.report.I_CmsReport; 044import org.opencms.search.documents.I_CmsDocumentFactory; 045import org.opencms.search.documents.I_CmsTermHighlighter; 046import org.opencms.search.extractors.CmsExtractionResult; 047import 
org.opencms.search.extractors.I_CmsExtractionResult; 048import org.opencms.search.fields.CmsLuceneFieldConfiguration; 049import org.opencms.search.fields.CmsSearchField; 050import org.opencms.search.fields.CmsSearchFieldConfiguration; 051import org.opencms.util.CmsFileUtil; 052import org.opencms.util.CmsStringUtil; 053 054import java.io.File; 055import java.io.IOException; 056import java.nio.file.Paths; 057import java.text.ParseException; 058import java.util.ArrayList; 059import java.util.Calendar; 060import java.util.Collections; 061import java.util.Date; 062import java.util.HashMap; 063import java.util.List; 064import java.util.Locale; 065import java.util.Map; 066import java.util.Set; 067 068import org.apache.commons.logging.Log; 069import org.apache.lucene.analysis.Analyzer; 070import org.apache.lucene.document.DateTools; 071import org.apache.lucene.document.Document; 072import org.apache.lucene.index.DirectoryReader; 073import org.apache.lucene.index.FieldInfo; 074import org.apache.lucene.index.IndexReader; 075import org.apache.lucene.index.IndexWriter; 076import org.apache.lucene.index.IndexWriterConfig; 077import org.apache.lucene.index.StoredFieldVisitor; 078import org.apache.lucene.index.Term; 079import org.apache.lucene.queryparser.classic.QueryParser; 080import org.apache.lucene.search.BooleanClause; 081import org.apache.lucene.search.BooleanClause.Occur; 082import org.apache.lucene.search.BooleanQuery; 083import org.apache.lucene.search.IndexSearcher; 084import org.apache.lucene.search.MatchAllDocsQuery; 085import org.apache.lucene.search.MultiTermQuery; 086import org.apache.lucene.search.Query; 087import org.apache.lucene.search.ScoreMode; 088import org.apache.lucene.search.Sort; 089import org.apache.lucene.search.SortField; 090import org.apache.lucene.search.TermQuery; 091import org.apache.lucene.search.TopDocs; 092import org.apache.lucene.search.similarities.Similarity; 093import org.apache.lucene.store.Directory; 094import 
org.apache.lucene.store.FSDirectory; 095import org.apache.lucene.store.IOContext; 096import org.apache.solr.uninverting.UninvertingReader; 097import org.apache.solr.uninverting.UninvertingReader.Type; 098 099/** 100 * Abstract search index implementation.<p> 101 */ 102public class CmsSearchIndex extends A_CmsSearchIndex { 103 104 /** A constant for the full qualified name of the CmsSearchIndex class. */ 105 public static final String A_PARAM_PREFIX = "org.opencms.search.CmsSearchIndex"; 106 107 /** Constant for additional parameter to enable optimized full index regeneration (default: false). */ 108 public static final String BACKUP_REINDEXING = A_PARAM_PREFIX + ".useBackupReindexing"; 109 110 /** Look table to quickly zero-pad days / months in date Strings. */ 111 public static final String[] DATES = new String[] { 112 "00", 113 "01", 114 "02", 115 "03", 116 "04", 117 "05", 118 "06", 119 "07", 120 "08", 121 "09", 122 "10", 123 "11", 124 "12", 125 "13", 126 "14", 127 "15", 128 "16", 129 "17", 130 "18", 131 "19", 132 "20", 133 "21", 134 "22", 135 "23", 136 "24", 137 "25", 138 "26", 139 "27", 140 "28", 141 "29", 142 "30", 143 "31"}; 144 145 /** Constant for a field list that contains the "meta" field as well as the "content" field. */ 146 public static final String[] DOC_META_FIELDS = new String[] { 147 CmsSearchField.FIELD_META, 148 CmsSearchField.FIELD_CONTENT}; 149 150 /** Constant for additional parameter to enable excerpt creation (default: true). */ 151 public static final String EXCERPT = A_PARAM_PREFIX + ".createExcerpt"; 152 153 /** Constant for additional parameter for index content extraction. */ 154 public static final String EXTRACT_CONTENT = A_PARAM_PREFIX + ".extractContent"; 155 156 /** Constant for additional parameter to enable/disable language detection (default: false). 
*/ 157 public static final String IGNORE_EXPIRATION = A_PARAM_PREFIX + ".ignoreExpiration"; 158 159 /** Constant for additional parameter to enable/disable language detection (default: false). */ 160 public static final String LANGUAGEDETECTION = "search.solr.useLanguageDetection"; 161 162 /** Constant for additional parameter for the Lucene index setting. */ 163 public static final String LUCENE_AUTO_COMMIT = "lucene.AutoCommit"; 164 165 /** Constant for additional parameter for the Lucene index setting. */ 166 public static final String LUCENE_RAM_BUFFER_SIZE_MB = "lucene.RAMBufferSizeMB"; 167 168 /** Constant for additional parameter for controlling how many hits are loaded at maximum (default: 1000). */ 169 public static final String MAX_HITS = A_PARAM_PREFIX + ".maxHits"; 170 171 /** Indicates how many hits are loaded at maximum by default. */ 172 public static final int MAX_HITS_DEFAULT = 5000; 173 174 /** Constant for years max range span in document search. */ 175 public static final int MAX_YEAR_RANGE = 25; 176 177 /** Constant for additional parameter to enable permission checks (default: true). */ 178 public static final String PERMISSIONS = A_PARAM_PREFIX + ".checkPermissions"; 179 180 /** Constant for additional parameter to set the thread priority during search. */ 181 public static final String PRIORITY = A_PARAM_PREFIX + ".priority"; 182 183 /** Constant for additional parameter to enable time range checks (default: true). */ 184 public static final String TIME_RANGE = A_PARAM_PREFIX + ".checkTimeRange"; 185 186 /** The document type name for XML contents. 
*/ 187 public static final String TYPE_XMLCONTENT = "xmlcontent"; 188 189 /** 190 * A stored field visitor, that does not return the large fields: "content" and "contentblob".<p> 191 */ 192 protected static final StoredFieldVisitor VISITOR = new StoredFieldVisitor() { 193 194 /** 195 * @see org.apache.lucene.index.StoredFieldVisitor#needsField(org.apache.lucene.index.FieldInfo) 196 */ 197 @Override 198 public Status needsField(FieldInfo fieldInfo) { 199 200 return !CmsSearchFieldConfiguration.LAZY_FIELDS.contains(fieldInfo.name) ? Status.YES : Status.NO; 201 } 202 }; 203 204 /** The log object for this class. */ 205 private static final Log LOG = CmsLog.getLog(CmsSearchIndex.class); 206 207 /** The serial version id. */ 208 private static final long serialVersionUID = 8461682478204452718L; 209 210 /** The configured Lucene analyzer used for this index. */ 211 private transient Analyzer m_analyzer; 212 213 /** Indicates if backup re-indexing is used by this index. */ 214 private boolean m_backupReindexing; 215 216 /** The permission check mode for this index. */ 217 private boolean m_checkPermissions; 218 219 /** The time range check mode for this index. */ 220 private boolean m_checkTimeRange; 221 222 /** The excerpt mode for this index. */ 223 private boolean m_createExcerpt; 224 225 /** Map of display query filters to use. */ 226 private transient Map<String, Query> m_displayFilters; 227 228 /** 229 * Signals whether expiration dates should be ignored when checking permissions or not.<p> 230 * @see #IGNORE_EXPIRATION 231 */ 232 private boolean m_ignoreExpiration; 233 234 /** The Lucene index searcher to use. */ 235 private transient IndexSearcher m_indexSearcher; 236 237 /** The Lucene index RAM buffer size, see {@link IndexWriterConfig#setRAMBufferSizeMB(double)}. */ 238 private Double m_luceneRAMBufferSizeMB; 239 240 /** Indicates how many hits are loaded at maximum. */ 241 private int m_maxHits; 242 243 /** The thread priority for a search. 
*/ 244 private int m_priority; 245 246 /** Controls if a resource requires view permission to be displayed in the result list. */ 247 private boolean m_requireViewPermission; 248 249 /** The cms specific Similarity implementation. */ 250 private final transient Similarity m_sim = new CmsSearchSimilarity(); 251 252 /** 253 * Default constructor only intended to be used by the XML configuration. <p> 254 * 255 * It is recommended to use the constructor <code>{@link #CmsSearchIndex(String)}</code> 256 * as it enforces the mandatory name argument. <p> 257 */ 258 public CmsSearchIndex() { 259 260 super(); 261 m_checkPermissions = true; 262 m_priority = -1; 263 m_createExcerpt = true; 264 m_maxHits = MAX_HITS_DEFAULT; 265 m_checkTimeRange = false; 266 } 267 268 /** 269 * Creates a new CmsSearchIndex with the given name.<p> 270 * 271 * @param name the system-wide unique name for the search index 272 * 273 * @throws CmsIllegalArgumentException if the given name is null, empty or already taken by another search index 274 */ 275 public CmsSearchIndex(String name) 276 throws CmsIllegalArgumentException { 277 278 this(); 279 setName(name); 280 } 281 282 /** 283 * Generates a list of date terms for the optimized date range search with "daily" granularity level.<p> 284 * 285 * How this works:<ul> 286 * <li>For each document, terms are added for the year, the month and the day the document 287 * was modified or created) in. So for example if a document is modified at February 02, 2009, 288 * then the following terms are stored for this document: 289 * "20090202", "200902" and "2009".</li> 290 * <li>In case a date range search is done, then all possible matches for the 291 * provided rage are created as search terms and matched with the document terms.</li> 292 * <li>Consider the following use case: You want to find out if a resource has been changed 293 * in the time between November 29, 2007 and March 01, 2009. 
294 * One term to match is simply "2008" because if a document 295 * was modified in 2008, then it is clearly in the date range. 296 * Other terms are "200712", "200901" and "200902", because all documents 297 * modified in these months are also a certain matches. 298 * Finally we need to add terms for "20071129", "20071130" and "20090301" to match the days in the 299 * starting and final month.</li> 300 * </ul> 301 * 302 * @param startDate start date of the range to search in 303 * @param endDate end date of the range to search in 304 * 305 * @return a list of date terms for the optimized date range search 306 */ 307 public static List<String> getDateRangeSpan(long startDate, long endDate) { 308 309 if (startDate > endDate) { 310 // switch so that the end is always before the start 311 long temp = endDate; 312 endDate = startDate; 313 startDate = temp; 314 } 315 316 List<String> result = new ArrayList<String>(100); 317 318 // initialize calendars from the time value 319 Calendar calStart = Calendar.getInstance(OpenCms.getLocaleManager().getTimeZone()); 320 Calendar calEnd = Calendar.getInstance(calStart.getTimeZone()); 321 calStart.setTimeInMillis(startDate); 322 calEnd.setTimeInMillis(endDate); 323 324 // get the required info to build the date range from the calendars 325 int startDay = calStart.get(Calendar.DAY_OF_MONTH); 326 int endDay = calEnd.get(Calendar.DAY_OF_MONTH); 327 int maxDayInStartMonth = calStart.getActualMaximum(Calendar.DAY_OF_MONTH); 328 int startMonth = calStart.get(Calendar.MONTH) + 1; 329 int endMonth = calEnd.get(Calendar.MONTH) + 1; 330 int startYear = calStart.get(Calendar.YEAR); 331 int endYear = calEnd.get(Calendar.YEAR); 332 333 // first add all full years in the date range 334 result.addAll(getYearSpan(startYear + 1, endYear - 1)); 335 336 if (startYear != endYear) { 337 // different year, different month 338 result.addAll(getMonthSpan(startMonth + 1, 12, startYear)); 339 result.addAll(getMonthSpan(1, endMonth - 1, endYear)); 340 
result.addAll(getDaySpan(startDay, maxDayInStartMonth, startMonth, startYear)); 341 result.addAll(getDaySpan(1, endDay, endMonth, endYear)); 342 } else { 343 if (startMonth != endMonth) { 344 // same year, different month 345 result.addAll(getMonthSpan(startMonth + 1, endMonth - 1, startYear)); 346 result.addAll(getDaySpan(startDay, maxDayInStartMonth, startMonth, startYear)); 347 result.addAll(getDaySpan(1, endDay, endMonth, endYear)); 348 } else { 349 // same year, same month 350 result.addAll(getDaySpan(startDay, endDay, endMonth, endYear)); 351 } 352 } 353 354 // sort the result, makes the range better readable in the debugger 355 Collections.sort(result); 356 return result; 357 } 358 359 /** 360 * Calculate a span of days in the given year and month for the optimized date range search.<p> 361 * 362 * The result will contain dates formatted like "yyyyMMDD", for example "20080131".<p> 363 * 364 * @param startDay the start day 365 * @param endDay the end day 366 * @param month the month 367 * @param year the year 368 * 369 * @return a span of days in the given year and month for the optimized date range search 370 */ 371 private static List<String> getDaySpan(int startDay, int endDay, int month, int year) { 372 373 List<String> result = new ArrayList<String>(); 374 String yearMonthStr = String.valueOf(year) + DATES[month]; 375 for (int i = startDay; i <= endDay; i++) { 376 String dateStr = yearMonthStr + DATES[i]; 377 result.add(dateStr); 378 } 379 return result; 380 } 381 382 /** 383 * Calculate a span of months in the given year for the optimized date range search.<p> 384 * 385 * The result will contain dates formatted like "yyyyMM", for example "200801".<p> 386 * 387 * @param startMonth the start month 388 * @param endMonth the end month 389 * @param year the year 390 * 391 * @return a span of months in the given year for the optimized date range search 392 */ 393 private static List<String> getMonthSpan(int startMonth, int endMonth, int year) { 394 395 
List<String> result = new ArrayList<String>(); 396 String yearStr = String.valueOf(year); 397 for (int i = startMonth; i <= endMonth; i++) { 398 String dateStr = yearStr + DATES[i]; 399 result.add(dateStr); 400 } 401 return result; 402 } 403 404 /** 405 * Calculate a span of years for the optimized date range search.<p> 406 * 407 * The result will contain dates formatted like "yyyy", for example "2008".<p> 408 * 409 * @param startYear the start year 410 * @param endYear the end year 411 * 412 * @return a span of years for the optimized date range search 413 */ 414 private static List<String> getYearSpan(int startYear, int endYear) { 415 416 List<String> result = new ArrayList<String>(); 417 for (int i = startYear; i <= endYear; i++) { 418 String dateStr = String.valueOf(i); 419 result.add(dateStr); 420 } 421 return result; 422 } 423 424 /** 425 * Adds a parameter.<p> 426 * 427 * @param key the key/name of the parameter 428 * @param value the value of the parameter 429 * 430 */ 431 @Override 432 public void addConfigurationParameter(String key, String value) { 433 434 if (PERMISSIONS.equals(key)) { 435 m_checkPermissions = Boolean.valueOf(value).booleanValue(); 436 } else if (EXTRACT_CONTENT.equals(key)) { 437 setExtractContent(Boolean.valueOf(value).booleanValue()); 438 } else if (BACKUP_REINDEXING.equals(key)) { 439 m_backupReindexing = Boolean.valueOf(value).booleanValue(); 440 } else if (LANGUAGEDETECTION.equals(key)) { 441 setLanguageDetection(Boolean.valueOf(value).booleanValue()); 442 } else if (IGNORE_EXPIRATION.equals(key)) { 443 m_ignoreExpiration = Boolean.valueOf(value).booleanValue(); 444 } else if (PRIORITY.equals(key)) { 445 m_priority = Integer.parseInt(value); 446 if (m_priority < Thread.MIN_PRIORITY) { 447 m_priority = Thread.MIN_PRIORITY; 448 LOG.error( 449 Messages.get().getBundle().key( 450 Messages.LOG_SEARCH_PRIORITY_TOO_LOW_2, 451 value, 452 new Integer(Thread.MIN_PRIORITY))); 453 454 } else if (m_priority > Thread.MAX_PRIORITY) { 455 
m_priority = Thread.MAX_PRIORITY; 456 LOG.debug( 457 Messages.get().getBundle().key( 458 Messages.LOG_SEARCH_PRIORITY_TOO_HIGH_2, 459 value, 460 new Integer(Thread.MAX_PRIORITY))); 461 } 462 } 463 464 if (MAX_HITS.equals(key)) { 465 try { 466 m_maxHits = Integer.parseInt(value); 467 } catch (NumberFormatException e) { 468 LOG.error(Messages.get().getBundle().key(Messages.LOG_INVALID_PARAM_3, value, key, getName())); 469 } 470 if (m_maxHits < (MAX_HITS_DEFAULT / 100)) { 471 m_maxHits = MAX_HITS_DEFAULT; 472 LOG.error(Messages.get().getBundle().key(Messages.LOG_INVALID_PARAM_3, value, key, getName())); 473 } 474 } else if (TIME_RANGE.equals(key)) { 475 m_checkTimeRange = Boolean.valueOf(value).booleanValue(); 476 } else if (CmsSearchIndex.EXCERPT.equals(key)) { 477 m_createExcerpt = Boolean.valueOf(value).booleanValue(); 478 479 } else if (LUCENE_RAM_BUFFER_SIZE_MB.equals(key)) { 480 try { 481 m_luceneRAMBufferSizeMB = Double.valueOf(value); 482 } catch (NumberFormatException e) { 483 LOG.error(Messages.get().getBundle().key(Messages.LOG_INVALID_PARAM_3, value, key, getName())); 484 } 485 } 486 } 487 488 /** 489 * Creates an empty document that can be used by this search field configuration.<p> 490 * 491 * @param resource the resource to create the document for 492 * 493 * @return a new and empty document 494 */ 495 public I_CmsSearchDocument createEmptyDocument(CmsResource resource) { 496 497 return new CmsLuceneDocument(new Document()); 498 } 499 500 /** 501 * Returns the Lucene analyzer used for this index.<p> 502 * 503 * @return the Lucene analyzer used for this index 504 */ 505 public Analyzer getAnalyzer() { 506 507 return m_analyzer; 508 } 509 510 /** 511 * @see org.opencms.configuration.I_CmsConfigurationParameterHandler#getConfiguration() 512 */ 513 @Override 514 public CmsParameterConfiguration getConfiguration() { 515 516 CmsParameterConfiguration result = new CmsParameterConfiguration(); 517 if (getPriority() > 0) { 518 result.put(PRIORITY, 
String.valueOf(m_priority)); 519 } 520 if (!isExtractingContent()) { 521 result.put(EXTRACT_CONTENT, String.valueOf(isExtractingContent())); 522 } 523 if (!isCheckingPermissions()) { 524 result.put(PERMISSIONS, String.valueOf(m_checkPermissions)); 525 } 526 if (isBackupReindexing()) { 527 result.put(BACKUP_REINDEXING, String.valueOf(m_backupReindexing)); 528 } 529 if (isLanguageDetection()) { 530 result.put(LANGUAGEDETECTION, String.valueOf(isLanguageDetection())); 531 } 532 if (getMaxHits() != MAX_HITS_DEFAULT) { 533 result.put(MAX_HITS, String.valueOf(getMaxHits())); 534 } 535 if (!isCreatingExcerpt()) { 536 result.put(EXCERPT, String.valueOf(m_createExcerpt)); 537 } 538 if (m_luceneRAMBufferSizeMB != null) { 539 result.put(LUCENE_RAM_BUFFER_SIZE_MB, String.valueOf(m_luceneRAMBufferSizeMB)); 540 } 541 // always write time range check parameter because of logic change in OpenCms 8.0 542 result.put(TIME_RANGE, String.valueOf(m_checkTimeRange)); 543 return result; 544 } 545 546 /** 547 * @see org.opencms.search.I_CmsSearchIndex#getContentIfUnchanged(org.opencms.file.CmsResource) 548 */ 549 @Override 550 public I_CmsExtractionResult getContentIfUnchanged(CmsResource resource) { 551 552 // compare "date of last modification of content" from Lucene index and OpenCms VFS 553 // if this is identical, then the data from the Lucene index can be re-used 554 I_CmsSearchDocument oldDoc = getDocument(CmsSearchField.FIELD_PATH, resource.getRootPath()); 555 // first check if the document is already in the index 556 if ((oldDoc != null) && (oldDoc.getFieldValueAsDate(CmsSearchField.FIELD_DATE_CONTENT) != null)) { 557 long contentDateIndex = oldDoc.getFieldValueAsDate(CmsSearchField.FIELD_DATE_CONTENT).getTime(); 558 // now compare the date with the date stored in the resource 559 if (contentDateIndex == resource.getDateContent()) { 560 // extract stored content blob from index 561 return CmsExtractionResult.fromBytes(oldDoc.getContentBlob()); 562 } 563 } 564 return null; 565 } 
566 567 /** 568 * Returns a document by document ID.<p> 569 * 570 * @param docId the id to get the document for 571 * 572 * @return the CMS specific document 573 */ 574 public I_CmsSearchDocument getDocument(int docId) { 575 576 try { 577 IndexSearcher searcher = getSearcher(); 578 return new CmsLuceneDocument(searcher.doc(docId)); 579 } catch (IOException e) { 580 // ignore, return null and assume document was not found 581 } 582 return null; 583 } 584 585 /** 586 * Returns the Lucene document with the given root path from the index.<p> 587 * 588 * @param rootPath the root path of the document to get 589 * 590 * @return the Lucene document with the given root path from the index 591 * 592 * @deprecated Use {@link #getDocument(String, String)} instead and provide {@link org.opencms.search.fields.CmsLuceneField#FIELD_PATH} as field to search in 593 */ 594 @Deprecated 595 public Document getDocument(String rootPath) { 596 597 if (getDocument(CmsSearchField.FIELD_PATH, rootPath) != null) { 598 return (Document)getDocument(CmsSearchField.FIELD_PATH, rootPath).getDocument(); 599 } 600 return null; 601 } 602 603 /** 604 * Returns the first document where the given term matches the selected index field.<p> 605 * 606 * Use this method to search for documents which have unique field values, like a unique id.<p> 607 * 608 * @param field the field to search in 609 * @param term the term to search for 610 * 611 * @return the first document where the given term matches the selected index field 612 */ 613 public I_CmsSearchDocument getDocument(String field, String term) { 614 615 Document result = null; 616 IndexSearcher searcher = getSearcher(); 617 if (searcher != null) { 618 // search for an exact match on the selected field 619 Term resultTerm = new Term(field, term); 620 try { 621 TopDocs hits = searcher.search(new TermQuery(resultTerm), 1); 622 if (hits.scoreDocs.length > 0) { 623 result = searcher.doc(hits.scoreDocs[0].doc); 624 } 625 } catch (IOException e) { 626 // 
ignore, return null and assume document was not found 627 } 628 } 629 if (result != null) { 630 return new CmsLuceneDocument(result); 631 } 632 return null; 633 } 634 635 /** 636 * Returns the document type factory used for the given resource in this index, or <code>null</code> 637 * in case the resource is not indexed by this index.<p> 638 * 639 * A resource is indexed if the following is all true: <ol> 640 * <li>The index contains at last one index source matching the root path of the given resource. 641 * <li>For this matching index source, the document type factory needed by the resource is also configured. 642 * </ol> 643 * 644 * @param res the resource to check 645 * 646 * @return he document type factory used for the given resource in this index, or <code>null</code> 647 * in case the resource is not indexed by this index 648 */ 649 @Override 650 public I_CmsDocumentFactory getDocumentFactory(CmsResource res) { 651 652 if ((res != null) && (getSources() != null)) { 653 // the result can only be null or the type configured for the resource 654 I_CmsDocumentFactory result = OpenCms.getSearchManager().getDocumentFactory(res); 655 if (result != null) { 656 // check the path of the resource if it matches with one (or more) of the configured index sources 657 for (CmsSearchIndexSource source : getSources()) { 658 if (source.isIndexing(res.getRootPath(), result.getName()) 659 || (source.isIndexing(res.getRootPath(), TYPE_XMLCONTENT) 660 && CmsResourceTypeXmlContent.isXmlContent(res))) { 661 // we found an index source that indexes the resource 662 return result; 663 } 664 } 665 } 666 } 667 return null; 668 } 669 670 /** 671 * Returns the language locale for the given resource in this index.<p> 672 * 673 * @param cms the current OpenCms user context 674 * @param resource the resource to check 675 * @param availableLocales a list of locales supported by the resource 676 * 677 * @return the language locale for the given resource in this index 678 */ 679 @Override 680 
public Locale getLocaleForResource(CmsObject cms, CmsResource resource, List<Locale> availableLocales) { 681 682 Locale result; 683 List<Locale> defaultLocales = OpenCms.getLocaleManager().getDefaultLocales(cms, resource); 684 List<Locale> locales = availableLocales; 685 if ((locales == null) || (locales.size() == 0)) { 686 locales = defaultLocales; 687 } 688 result = OpenCms.getLocaleManager().getBestMatchingLocale(getLocale(), defaultLocales, locales); 689 return result; 690 } 691 692 /** 693 * Returns the language locale of the index as a String.<p> 694 * 695 * @return the language locale of the index as a String 696 * 697 * @see #getLocale() 698 */ 699 public String getLocaleString() { 700 701 return getLocale().toString(); 702 } 703 704 /** 705 * Indicates the number of how many hits are loaded at maximum.<p> 706 * 707 * The number of maximum documents to load from the index 708 * must be specified. The default of this setting is {@link CmsSearchIndex#MAX_HITS_DEFAULT} (5000). 709 * This means that at maximum 5000 results are returned from the index. 
710 * Please note that this number may be reduced further because of OpenCms read permissions 711 * or per-user file visibility settings not controlled in the index.<p> 712 * 713 * @return the number of how many hits are loaded at maximum 714 * 715 * @since 7.5.1 716 */ 717 public int getMaxHits() { 718 719 return m_maxHits; 720 } 721 722 /** 723 * Returns the path where this index stores it's data in the "real" file system.<p> 724 * 725 * @return the path where this index stores it's data in the "real" file system 726 */ 727 @Override 728 public String getPath() { 729 730 if (super.getPath() == null) { 731 setPath(generateIndexDirectory()); 732 } 733 return super.getPath(); 734 } 735 736 /** 737 * Returns the Thread priority for this search index.<p> 738 * 739 * @return the Thread priority for this search index 740 */ 741 public int getPriority() { 742 743 return m_priority; 744 } 745 746 /** 747 * Returns the Lucene index searcher used for this search index.<p> 748 * 749 * @return the Lucene index searcher used for this search index 750 */ 751 public IndexSearcher getSearcher() { 752 753 return m_indexSearcher; 754 } 755 756 /** 757 * @see org.opencms.search.A_CmsSearchIndex#initialize() 758 */ 759 @Override 760 public void initialize() throws CmsSearchException { 761 762 super.initialize(); 763 764 // get the configured analyzer and apply the the field configuration analyzer wrapper 765 @SuppressWarnings("resource") 766 Analyzer baseAnalyzer = OpenCms.getSearchManager().getAnalyzer(getLocale()); 767 768 if (getFieldConfiguration() instanceof CmsLuceneFieldConfiguration) { 769 CmsLuceneFieldConfiguration fc = (CmsLuceneFieldConfiguration)getFieldConfiguration(); 770 setAnalyzer(fc.getAnalyzer(baseAnalyzer)); 771 } 772 } 773 774 /** 775 * Returns <code>true</code> if backup re-indexing is done by this index.<p> 776 * 777 * This is an optimization method by which the old extracted content is 778 * reused in order to save performance when re-indexing.<p> 779 * 780 * 
@return <code>true</code> if backup re-indexing is done by this index 781 * 782 * @since 7.5.1 783 */ 784 public boolean isBackupReindexing() { 785 786 return m_backupReindexing; 787 } 788 789 /** 790 * Returns <code>true</code> if permissions are checked for search results by this index.<p> 791 * 792 * If permission checks are not required, they can be turned off in the index search configuration parameters 793 * in <code>opencms-search.xml</code>. Not checking permissions will improve performance.<p> 794 * 795 * This is can be of use in scenarios when you know that all search results are always readable, 796 * which is usually true for public websites that do not have personalized accounts.<p> 797 * 798 * Please note that even if a result is returned where the current user has no read permissions, 799 * the user can not actually access this document. It will only appear in the search result list, 800 * but if the user clicks the link to open the document he will get an error.<p> 801 * 802 * 803 * @return <code>true</code> if permissions are checked for search results by this index 804 */ 805 public boolean isCheckingPermissions() { 806 807 return m_checkPermissions; 808 } 809 810 /** 811 * Returns <code>true</code> if the document time range is checked with a granularity level of seconds 812 * for search results by this index.<p> 813 * 814 * Since OpenCms 8.0, time range checks are always done if {@link CmsSearchParameters#setMinDateLastModified(long)} 815 * or any of the corresponding methods are used. 816 * This is done very efficiently using optimized Lucene filers. 817 * However, the granularity of these checks are done only on a daily 818 * basis, which means that you can only find "changes made yesterday" but not "changes made last hour". 
819 * For normal limitation of search results, a daily granularity should be enough.<p> 820 * 821 * If time range checks with a granularity level of seconds are required, 822 * they can be turned on in the index search configuration parameters 823 * in <code>opencms-search.xml</code>. 824 * Not checking the time range with a granularity level of seconds will improve performance.<p> 825 * 826 * By default the granularity level of seconds is turned off since OpenCms 8.0<p> 827 * 828 * @return <code>true</code> if the document time range is checked with a granularity level of seconds for search results by this index 829 */ 830 public boolean isCheckingTimeRange() { 831 832 return m_checkTimeRange; 833 } 834 835 /** 836 * Returns the checkPermissions.<p> 837 * 838 * @return the checkPermissions 839 */ 840 public boolean isCheckPermissions() { 841 842 return m_checkPermissions; 843 } 844 845 /** 846 * Returns <code>true</code> if an excerpt is generated by this index.<p> 847 * 848 * If no except is required, generation can be turned off in the index search configuration parameters 849 * in <code>opencms-search.xml</code>. 
Not generating an excerpt will improve performance.<p> 850 * 851 * @return <code>true</code> if an excerpt is generated by this index 852 */ 853 public boolean isCreatingExcerpt() { 854 855 return m_createExcerpt; 856 } 857 858 /** 859 * Returns the ignoreExpiration.<p> 860 * 861 * @return the ignoreExpiration 862 */ 863 public boolean isIgnoreExpiration() { 864 865 return m_ignoreExpiration; 866 } 867 868 /** 869 * @see org.opencms.search.A_CmsSearchIndex#isInitialized() 870 */ 871 @Override 872 public boolean isInitialized() { 873 874 return super.isInitialized() && (null != getPath()); 875 } 876 877 /** 878 * Returns <code>true</code> if a resource requires read permission to be included in the result list.<p> 879 * 880 * @return <code>true</code> if a resource requires read permission to be included in the result list 881 */ 882 public boolean isRequireViewPermission() { 883 884 return m_requireViewPermission; 885 } 886 887 /** 888 * @see org.opencms.search.A_CmsSearchIndex#onIndexChanged(boolean) 889 */ 890 @Override 891 public void onIndexChanged(boolean force) { 892 893 if (force) { 894 indexSearcherOpen(getPath()); 895 } else { 896 indexSearcherUpdate(); 897 } 898 } 899 900 /** 901 * Performs a search on the index within the given fields.<p> 902 * 903 * The result is returned as List with entries of type I_CmsSearchResult.<p> 904 * 905 * @param cms the current user's Cms object 906 * @param params the parameters to use for the search 907 * 908 * @return the List of results found or an empty list 909 * 910 * @throws CmsSearchException if something goes wrong 911 */ 912 public CmsSearchResultList search(CmsObject cms, CmsSearchParameters params) throws CmsSearchException { 913 914 long timeTotal = -System.currentTimeMillis(); 915 long timeLucene; 916 long timeResultProcessing; 917 918 if (LOG.isDebugEnabled()) { 919 LOG.debug(Messages.get().getBundle().key(Messages.LOG_SEARCH_PARAMS_2, params, getName())); 920 } 921 922 // the hits found during the search 923 
TopDocs hits; 924 925 // storage for the results found 926 CmsSearchResultList searchResults = new CmsSearchResultList(); 927 928 int previousPriority = Thread.currentThread().getPriority(); 929 930 try { 931 // copy the user OpenCms context 932 CmsObject searchCms = OpenCms.initCmsObject(cms); 933 934 if (getPriority() > 0) { 935 // change thread priority in order to reduce search impact on overall system performance 936 Thread.currentThread().setPriority(getPriority()); 937 } 938 939 // change the project 940 searchCms.getRequestContext().setCurrentProject(searchCms.readProject(getProject())); 941 942 timeLucene = -System.currentTimeMillis(); 943 944 // several search options are searched using filters 945 BooleanQuery.Builder builder = new BooleanQuery.Builder(); 946 // append root path filter 947 builder = appendPathFilter(searchCms, builder, params.getRoots()); 948 // append category filter 949 builder = appendCategoryFilter(searchCms, builder, params.getCategories()); 950 // append resource type filter 951 builder = appendResourceTypeFilter(searchCms, builder, params.getResourceTypes()); 952 953 // append date last modified filter 954 builder = appendDateLastModifiedFilter( 955 builder, 956 params.getMinDateLastModified(), 957 params.getMaxDateLastModified()); 958 // append date created filter 959 builder = appendDateCreatedFilter(builder, params.getMinDateCreated(), params.getMaxDateCreated()); 960 961 // the search query to use, will be constructed in the next lines 962 Query query = null; 963 // store separate fields query for excerpt highlighting 964 Query fieldsQuery = null; 965 966 // get an index searcher that is certainly up to date 967 indexSearcherUpdate(); 968 IndexSearcher searcher = getSearcher(); 969 970 if (!params.isIgnoreQuery()) { 971 // since OpenCms 8 the query can be empty in which case only filters are used for the result 972 if (params.getParsedQuery() != null) { 973 // the query was already build, re-use it 974 QueryParser p = new 
QueryParser(CmsSearchField.FIELD_CONTENT, getAnalyzer()); 975 fieldsQuery = p.parse(params.getParsedQuery()); 976 } else if (params.getFieldQueries() != null) { 977 // each field has an individual query 978 BooleanQuery.Builder mustOccur = null; 979 BooleanQuery.Builder shouldOccur = null; 980 for (CmsSearchParameters.CmsSearchFieldQuery fq : params.getFieldQueries()) { 981 // add one sub-query for each defined field 982 QueryParser p = new QueryParser(fq.getFieldName(), getAnalyzer()); 983 // first generate the combined keyword query 984 Query keywordQuery = null; 985 if (fq.getSearchTerms().size() == 1) { 986 // this is just a single size keyword list 987 keywordQuery = p.parse(fq.getSearchTerms().get(0)); 988 } else { 989 // multiple size keyword list 990 BooleanQuery.Builder keywordListQuery = new BooleanQuery.Builder(); 991 for (String keyword : fq.getSearchTerms()) { 992 keywordListQuery.add(p.parse(keyword), fq.getTermOccur()); 993 } 994 keywordQuery = keywordListQuery.build(); 995 } 996 if (BooleanClause.Occur.SHOULD.equals(fq.getOccur())) { 997 if (shouldOccur == null) { 998 shouldOccur = new BooleanQuery.Builder(); 999 } 1000 shouldOccur.add(keywordQuery, fq.getOccur()); 1001 } else { 1002 if (mustOccur == null) { 1003 mustOccur = new BooleanQuery.Builder(); 1004 } 1005 mustOccur.add(keywordQuery, fq.getOccur()); 1006 } 1007 } 1008 BooleanQuery.Builder booleanFieldsQuery = new BooleanQuery.Builder(); 1009 if (mustOccur != null) { 1010 booleanFieldsQuery.add(mustOccur.build(), BooleanClause.Occur.MUST); 1011 } 1012 if (shouldOccur != null) { 1013 booleanFieldsQuery.add(shouldOccur.build(), BooleanClause.Occur.MUST); 1014 } 1015 fieldsQuery = searcher.rewrite(booleanFieldsQuery.build()); 1016 } else if ((params.getFields() != null) && (params.getFields().size() > 0)) { 1017 // no individual field queries have been defined, so use one query for all fields 1018 BooleanQuery.Builder booleanFieldsQuery = new BooleanQuery.Builder(); 1019 // this is a "regular" 
query over one or more fields 1020 // add one sub-query for each of the selected fields, e.g. "content", "title" etc. 1021 for (int i = 0; i < params.getFields().size(); i++) { 1022 QueryParser p = new QueryParser(params.getFields().get(i), getAnalyzer()); 1023 p.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE); 1024 booleanFieldsQuery.add(p.parse(params.getQuery()), BooleanClause.Occur.SHOULD); 1025 } 1026 fieldsQuery = searcher.rewrite(booleanFieldsQuery.build()); 1027 } else { 1028 // if no fields are provided, just use the "content" field by default 1029 QueryParser p = new QueryParser(CmsSearchField.FIELD_CONTENT, getAnalyzer()); 1030 fieldsQuery = searcher.rewrite(p.parse(params.getQuery())); 1031 } 1032 1033 // finally set the main query to the fields query 1034 // please note that we still need both variables in case the query is a MatchAllDocsQuery - see below 1035 query = fieldsQuery; 1036 } 1037 1038 if (LOG.isDebugEnabled()) { 1039 LOG.debug(Messages.get().getBundle().key(Messages.LOG_BASE_QUERY_1, query)); 1040 } 1041 1042 if (query == null) { 1043 // if no text query is set, then we match all documents 1044 query = new MatchAllDocsQuery(); 1045 } else { 1046 // store the parsed query for page browsing 1047 params.setParsedQuery(query.toString(CmsSearchField.FIELD_CONTENT)); 1048 } 1049 1050 // build the final query 1051 final BooleanQuery.Builder finalQueryBuilder = new BooleanQuery.Builder(); 1052 finalQueryBuilder.add(query, BooleanClause.Occur.MUST); 1053 finalQueryBuilder.add(builder.build(), BooleanClause.Occur.FILTER); 1054 final BooleanQuery finalQuery = finalQueryBuilder.build(); 1055 1056 // collect the categories 1057 CmsSearchCategoryCollector categoryCollector; 1058 if (params.isCalculateCategories()) { 1059 // USE THIS OPTION WITH CAUTION 1060 // this may slow down searched by an order of magnitude 1061 categoryCollector = new CmsSearchCategoryCollector(searcher); 1062 // perform a first search to collect the categories 
1063 searcher.search(finalQuery, categoryCollector); 1064 // store the result 1065 searchResults.setCategories(categoryCollector.getCategoryCountResult()); 1066 } 1067 1068 // get maxScore first, since Lucene 8, it's not computed automatically anymore 1069 TopDocs scoreHits = searcher.search(query, 1); 1070 float maxScore = scoreHits.scoreDocs.length == 0 ? Float.NaN : scoreHits.scoreDocs[0].score; 1071 // perform the search operation 1072 if ((params.getSort() == null) || (params.getSort() == CmsSearchParameters.SORT_DEFAULT)) { 1073 // apparently scoring is always enabled by Lucene if no sort order is provided 1074 hits = searcher.search(finalQuery, getMaxHits()); 1075 } else { 1076 // if a sort order is provided, we must check if scoring must be calculated by the searcher 1077 boolean isSortScore = isSortScoring(searcher, params.getSort()); 1078 hits = searcher.search(finalQuery, getMaxHits(), params.getSort(), isSortScore); 1079 } 1080 1081 timeLucene += System.currentTimeMillis(); 1082 timeResultProcessing = -System.currentTimeMillis(); 1083 1084 if (hits != null) { 1085 long hitCount = hits.totalHits.value > hits.scoreDocs.length 1086 ? hits.scoreDocs.length 1087 : hits.totalHits.value; 1088 int page = params.getSearchPage(); 1089 long start = -1, end = -1; 1090 if ((params.getMatchesPerPage() > 0) && (page > 0) && (hitCount > 0)) { 1091 // calculate the final size of the search result 1092 start = params.getMatchesPerPage() * (page - 1); 1093 end = start + params.getMatchesPerPage(); 1094 // ensure that both i and n are inside the range of foundDocuments.size() 1095 start = (start > hitCount) ? hitCount : start; 1096 end = (end > hitCount) ? 
hitCount : end; 1097 } else { 1098 // return all found documents in the search result 1099 start = 0; 1100 end = hitCount; 1101 } 1102 1103 Set<String> returnFields = ((CmsLuceneFieldConfiguration)getFieldConfiguration()).getReturnFields(); 1104 Set<String> excerptFields = ((CmsLuceneFieldConfiguration)getFieldConfiguration()).getExcerptFields(); 1105 1106 long visibleHitCount = hitCount; 1107 for (int i = 0, cnt = 0; (i < hitCount) && (cnt < end); i++) { 1108 try { 1109 Document doc = searcher.doc(hits.scoreDocs[i].doc, returnFields); 1110 I_CmsSearchDocument searchDoc = new CmsLuceneDocument(doc); 1111 searchDoc.setScore(hits.scoreDocs[i].score); 1112 if ((isInTimeRange(doc, params)) && (hasReadPermission(searchCms, searchDoc))) { 1113 // user has read permission 1114 if (cnt >= start) { 1115 // do not use the resource to obtain the raw content, read it from the lucene document! 1116 String excerpt = null; 1117 if (isCreatingExcerpt() && (fieldsQuery != null)) { 1118 Document exDoc = searcher.doc(hits.scoreDocs[i].doc, excerptFields); 1119 I_CmsTermHighlighter highlighter = OpenCms.getSearchManager().getHighlighter(); 1120 excerpt = highlighter.getExcerpt(exDoc, this, params, fieldsQuery, getAnalyzer()); 1121 } 1122 int score = Math.round( 1123 (maxScore != Float.NaN ? 
(hits.scoreDocs[i].score / maxScore) * 100f : 0)); 1124 searchResults.add(new CmsSearchResult(score, doc, excerpt)); 1125 } 1126 cnt++; 1127 } else { 1128 visibleHitCount--; 1129 } 1130 } catch (Exception e) { 1131 // should not happen, but if it does we want to go on with the next result nevertheless 1132 if (LOG.isWarnEnabled()) { 1133 LOG.warn(Messages.get().getBundle().key(Messages.LOG_RESULT_ITERATION_FAILED_0), e); 1134 } 1135 } 1136 } 1137 1138 // save the total count of search results 1139 searchResults.setHitCount((int)visibleHitCount); 1140 } else { 1141 searchResults.setHitCount(0); 1142 } 1143 1144 timeResultProcessing += System.currentTimeMillis(); 1145 } catch (RuntimeException e) { 1146 throw new CmsSearchException(Messages.get().container(Messages.ERR_SEARCH_PARAMS_1, params), e); 1147 } catch (Exception e) { 1148 throw new CmsSearchException(Messages.get().container(Messages.ERR_SEARCH_PARAMS_1, params), e); 1149 } finally { 1150 1151 // re-set thread to previous priority 1152 Thread.currentThread().setPriority(previousPriority); 1153 } 1154 1155 if (LOG.isDebugEnabled()) { 1156 timeTotal += System.currentTimeMillis(); 1157 Object[] logParams = new Object[] { 1158 new Long(hits == null ? 
0 : hits.totalHits.value), 1159 new Long(timeTotal), 1160 new Long(timeLucene), 1161 new Long(timeResultProcessing)}; 1162 LOG.debug(Messages.get().getBundle().key(Messages.LOG_STAT_RESULTS_TIME_4, logParams)); 1163 } 1164 1165 return searchResults; 1166 } 1167 1168 /** 1169 * Sets the Lucene analyzer used for this index.<p> 1170 * 1171 * @param analyzer the Lucene analyzer to set 1172 */ 1173 public void setAnalyzer(Analyzer analyzer) { 1174 1175 m_analyzer = analyzer; 1176 } 1177 1178 /** 1179 * Sets the checkPermissions.<p> 1180 * 1181 * @param checkPermissions the checkPermissions to set 1182 */ 1183 public void setCheckPermissions(boolean checkPermissions) { 1184 1185 m_checkPermissions = checkPermissions; 1186 } 1187 1188 /** 1189 * Sets the ignoreExpiration.<p> 1190 * 1191 * @param ignoreExpiration the ignoreExpiration to set 1192 */ 1193 public void setIgnoreExpiration(boolean ignoreExpiration) { 1194 1195 m_ignoreExpiration = ignoreExpiration; 1196 } 1197 1198 /** 1199 * Sets the number of how many hits are loaded at maximum.<p> 1200 * 1201 * This must be set at least to 50, or this setting is ignored.<p> 1202 * 1203 * @param maxHits the number of how many hits are loaded at maximum to set 1204 * 1205 * @see #getMaxHits() 1206 * 1207 * @since 7.5.1 1208 */ 1209 public void setMaxHits(int maxHits) { 1210 1211 if (m_maxHits >= (MAX_HITS_DEFAULT / 100)) { 1212 m_maxHits = maxHits; 1213 } 1214 } 1215 1216 /** 1217 * Controls if a resource requires view permission to be displayed in the result list.<p> 1218 * 1219 * By default this is <code>false</code>.<p> 1220 * 1221 * @param requireViewPermission controls if a resource requires view permission to be displayed in the result list 1222 */ 1223 public void setRequireViewPermission(boolean requireViewPermission) { 1224 1225 m_requireViewPermission = requireViewPermission; 1226 } 1227 1228 /** 1229 * Shuts down the search index.<p> 1230 * 1231 * This will close the local Lucene index searcher instance.<p> 1232 */ 
1233 @Override 1234 public void shutDown() { 1235 1236 super.shutDown(); 1237 indexSearcherClose(); 1238 if (m_analyzer != null) { 1239 m_analyzer.close(); 1240 } 1241 if (CmsLog.INIT.isInfoEnabled()) { 1242 CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SHUTDOWN_INDEX_1, getName())); 1243 } 1244 } 1245 1246 /** 1247 * Returns the name (<code>{@link #getName()}</code>) of this search index.<p> 1248 * 1249 * @return the name (<code>{@link #getName()}</code>) of this search index 1250 * 1251 * @see java.lang.Object#toString() 1252 */ 1253 @Override 1254 public String toString() { 1255 1256 return getName(); 1257 } 1258 1259 /** 1260 * Appends the a category filter to the given filter clause that matches all given categories.<p> 1261 * 1262 * In case the provided List is null or empty, the original filter is left unchanged.<p> 1263 * 1264 * The original filter parameter is extended and also provided as return value.<p> 1265 * 1266 * @param cms the current OpenCms search context 1267 * @param filter the filter to extend 1268 * @param categories the categories that will compose the filter 1269 * 1270 * @return the extended filter clause 1271 */ 1272 protected BooleanQuery.Builder appendCategoryFilter( 1273 CmsObject cms, 1274 BooleanQuery.Builder filter, 1275 List<String> categories) { 1276 1277 if ((categories != null) && (categories.size() > 0)) { 1278 // add query categories (if required) 1279 1280 // categories are indexed as lower-case strings 1281 // @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendCategories 1282 List<String> lowerCaseCategories = new ArrayList<String>(); 1283 for (String category : categories) { 1284 lowerCaseCategories.add(category.toLowerCase()); 1285 } 1286 filter.add( 1287 new BooleanClause( 1288 getMultiTermQueryFilter(CmsSearchField.FIELD_CATEGORY, lowerCaseCategories), 1289 BooleanClause.Occur.MUST)); 1290 } 1291 1292 return filter; 1293 } 1294 1295 /** 1296 * Appends a date of creation filter to the given 
filter clause that matches the 1297 * given time range.<p> 1298 * 1299 * If the start time is equal to {@link Long#MIN_VALUE} and the end time is equal to {@link Long#MAX_VALUE} 1300 * than the original filter is left unchanged.<p> 1301 * 1302 * The original filter parameter is extended and also provided as return value.<p> 1303 * 1304 * @param filter the filter to extend 1305 * @param startTime start time of the range to search in 1306 * @param endTime end time of the range to search in 1307 * 1308 * @return the extended filter clause 1309 */ 1310 protected BooleanQuery.Builder appendDateCreatedFilter(BooleanQuery.Builder filter, long startTime, long endTime) { 1311 1312 // create special optimized sub-filter for the date last modified search 1313 Query dateFilter = createDateRangeFilter(CmsSearchField.FIELD_DATE_CREATED_LOOKUP, startTime, endTime); 1314 if (dateFilter != null) { 1315 // extend main filter with the created date filter 1316 filter.add(new BooleanClause(dateFilter, BooleanClause.Occur.MUST)); 1317 } 1318 1319 return filter; 1320 } 1321 1322 /** 1323 * Appends a date of last modification filter to the given filter clause that matches the 1324 * given time range.<p> 1325 * 1326 * If the start time is equal to {@link Long#MIN_VALUE} and the end time is equal to {@link Long#MAX_VALUE} 1327 * than the original filter is left unchanged.<p> 1328 * 1329 * The original filter parameter is extended and also provided as return value.<p> 1330 * 1331 * @param filter the filter to extend 1332 * @param startTime start time of the range to search in 1333 * @param endTime end time of the range to search in 1334 * 1335 * @return the extended filter clause 1336 */ 1337 protected BooleanQuery.Builder appendDateLastModifiedFilter( 1338 BooleanQuery.Builder filter, 1339 long startTime, 1340 long endTime) { 1341 1342 // create special optimized sub-filter for the date last modified search 1343 Query dateFilter = 
createDateRangeFilter(CmsSearchField.FIELD_DATE_LASTMODIFIED_LOOKUP, startTime, endTime); 1344 if (dateFilter != null) { 1345 // extend main filter with the created date filter 1346 filter.add(new BooleanClause(dateFilter, BooleanClause.Occur.MUST)); 1347 } 1348 1349 return filter; 1350 } 1351 1352 /** 1353 * Appends the a VFS path filter to the given filter clause that matches all given root paths.<p> 1354 * 1355 * In case the provided List is null or empty, the current request context site root is appended.<p> 1356 * 1357 * The original filter parameter is extended and also provided as return value.<p> 1358 * 1359 * @param cms the current OpenCms search context 1360 * @param filter the filter to extend 1361 * @param roots the VFS root paths that will compose the filter 1362 * 1363 * @return the extended filter clause 1364 */ 1365 protected BooleanQuery.Builder appendPathFilter(CmsObject cms, BooleanQuery.Builder filter, List<String> roots) { 1366 1367 // complete the search root 1368 List<Term> terms = new ArrayList<Term>(); 1369 if ((roots != null) && (roots.size() > 0)) { 1370 // add the all configured search roots with will request context 1371 for (int i = 0; i < roots.size(); i++) { 1372 String searchRoot = cms.getRequestContext().addSiteRoot(roots.get(i)); 1373 extendPathFilter(terms, searchRoot); 1374 } 1375 } else { 1376 // use the current site root as the search root 1377 extendPathFilter(terms, cms.getRequestContext().getSiteRoot()); 1378 // also add the shared folder (v 8.0) 1379 if (OpenCms.getSiteManager().getSharedFolder() != null) { 1380 extendPathFilter(terms, OpenCms.getSiteManager().getSharedFolder()); 1381 } 1382 } 1383 1384 // add the calculated path filter for the root path 1385 BooleanQuery.Builder build = new BooleanQuery.Builder(); 1386 terms.forEach(term -> build.add(new TermQuery(term), Occur.SHOULD)); 1387 filter.add(new BooleanClause(build.build(), BooleanClause.Occur.MUST)); 1388 return filter; 1389 } 1390 1391 /** 1392 * Appends the 
a resource type filter to the given filter clause that matches all given resource types.<p> 1393 * 1394 * In case the provided List is null or empty, the original filter is left unchanged.<p> 1395 * 1396 * The original filter parameter is extended and also provided as return value.<p> 1397 * 1398 * @param cms the current OpenCms search context 1399 * @param filter the filter to extend 1400 * @param resourceTypes the resource types that will compose the filter 1401 * 1402 * @return the extended filter clause 1403 */ 1404 protected BooleanQuery.Builder appendResourceTypeFilter( 1405 CmsObject cms, 1406 BooleanQuery.Builder filter, 1407 List<String> resourceTypes) { 1408 1409 if ((resourceTypes != null) && (resourceTypes.size() > 0)) { 1410 // add query resource types (if required) 1411 filter.add( 1412 new BooleanClause( 1413 getMultiTermQueryFilter(CmsSearchField.FIELD_TYPE, resourceTypes), 1414 BooleanClause.Occur.MUST)); 1415 } 1416 1417 return filter; 1418 } 1419 1420 /** 1421 * Creates an optimized date range filter for the date of last modification or creation.<p> 1422 * 1423 * If the start date is equal to {@link Long#MIN_VALUE} and the end date is equal to {@link Long#MAX_VALUE} 1424 * than <code>null</code> is returned.<p> 1425 * 1426 * @param fieldName the name of the field to search 1427 * @param startTime start time of the range to search in 1428 * @param endTime end time of the range to search in 1429 * 1430 * @return an optimized date range filter for the date of last modification or creation 1431 */ 1432 protected Query createDateRangeFilter(String fieldName, long startTime, long endTime) { 1433 1434 Query filter = null; 1435 if ((startTime != Long.MIN_VALUE) || (endTime != Long.MAX_VALUE)) { 1436 // a date range has been set for this document search 1437 if (startTime == Long.MIN_VALUE) { 1438 // default start will always be "yyyy1231" in order to reduce term size 1439 Calendar cal = Calendar.getInstance(OpenCms.getLocaleManager().getTimeZone()); 1440 
cal.setTimeInMillis(endTime); 1441 cal.set(cal.get(Calendar.YEAR) - MAX_YEAR_RANGE, 11, 31, 0, 0, 0); 1442 startTime = cal.getTimeInMillis(); 1443 } else if (endTime == Long.MAX_VALUE) { 1444 // default end will always be "yyyy0101" in order to reduce term size 1445 Calendar cal = Calendar.getInstance(OpenCms.getLocaleManager().getTimeZone()); 1446 cal.setTimeInMillis(startTime); 1447 cal.set(cal.get(Calendar.YEAR) + MAX_YEAR_RANGE, 0, 1, 0, 0, 0); 1448 endTime = cal.getTimeInMillis(); 1449 } 1450 1451 // get the list of all possible date range options 1452 List<String> dateRange = getDateRangeSpan(startTime, endTime); 1453 List<Term> terms = new ArrayList<Term>(); 1454 for (String range : dateRange) { 1455 terms.add(new Term(fieldName, range)); 1456 } 1457 // create the filter for the date 1458 BooleanQuery.Builder build = new BooleanQuery.Builder(); 1459 terms.forEach(term -> build.add(new TermQuery(term), Occur.SHOULD)); 1460 filter = build.build(); 1461 } 1462 return filter; 1463 } 1464 1465 /** 1466 * Creates a backup of this index for optimized re-indexing of the whole content.<p> 1467 * 1468 * @return the path to the backup folder, or <code>null</code> in case no backup was created 1469 */ 1470 protected String createIndexBackup() { 1471 1472 if (!isBackupReindexing()) { 1473 // if no backup is generated we don't need to do anything 1474 return null; 1475 } 1476 1477 // check if the target directory already exists 1478 File file = new File(getPath()); 1479 if (!file.exists()) { 1480 // index does not exist yet, so we can't backup it 1481 return null; 1482 } 1483 String backupPath = getPath() + "_backup"; 1484 FSDirectory oldDir = null; 1485 FSDirectory newDir = null; 1486 try { 1487 // open file directory for Lucene 1488 oldDir = FSDirectory.open(file.toPath()); 1489 newDir = FSDirectory.open(Paths.get(backupPath)); 1490 for (String fileName : oldDir.listAll()) { 1491 newDir.copyFrom(oldDir, fileName, fileName, IOContext.DEFAULT); 1492 } 1493 } catch 
(Exception e) { 1494 LOG.error( 1495 Messages.get().getBundle().key(Messages.LOG_IO_INDEX_BACKUP_CREATE_3, getName(), getPath(), backupPath), 1496 e); 1497 backupPath = null; 1498 } finally { 1499 if (oldDir != null) { 1500 try { 1501 oldDir.close(); 1502 } catch (IOException e) { 1503 e.printStackTrace(); 1504 } 1505 } 1506 if (newDir != null) { 1507 try { 1508 newDir.close(); 1509 } catch (IOException e) { 1510 e.printStackTrace(); 1511 } 1512 } 1513 } 1514 return backupPath; 1515 } 1516 1517 /** 1518 * Creates a new index writer.<p> 1519 * 1520 * @param create if <code>true</code> a whole new index is created, if <code>false</code> an existing index is updated 1521 * @param report the report 1522 * 1523 * @return the created new index writer 1524 * 1525 * @throws CmsIndexException in case the writer could not be created 1526 * 1527 * @see #getIndexWriter(I_CmsReport, boolean) 1528 */ 1529 @Override 1530 protected I_CmsIndexWriter createIndexWriter(boolean create, I_CmsReport report) throws CmsIndexException { 1531 1532 IndexWriter indexWriter = null; 1533 FSDirectory dir = null; 1534 try { 1535 File f = new File(getPath()); 1536 if (!f.exists()) { 1537 f = f.getParentFile(); 1538 if ((f != null) && (!f.exists())) { 1539 f.mkdirs(); 1540 } 1541 1542 create = true; 1543 } 1544 1545 dir = FSDirectory.open(Paths.get(getPath())); 1546 IndexWriterConfig indexConfig = new IndexWriterConfig(getAnalyzer()); 1547 //indexConfig.setMergePolicy(mergePolicy); 1548 1549 if (m_luceneRAMBufferSizeMB != null) { 1550 indexConfig.setRAMBufferSizeMB(m_luceneRAMBufferSizeMB.doubleValue()); 1551 } 1552 if (create) { 1553 indexConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE); 1554 } else { 1555 indexConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); 1556 } 1557 // register the modified default similarity implementation 1558 indexConfig.setSimilarity(m_sim); 1559 1560 indexWriter = new IndexWriter(dir, indexConfig); 1561 } catch (Exception e) { 1562 if (dir != null) { 
1563 try { 1564 dir.close(); 1565 } catch (IOException e1) { 1566 // TODO Auto-generated catch block 1567 e1.printStackTrace(); 1568 } 1569 } 1570 if (indexWriter != null) { 1571 try { 1572 indexWriter.close(); 1573 } catch (IOException closeExeception) { 1574 throw new CmsIndexException( 1575 Messages.get().container(Messages.ERR_IO_INDEX_WRITER_OPEN_2, getPath(), getName()), 1576 e); 1577 } 1578 } 1579 throw new CmsIndexException( 1580 Messages.get().container(Messages.ERR_IO_INDEX_WRITER_OPEN_2, getPath(), getName()), 1581 e); 1582 } 1583 1584 return new CmsLuceneIndexWriter(indexWriter, this); 1585 } 1586 1587 /** 1588 * Extends the given path query with another term for the given search root element.<p> 1589 * 1590 * @param terms the path filter to extend 1591 * @param searchRoot the search root to add to the path query 1592 */ 1593 protected void extendPathFilter(List<Term> terms, String searchRoot) { 1594 1595 if (!CmsResource.isFolder(searchRoot)) { 1596 searchRoot += "/"; 1597 } 1598 terms.add(new Term(CmsSearchField.FIELD_PARENT_FOLDERS, searchRoot)); 1599 } 1600 1601 /** 1602 * Generates the directory on the RFS for this index.<p> 1603 * 1604 * @return the directory on the RFS for this index 1605 */ 1606 protected String generateIndexDirectory() { 1607 1608 return OpenCms.getSystemInfo().getAbsoluteRfsPathRelativeToWebInf( 1609 OpenCms.getSearchManager().getDirectory() + "/" + getName()); 1610 } 1611 1612 /** 1613 * Returns a cached Lucene term query filter for the given field and terms.<p> 1614 * 1615 * @param field the field to use 1616 * @param terms the term to use 1617 * 1618 * @return a cached Lucene term query filter for the given field and terms 1619 */ 1620 protected Query getMultiTermQueryFilter(String field, List<String> terms) { 1621 1622 return getMultiTermQueryFilter(field, null, terms); 1623 } 1624 1625 /** 1626 * Returns a cached Lucene term query filter for the given field and terms.<p> 1627 * 1628 * @param field the field to use 1629 * 
@param terms the term to use 1630 * 1631 * @return a cached Lucene term query filter for the given field and terms 1632 */ 1633 protected Query getMultiTermQueryFilter(String field, String terms) { 1634 1635 return getMultiTermQueryFilter(field, terms, null); 1636 } 1637 1638 /** 1639 * Returns a cached Lucene term query filter for the given field and terms.<p> 1640 * 1641 * @param field the field to use 1642 * @param termsStr the terms to use as a String separated by a space ' ' char 1643 * @param termsList the list of terms to use 1644 * 1645 * @return a cached Lucene term query filter for the given field and terms 1646 */ 1647 protected Query getMultiTermQueryFilter(String field, String termsStr, List<String> termsList) { 1648 1649 if (termsStr == null) { 1650 StringBuffer buf = new StringBuffer(64); 1651 for (int i = 0; i < termsList.size(); i++) { 1652 if (i > 0) { 1653 buf.append(' '); 1654 } 1655 buf.append(termsList.get(i)); 1656 } 1657 termsStr = buf.toString(); 1658 } 1659 Query result = m_displayFilters.get( 1660 (new StringBuffer(64)).append(field).append('|').append(termsStr).toString()); 1661 if (result == null) { 1662 List<Term> terms = new ArrayList<Term>(); 1663 if (termsList == null) { 1664 termsList = CmsStringUtil.splitAsList(termsStr, ' '); 1665 } 1666 for (int i = 0; i < termsList.size(); i++) { 1667 terms.add(new Term(field, termsList.get(i))); 1668 } 1669 1670 BooleanQuery.Builder build = new BooleanQuery.Builder(); 1671 terms.forEach(term -> build.add(new TermQuery(term), Occur.SHOULD)); 1672 Query termsQuery = build.build(); //termsFilter 1673 1674 try { 1675 result = termsQuery.createWeight(m_indexSearcher, ScoreMode.COMPLETE_NO_SCORES, 1).getQuery(); 1676 m_displayFilters.put(field + termsStr, result); 1677 } catch (IOException e) { 1678 // TODO don't know what happend 1679 e.printStackTrace(); 1680 } 1681 } 1682 return result; 1683 } 1684 1685 /** 1686 * Checks if the OpenCms resource referenced by the result document can be read 1687 * 
by the user of the given OpenCms context. 1688 * 1689 * Returns the referenced <code>CmsResource</code> or <code>null</code> if 1690 * the user is not permitted to read the resource.<p> 1691 * 1692 * @param cms the OpenCms user context to use for permission testing 1693 * @param doc the search result document to check 1694 * 1695 * @return the referenced <code>CmsResource</code> or <code>null</code> if the user is not permitted 1696 */ 1697 protected CmsResource getResource(CmsObject cms, I_CmsSearchDocument doc) { 1698 1699 // check if the resource exits in the VFS, 1700 // this will implicitly check read permission and if the resource was deleted 1701 CmsResourceFilter filter = CmsResourceFilter.DEFAULT; 1702 if (isRequireViewPermission()) { 1703 filter = CmsResourceFilter.DEFAULT_ONLY_VISIBLE; 1704 } else if (isIgnoreExpiration()) { 1705 filter = CmsResourceFilter.IGNORE_EXPIRATION; 1706 } 1707 1708 return getResource(cms, doc, filter); 1709 } 1710 1711 /** 1712 * Checks if the OpenCms resource referenced by the result document can be read 1713 * by the user of the given OpenCms context. 
1714 * 1715 * Returns the referenced <code>CmsResource</code> or <code>null</code> if 1716 * the user is not permitted to read the resource.<p> 1717 * 1718 * @param cms the OpenCms user context to use for permission testing 1719 * @param doc the search result document to check 1720 * @param filter the resource filter to apply 1721 * 1722 * @return the referenced <code>CmsResource</code> or <code>null</code> if the user is not permitted 1723 */ 1724 protected CmsResource getResource(CmsObject cms, I_CmsSearchDocument doc, CmsResourceFilter filter) { 1725 1726 try { 1727 CmsObject clone = OpenCms.initCmsObject(cms); 1728 clone.getRequestContext().setSiteRoot(""); 1729 return clone.readResource(doc.getPath(), filter); 1730 } catch (CmsException e) { 1731 // Do nothing 1732 } 1733 1734 return null; 1735 } 1736 1737 /** 1738 * Returns a cached Lucene term query filter for the given field and term.<p> 1739 * 1740 * @param field the field to use 1741 * @param term the term to use 1742 * 1743 * @return a cached Lucene term query filter for the given field and term 1744 */ 1745 protected Query getTermQueryFilter(String field, String term) { 1746 1747 return getMultiTermQueryFilter(field, term, Collections.singletonList(term)); 1748 } 1749 1750 /** 1751 * Checks if the OpenCms resource referenced by the result document can be read 1752 * be the user of the given OpenCms context.<p> 1753 * 1754 * @param cms the OpenCms user context to use for permission testing 1755 * @param doc the search result document to check 1756 * @return <code>true</code> if the user has read permissions to the resource 1757 */ 1758 protected boolean hasReadPermission(CmsObject cms, I_CmsSearchDocument doc) { 1759 1760 // If no permission check is needed: the document can be read 1761 // Else try to read the resource if this is not possible the user does not have enough permissions 1762 return !needsPermissionCheck(doc) ? 
true : (null != getResource(cms, doc)); 1763 } 1764 1765 /** 1766 * Closes the index searcher for this index.<p> 1767 * 1768 * @see #indexSearcherOpen(String) 1769 */ 1770 protected synchronized void indexSearcherClose() { 1771 1772 indexSearcherClose(m_indexSearcher); 1773 } 1774 1775 /** 1776 * Closes the given Lucene index searcher.<p> 1777 * 1778 * @param searcher the searcher to close 1779 */ 1780 protected synchronized void indexSearcherClose(IndexSearcher searcher) { 1781 1782 // in case there is an index searcher available close it 1783 if ((searcher != null) && (searcher.getIndexReader() != null)) { 1784 try { 1785 searcher.getIndexReader().close(); 1786 } catch (Exception e) { 1787 LOG.error(Messages.get().getBundle().key(Messages.ERR_INDEX_SEARCHER_CLOSE_1, getName()), e); 1788 } 1789 } 1790 } 1791 1792 /** 1793 * Initializes the index searcher for this index.<p> 1794 * 1795 * In case there is an index searcher still open, it is closed first.<p> 1796 * 1797 * For performance reasons, one instance of the index searcher should be kept 1798 * for all searches. 
However, if the index is updated or changed 1799 * this searcher instance needs to be re-initialized.<p> 1800 * 1801 * @param path the path to the index directory 1802 */ 1803 protected synchronized void indexSearcherOpen(String path) { 1804 1805 IndexSearcher oldSearcher = null; 1806 Directory indexDirectory = null; 1807 try { 1808 indexDirectory = FSDirectory.open(Paths.get(path)); 1809 if (DirectoryReader.indexExists(indexDirectory)) { 1810 IndexReader reader = UninvertingReader.wrap( 1811 DirectoryReader.open(indexDirectory), 1812 createUninvertingMap()); 1813 if (m_indexSearcher != null) { 1814 // store old searcher instance to close it later 1815 oldSearcher = m_indexSearcher; 1816 } 1817 m_indexSearcher = new IndexSearcher(reader); 1818 m_indexSearcher.setSimilarity(m_sim); 1819 m_displayFilters = new HashMap<>(); 1820 } 1821 } catch (IOException e) { 1822 LOG.error(Messages.get().getBundle().key(Messages.ERR_INDEX_SEARCHER_1, getName()), e); 1823 if (indexDirectory != null) { 1824 try { 1825 indexDirectory.close(); 1826 } catch (IOException closeException) { 1827 // do nothing 1828 } 1829 } 1830 } 1831 if (oldSearcher != null) { 1832 // close the old searcher if required 1833 indexSearcherClose(oldSearcher); 1834 } 1835 } 1836 1837 /** 1838 * Reopens the index search reader for this index, required after the index has been changed.<p> 1839 * 1840 * @see #indexSearcherOpen(String) 1841 */ 1842 protected synchronized void indexSearcherUpdate() { 1843 1844 IndexSearcher oldSearcher = m_indexSearcher; 1845 if ((oldSearcher != null) && (oldSearcher.getIndexReader() != null)) { 1846 // in case there is an index searcher available close it 1847 try { 1848 if (oldSearcher.getIndexReader() instanceof DirectoryReader) { 1849 IndexReader newReader = DirectoryReader.openIfChanged( 1850 (DirectoryReader)oldSearcher.getIndexReader()); 1851 if (newReader != null) { 1852 m_indexSearcher = new IndexSearcher(newReader); 1853 m_indexSearcher.setSimilarity(m_sim); 1854 
indexSearcherClose(oldSearcher); 1855 } 1856 } 1857 } catch (Exception e) { 1858 LOG.error(Messages.get().getBundle().key(Messages.ERR_INDEX_SEARCHER_REOPEN_1, getName()), e); 1859 } 1860 } else { 1861 // make sure we end up with an open index searcher / reader 1862 indexSearcherOpen(getPath()); 1863 } 1864 } 1865 1866 /** 1867 * Checks if the document is in the time range specified in the search parameters.<p> 1868 * 1869 * The creation date and/or the last modification date are checked.<p> 1870 * 1871 * @param doc the document to check the dates against the given time range 1872 * @param params the search parameters where the time ranges are specified 1873 * 1874 * @return true if document is in time range or not time range set otherwise false 1875 */ 1876 protected boolean isInTimeRange(Document doc, CmsSearchParameters params) { 1877 1878 if (!isCheckingTimeRange()) { 1879 // time range check disabled 1880 return true; 1881 } 1882 1883 try { 1884 // check the creation date of the document against the given time range 1885 Date dateCreated = DateTools.stringToDate(doc.getField(CmsSearchField.FIELD_DATE_CREATED).stringValue()); 1886 if (dateCreated.getTime() < params.getMinDateCreated()) { 1887 return false; 1888 } 1889 if (dateCreated.getTime() > params.getMaxDateCreated()) { 1890 return false; 1891 } 1892 1893 // check the last modification date of the document against the given time range 1894 Date dateLastModified = DateTools.stringToDate( 1895 doc.getField(CmsSearchField.FIELD_DATE_LASTMODIFIED).stringValue()); 1896 if (dateLastModified.getTime() < params.getMinDateLastModified()) { 1897 return false; 1898 } 1899 if (dateLastModified.getTime() > params.getMaxDateLastModified()) { 1900 return false; 1901 } 1902 1903 } catch (ParseException ex) { 1904 // date could not be parsed -> doc is in time range 1905 } 1906 1907 return true; 1908 } 1909 1910 /** 1911 * Checks if the score for the results must be calculated based on the provided sort option.<p> 1912 * 
* Since Lucene 3 apparently the score is no longer calculated by default, but only if the
     * searcher is explicitly told so. This methods checks if, based on the given sort,
     * the score must be calculated.<p>
     *
     * For the default sorts of {@link CmsSearchParameters} the result is fixed, for all other
     * sorts the score is required if at least one of the sort fields needs the score.<p>
     *
     * @param searcher the index searcher to prepare
     * @param sort the sort option to use
     *
     * @return true if the score must be calculated for the given sort option
     */
    protected boolean isSortScoring(IndexSearcher searcher, Sort sort) {

        if (sort == null) {
            return false;
        }
        if ((sort == CmsSearchParameters.SORT_DEFAULT) || (sort == CmsSearchParameters.SORT_TITLE)) {
            // these default sorts do need score calculation
            return true;
        }
        if ((sort == CmsSearchParameters.SORT_DATE_CREATED)
            || (sort == CmsSearchParameters.SORT_DATE_LASTMODIFIED)) {
            // these default sorts don't need score calculation
            return false;
        }
        // for all non-defaults: if any field sorts by score we must calculate the score,
        // this also detects score fields that are not the SortField.FIELD_SCORE instance itself
        for (SortField field : sort.getSort()) {
            if (field.needsScores()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks if the OpenCms resource referenced by the result document needs to be checked.<p>
     *
     * No check is required if permission checking is disabled for this index, if the document
     * has no type or no path, or if the document type is neither the generic VFS type nor
     * a resource type known to the resource manager.<p>
     *
     * @param doc the search result document to check
     *
     * @return <code>true</code> if the document needs to be checked <code>false</code> otherwise
     */
    protected boolean needsPermissionCheck(I_CmsSearchDocument doc) {

        if (!isCheckingPermissions()) {
            // no permission check is performed at all
            return false;
        }

        if ((doc.getType() == null) || (doc.getPath() == null)) {
            // permission check needs only to be performed for VFS documents that contain both fields
            return false;
        }

        // an unknown VFS resource type (also not the generic "VFS" type of OpenCms before 7.0) is not checked
        return I_CmsSearchDocument.VFS_DOCUMENT_KEY_PREFIX.equals(doc.getType())
            || OpenCms.getResourceManager().hasResourceType(doc.getType());
    }

    /**
     * Removes the given backup folder of this index.<p>
     *
     * Nothing is done if backup re-indexing is disabled or if the folder does not exist.
     * Errors while removing the folder are written to the log and not propagated.<p>
     *
     * @param path the backup folder to remove
     */
    protected void removeIndexBackup(String path) {

        if (!isBackupReindexing()) {
            // if no backup is generated we don't need to do anything
            return;
        }

        File backupFolder = new File(path);
        if (!backupFolder.exists()) {
            // index does not exist yet
            return;
        }
        try {
            // open and directly close the directory before the folder is purged
            FSDirectory dir = FSDirectory.open(backupFolder.toPath());
            dir.close();
            CmsFileUtil.purgeDirectory(backupFolder);
        } catch (Exception e) {
            LOG.error(Messages.get().getBundle().key(Messages.LOG_IO_INDEX_BACKUP_REMOVE_2, getName(), path), e);
        }
    }

    /**
     * Generates the uninverting map for the index reader.<p>
     *
     * The map is filled with the mappings added by {@link CmsSearchField} and
     * by the field configuration of this index.<p>
     *
     * @return the generated uninverting map
     *
     * @see CmsSearchField#addUninvertingMappings(Map)
     */
    private Map<String, Type> createUninvertingMap() {

        Map<String, Type> uninvertingMap = new HashMap<>();
        CmsSearchField.addUninvertingMappings(uninvertingMap);
        getFieldConfiguration().addUninvertingMappings(uninvertingMap);
        return uninvertingMap;
    }

}