/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software GmbH & Co. KG, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.opencms.search;

import org.opencms.db.CmsPublishedResource;
import org.opencms.file.CmsObject;
import org.opencms.file.CmsProject;
import org.opencms.file.CmsResource;
import org.opencms.file.CmsResourceFilter;
import org.opencms.main.CmsException;
import org.opencms.main.CmsLog;
import org.opencms.report.I_CmsReport;
import org.opencms.util.CmsUUID;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.apache.commons.logging.Log;

/**
 * An indexer indexing {@link CmsResource} based content from the OpenCms VFS.<p>
 *
 * @since 6.0.0
 */
public class CmsVfsIndexer implements I_CmsIndexer {

    /** The log object for this class. */
    private static final Log LOG = CmsLog.getLog(CmsVfsIndexer.class);

    // Note: The following member variables must all be "protected" (not "private") since
    // in case the indexer is extended, the factory method "newInstance()" needs to set them.

    /** The OpenCms user context to use when reading resources from the VFS during indexing. */
    protected CmsObject m_cms;

    /** The index. */
    protected I_CmsSearchIndex m_index;

    /** The report. */
    protected I_CmsReport m_report;

    /**
     * Deletes the given published resources from the index.<p>
     *
     * Folders and temporary files are skipped, and every structure id is processed at most once.
     * A <code>null</code> or empty list is ignored.<p>
     *
     * @see org.opencms.search.I_CmsIndexer#deleteResources(org.opencms.search.I_CmsIndexWriter, java.util.List)
     */
    public void deleteResources(I_CmsIndexWriter indexWriter, List<CmsPublishedResource> resourcesToDelete) {

        if ((resourcesToDelete == null) || resourcesToDelete.isEmpty()) {
            // nothing to delete
            return;
        }

        // contains the structure ids already handled to avoid multiple deleting in case of siblings
        Set<CmsUUID> resourcesAlreadyDeleted = new HashSet<CmsUUID>();

        for (CmsPublishedResource res : resourcesToDelete) {
            // Set.add() returns false if the structure id was seen before,
            // which ensures a resource is only deleted once per update
            if (resourcesAlreadyDeleted.add(res.getStructureId())
                && !res.isFolder()
                && !CmsResource.isTemporaryFileName(res.getRootPath())) {
                // now delete the resource from the index
                deleteResource(indexWriter, res);
            }
        }
    }

    /**
     * Returns the OpenCms user context used by this indexer.<p>
     *
     * @return the OpenCms user context used by this indexer
     */
    public CmsObject getCms() {

        return m_cms;
    }

    /**
     * Returns the OpenCms search index updated by this indexer.<p>
     *
     * @return the OpenCms search index updated by this indexer
     */
    public I_CmsSearchIndex getIndex() {

        return m_index;
    }

    /**
     * Returns the report used by this indexer.<p>
     *
     * @return the report used by this indexer
     */
    public I_CmsReport getReport() {

        return m_report;
    }

    /**
     * Collects the published resources that belong to the given index source.<p>
     *
     * A <code>null</code> list of published resources results in empty update data.<p>
     *
     * @see org.opencms.search.I_CmsIndexer#getUpdateData(org.opencms.search.CmsSearchIndexSource, java.util.List)
     */
    public CmsSearchIndexUpdateData getUpdateData(
        CmsSearchIndexSource source,
        List<CmsPublishedResource> publishedResources) {

        // create a new update collection from this indexer and the given index source
        CmsSearchIndexUpdateData result = new CmsSearchIndexUpdateData(source, this);

        if (publishedResources == null) {
            // nothing was published, so there is nothing to update or delete
            return result;
        }

        for (CmsPublishedResource pubRes : publishedResources) {
            // check all published resources if they match this indexer / source,
            // VFS resources will always have a structure id
            if (!pubRes.getStructureId().isNullUUID()
                // use utility method from CmsProject to check if published resource is "inside" this index source
                && CmsProject.isInsideProject(source.getResourcesNames(), pubRes.getRootPath())) {
                // the resource is "inside" this index source
                addResourceToUpdateData(pubRes, result);
            }
        }
        return result;
    }

    /**
     * The default indexer is not able to resolve locale dependencies between documents.<p>
     *
     * @see org.opencms.search.I_CmsIndexer#isLocaleDependenciesEnable()
     */
    public boolean isLocaleDependenciesEnable() {

        return false;
    }

    /**
     * Creates a new indexer of the same runtime class as this instance and initializes it
     * with the given context, report and index.<p>
     *
     * If the instance can not be created, the error is logged and <code>null</code> is returned.<p>
     *
     * @see org.opencms.search.I_CmsIndexer#newInstance(org.opencms.file.CmsObject, org.opencms.report.I_CmsReport, org.opencms.search.I_CmsSearchIndex)
     */
    public I_CmsIndexer newInstance(CmsObject cms, I_CmsReport report, I_CmsSearchIndex index) {

        CmsVfsIndexer indexer = null;
        try {
            // use the no-argument constructor reflectively so that subclasses create instances of their own type;
            // Class.newInstance() is deprecated because it propagates constructor exceptions unchecked
            indexer = getClass().getDeclaredConstructor().newInstance();
            indexer.m_cms = cms;
            indexer.m_report = report;
            indexer.m_index = index;
        } catch (Exception e) {
            LOG.error(
                Messages.get().getBundle().key(
                    Messages.ERR_INDEXSOURCE_INDEXER_CLASS_NAME_2,
                    getClass().getName(),
                    CmsVfsIndexer.class),
                e);
        }
        return indexer;
    }

    /**
     * Rebuilds the index for all files found below the resource names configured in the given source.<p>
     *
     * A source folder that can not be read is reported and logged as a warning,
     * the remaining source folders are still processed.<p>
     *
     * @see org.opencms.search.I_CmsIndexer#rebuildIndex(org.opencms.search.I_CmsIndexWriter, org.opencms.search.CmsIndexingThreadManager, org.opencms.search.CmsSearchIndexSource)
     */
    public void rebuildIndex(
        I_CmsIndexWriter writer,
        CmsIndexingThreadManager threadManager,
        CmsSearchIndexSource source)
    throws CmsIndexException {

        // read the resources from all configured source folders
        for (String resourceName : source.getResourcesNames()) {
            List<CmsResource> resources = null;
            try {
                // read all resources (only files) below the given path
                resources = m_cms.readResources(resourceName, CmsResourceFilter.IGNORE_EXPIRATION.addRequireFile());
            } catch (CmsException e) {
                if (m_report != null) {
                    m_report.println(
                        Messages.get().container(
                            Messages.RPT_UNABLE_TO_READ_SOURCE_2,
                            resourceName,
                            e.getLocalizedMessage()),
                        I_CmsReport.FORMAT_WARNING);
                }
                if (LOG.isWarnEnabled()) {
                    LOG.warn(
                        Messages.get().getBundle().key(
                            Messages.LOG_UNABLE_TO_READ_SOURCE_2,
                            resourceName,
                            m_index.getName()),
                        e);
                }
            }
            if (resources != null) {
                // now update all the resources found in the folder individually
                for (CmsResource resource : resources) {
                    updateResource(writer, threadManager, resource);
                }
            }
        }
    }

    /**
     * Updates the index for the given published resources.<p>
     *
     * Temporary files are skipped, resources that can not be read are logged as a warning and skipped,
     * and every root path is indexed at most once. A <code>null</code> or empty list is ignored.<p>
     *
     * @see org.opencms.search.I_CmsIndexer#updateResources(org.opencms.search.I_CmsIndexWriter, org.opencms.search.CmsIndexingThreadManager, java.util.List)
     */
    public void updateResources(
        I_CmsIndexWriter writer,
        CmsIndexingThreadManager threadManager,
        List<CmsPublishedResource> resourcesToUpdate)
    throws CmsIndexException {

        if ((resourcesToUpdate == null) || resourcesToUpdate.isEmpty()) {
            // nothing to update
            return;
        }

        // contains the root paths already updated to avoid multiple updates of the same resource
        Set<String> resourcesAlreadyUpdated = new HashSet<String>();

        // index all resources that are in the given list
        for (CmsPublishedResource res : resourcesToUpdate) {
            if (CmsResource.isTemporaryFileName(res.getRootPath())) {
                // temporary files are never indexed
                continue;
            }
            CmsResource resource = null;
            try {
                resource = m_cms.readResource(res.getRootPath(), CmsResourceFilter.IGNORE_EXPIRATION);
            } catch (CmsException e) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn(
                        Messages.get().getBundle().key(
                            Messages.LOG_UNABLE_TO_READ_RESOURCE_2,
                            res.getRootPath(),
                            m_index.getName()),
                        e);
                }
            }
            // Set.add() returns false if the root path was seen before,
            // which ensures resources are only indexed once per update
            if ((resource != null) && resourcesAlreadyUpdated.add(resource.getRootPath())) {
                updateResource(writer, threadManager, resource);
            }
        }
    }

    /**
     * Adds a given published resource to the provided search index update data.<p>
     *
     * This method decides if the resource has to be included in the "update" or "delete" list.<p>
     *
     * @param pubRes the published resource to add
     * @param updateData the search index update data to add the resource to
     */
    protected void addResourceToUpdateData(CmsPublishedResource pubRes, CmsSearchIndexUpdateData updateData) {

        if (pubRes.getState().isDeleted()) {
            // deleted resource just needs to be removed
            updateData.addResourceToDelete(pubRes);
        } else if (pubRes.getState().isNew() || pubRes.getState().isChanged() || pubRes.getState().isUnchanged()) {
            updateData.addResourceToUpdate(pubRes);
        }
    }

    /**
     * Deletes a resource with the given index writer.<p>
     *
     * An {@link IOException} thrown by the index writer is logged as a warning and not propagated.<p>
     *
     * @param indexWriter the index writer to delete the resource with
     * @param resource the published resource to delete
     */
    protected void deleteResource(I_CmsIndexWriter indexWriter, CmsPublishedResource resource) {

        try {
            if (LOG.isInfoEnabled()) {
                LOG.info(Messages.get().getBundle().key(Messages.LOG_DELETING_FROM_INDEX_1, resource.getRootPath()));
            }
            // delete all documents with this term from the index
            indexWriter.deleteDocument(resource);
        } catch (IOException e) {
            if (LOG.isWarnEnabled()) {
                LOG.warn(
                    Messages.get().getBundle().key(
                        Messages.LOG_IO_INDEX_DOCUMENT_DELETE_2,
                        resource.getRootPath(),
                        m_index.getName()),
                    e);
            }
        }
    }

    /**
     * Checks if the published resource is inside the time window set with release and expiration date.<p>
     *
     * The check is done by testing if the resource exists for the indexer's user context
     * with the {@link CmsResourceFilter#DEFAULT} filter.<p>
     *
     * @param resource the published resource to check
     * @return true if the published resource is inside the time window, otherwise false
     */
    protected boolean isResourceInTimeWindow(CmsPublishedResource resource) {

        return m_cms.existsResource(
            m_cms.getRequestContext().removeSiteRoot(resource.getRootPath()),
            CmsResourceFilter.DEFAULT);
    }

    /**
     * Updates (writes) a single resource in the index.<p>
     *
     * Folders and temporary files are silently ignored.<p>
     *
     * @param writer the index writer to use
     * @param threadManager the thread manager to use when extracting the document text
     * @param resource the resource to update
     *
     * @throws CmsIndexException if something goes wrong
     */
    protected void updateResource(I_CmsIndexWriter writer, CmsIndexingThreadManager threadManager, CmsResource resource)
    throws CmsIndexException {

        if (resource.isFolder() || resource.isTemporaryFile()) {
            // don't ever index folders or temporary files
            return;
        }
        try {
            // create the index thread for the resource
            threadManager.createIndexingThread(this, writer, resource);
        } catch (Exception e) {

            if (m_report != null) {
                m_report.println(
                    Messages.get().container(Messages.RPT_SEARCH_INDEXING_FAILED_0),
                    I_CmsReport.FORMAT_WARNING);
            }
            if (LOG.isWarnEnabled()) {
                LOG.warn(
                    Messages.get().getBundle().key(
                        Messages.ERR_INDEX_RESOURCE_FAILED_2,
                        resource.getRootPath(),
                        m_index.getName()),
                    e);
            }
            // keep the original exception as cause so callers can see why indexing failed
            throw new CmsIndexException(
                Messages.get().container(
                    Messages.ERR_INDEX_RESOURCE_FAILED_2,
                    resource.getRootPath(),
                    m_index.getName()),
                e);
        }
    }

    /**
     * Updates a resource with the given index writer and the new document provided.<p>
     *
     * Any exception thrown by the index writer is logged as a warning and not propagated.<p>
     *
     * @param indexWriter the index writer to update the resource with
     * @param rootPath the root path of the resource to update
     * @param doc the new document for the resource
     */
    protected void updateResource(I_CmsIndexWriter indexWriter, String rootPath, I_CmsSearchDocument doc) {

        try {
            indexWriter.updateDocument(rootPath, doc);
        } catch (Exception e) {
            if (LOG.isWarnEnabled()) {
                LOG.warn(
                    Messages.get().getBundle().key(
                        Messages.LOG_IO_INDEX_DOCUMENT_UPDATE_2,
                        rootPath,
                        m_index.getName()),
                    e);
            }
        }
    }
}