001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH & Co. KG, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.staticexport; 029 030import org.opencms.file.CmsObject; 031import org.opencms.file.CmsPropertyDefinition; 032import org.opencms.file.wrapper.CmsObjectWrapper; 033import org.opencms.i18n.CmsEncoder; 034import org.opencms.main.CmsException; 035import org.opencms.main.OpenCms; 036import org.opencms.relations.CmsLink; 037import org.opencms.relations.CmsRelationType; 038import org.opencms.util.CmsHtmlParser; 039import org.opencms.util.CmsMacroResolver; 040import org.opencms.util.CmsRequestUtil; 041import org.opencms.util.CmsStringUtil; 042 043import java.util.Vector; 044 045import org.htmlparser.Attribute; 046import org.htmlparser.Node; 047import org.htmlparser.Tag; 048import org.htmlparser.tags.ImageTag; 049import org.htmlparser.tags.LinkTag; 050import org.htmlparser.tags.ObjectTag; 051import org.htmlparser.util.ParserException; 052import org.htmlparser.util.SimpleNodeIterator; 053 054/** 055 * Implements the HTML parser node visitor pattern to 056 * exchange all links on the page.<p> 057 * 058 * @since 6.0.0 059 */ 060public class CmsLinkProcessor extends CmsHtmlParser { 061 062 /** Constant for the attribute name. */ 063 public static final String ATTRIBUTE_HREF = "href"; 064 065 /** Constant for the attribute name. */ 066 public static final String ATTRIBUTE_SRC = "src"; 067 068 /** Constant for the attribute name. */ 069 public static final String ATTRIBUTE_VALUE = "value"; 070 071 /** HTML end. */ 072 public static final String HTML_END = "</body></html>"; 073 074 /** HTML start. */ 075 public static final String HTML_START = "<html><body>"; 076 077 /** Constant for the tag name. */ 078 public static final String TAG_AREA = "AREA"; 079 080 /** Constant for the tag name. */ 081 public static final String TAG_EMBED = "EMBED"; 082 083 /** Constant for the tag name. */ 084 public static final String TAG_PARAM = "PARAM"; 085 086 /** List of attributes that may contain links for the embed tag. */ 087 private static final String[] EMBED_TAG_LINKED_ATTRIBS = new String[] {ATTRIBUTE_SRC, "pluginurl", "pluginspage"}; 088 089 /** List of attributes that may contain links for the object tag ("codebase" has to be first). */ 090 private static final String[] OBJECT_TAG_LINKED_ATTRIBS = new String[] {"codebase", "data", "datasrc"}; 091 092 /** Processing mode "process links". */ 093 private static final int PROCESS_LINKS = 1; 094 095 /** Processing mode "replace links". */ 096 private static final int REPLACE_LINKS = 0; 097 098 /** The current users OpenCms context, containing the users permission and site root context. */ 099 private CmsObject m_cms; 100 101 /** The selected encoding to use for parsing the HTML. */ 102 private String m_encoding; 103 104 /** The link table used for link macro replacements. */ 105 private CmsLinkTable m_linkTable; 106 107 /** Current processing mode. */ 108 private int m_mode; 109 110 /** The relative path for relative links, if not set, relative links are treated as external links. */ 111 private String m_relativePath; 112 113 /** Another OpenCms context based on the current users OpenCms context, but with the site root set to '/'. */ 114 private CmsObject m_rootCms; 115 116 /** 117 * Creates a new link processor.<p> 118 * 119 * @param cms the current users OpenCms context 120 * @param linkTable the link table to use 121 * @param encoding the encoding to use for parsing the HTML content 122 * @param relativePath additional path for links with relative path (only used in "replace" mode) 123 */ 124 public CmsLinkProcessor(CmsObject cms, CmsLinkTable linkTable, String encoding, String relativePath) { 125 126 // echo mode must be on for link processor 127 super(true); 128 129 m_cms = cms; 130 if (m_cms != null) { 131 try { 132 m_rootCms = OpenCms.initCmsObject(cms); 133 m_rootCms.getRequestContext().setSiteRoot("/"); 134 } catch (CmsException e) { 135 // this should not happen 136 m_rootCms = null; 137 } 138 } 139 m_linkTable = linkTable; 140 m_encoding = encoding; 141 m_relativePath = relativePath; 142 } 143 144 /** 145 * Escapes all <code>&</code>, e.g. replaces them with a <code>&</code>.<p> 146 * 147 * @param source the String to escape 148 * @return the escaped String 149 */ 150 public static String escapeLink(String source) { 151 152 if (source == null) { 153 return null; 154 } 155 StringBuffer result = new StringBuffer(source.length() * 2); 156 int terminatorIndex; 157 for (int i = 0; i < source.length(); ++i) { 158 char ch = source.charAt(i); 159 switch (ch) { 160 case '&': 161 // don't escape already escaped &s; 162 terminatorIndex = source.indexOf(';', i); 163 if (terminatorIndex > 0) { 164 String substr = source.substring(i + 1, terminatorIndex); 165 if ("amp".equals(substr)) { 166 result.append(ch); 167 } else { 168 result.append("&"); 169 } 170 } else { 171 result.append("&"); 172 } 173 break; 174 default: 175 result.append(ch); 176 } 177 } 178 return new String(result); 179 } 180 181 /** 182 * Unescapes all <code>&amp;</code>, that is replaces them with a <code>&</code>.<p> 183 * 184 * @param source the String to unescape 185 * @return the unescaped String 186 */ 187 public static String unescapeLink(String source) { 188 189 if (source == null) { 190 return null; 191 } 192 return CmsStringUtil.substitute(source, "&", "&"); 193 194 } 195 196 /** 197 * Returns the link table this link processor was initialized with.<p> 198 * 199 * @return the link table this link processor was initialized with 200 */ 201 public CmsLinkTable getLinkTable() { 202 203 return m_linkTable; 204 } 205 206 /** 207 * Starts link processing for the given content in processing mode.<p> 208 * 209 * Macros are replaced by links.<p> 210 * 211 * @param content the content to process 212 * @return the processed content with replaced macros 213 * 214 * @throws ParserException if something goes wrong 215 */ 216 public String processLinks(String content) throws ParserException { 217 218 m_mode = PROCESS_LINKS; 219 return process(content, m_encoding); 220 } 221 222 /** 223 * Starts link processing for the given content in replacement mode.<p> 224 * 225 * Links are replaced by macros.<p> 226 * 227 * @param content the content to process 228 * @return the processed content with replaced links 229 * 230 * @throws ParserException if something goes wrong 231 */ 232 public String replaceLinks(String content) throws ParserException { 233 234 m_mode = REPLACE_LINKS; 235 return process(content, m_encoding); 236 } 237 238 /** 239 * Visitor method to process a tag (start).<p> 240 * 241 * @param tag the tag to process 242 */ 243 @Override 244 public void visitTag(Tag tag) { 245 246 if (tag instanceof LinkTag) { 247 processLinkTag((LinkTag)tag); 248 } else if (tag instanceof ImageTag) { 249 processImageTag((ImageTag)tag); 250 } else if (tag instanceof ObjectTag) { 251 processObjectTag((ObjectTag)tag); 252 } else { 253 // there are no specialized tag classes for these tags :( 254 if (TAG_EMBED.equals(tag.getTagName())) { 255 processEmbedTag(tag); 256 } else if (TAG_AREA.equals(tag.getTagName())) { 257 processAreaTag(tag); 258 } 259 } 260 // append text content of the tag (may have been changed by above methods) 261 super.visitTag(tag); 262 } 263 264 /** 265 * Process an area tag.<p> 266 * 267 * @param tag the tag to process 268 */ 269 protected void processAreaTag(Tag tag) { 270 271 processLink(tag, ATTRIBUTE_HREF, CmsRelationType.HYPERLINK); 272 } 273 274 /** 275 * Process an embed tag.<p> 276 * 277 * @param tag the tag to process 278 */ 279 protected void processEmbedTag(Tag tag) { 280 281 for (int i = 0; i < EMBED_TAG_LINKED_ATTRIBS.length; i++) { 282 String attr = EMBED_TAG_LINKED_ATTRIBS[i]; 283 processLink(tag, attr, CmsRelationType.EMBEDDED_OBJECT); 284 } 285 } 286 287 /** 288 * Process an image tag.<p> 289 * 290 * @param tag the tag to process 291 */ 292 protected void processImageTag(ImageTag tag) { 293 294 processLink(tag, ATTRIBUTE_SRC, CmsRelationType.valueOf(tag.getTagName())); 295 } 296 297 /** 298 * Process a tag having a link in the given attribute, considering the link as the given type.<p> 299 * 300 * @param tag the tag to process 301 * @param attr the attribute 302 * @param type the link type 303 */ 304 protected void processLink(Tag tag, String attr, CmsRelationType type) { 305 306 if (tag.getAttribute(attr) == null) { 307 return; 308 } 309 CmsLink link = null; 310 switch (m_mode) { 311 case PROCESS_LINKS: 312 // macros are replaced with links 313 link = m_linkTable.getLink(CmsMacroResolver.stripMacro(tag.getAttribute(attr))); 314 if (link != null) { 315 // link management check 316 String l = link.getLink(m_cms); 317 if (TAG_PARAM.equals(tag.getTagName())) { 318 // HACK: to distinguish link parameters the link itself has to end with '&' or '?' 319 // another solution should be a kind of macro... 320 if (!l.endsWith(CmsRequestUtil.URL_DELIMITER) 321 && !l.endsWith(CmsRequestUtil.PARAMETER_DELIMITER)) { 322 if (l.indexOf(CmsRequestUtil.URL_DELIMITER) > 0) { 323 l += CmsRequestUtil.PARAMETER_DELIMITER; 324 } else { 325 l += CmsRequestUtil.URL_DELIMITER; 326 } 327 } 328 } 329 // set the real target 330 tag.setAttribute(attr, CmsEncoder.escapeXml(l)); 331 } 332 break; 333 case REPLACE_LINKS: 334 // links are replaced with macros 335 String targetUri = tag.getAttribute(attr); 336 if (CmsStringUtil.isNotEmpty(targetUri)) { 337 String internalUri = null; 338 if (!CmsMacroResolver.isMacro(targetUri)) { 339 m_cms.getRequestContext().setAttribute( 340 CmsDefaultLinkSubstitutionHandler.DONT_USE_CURRENT_SITE_FOR_WORKPLACE_REQUESTS, 341 "true"); 342 internalUri = OpenCms.getLinkManager().getRootPath(m_cms, targetUri, m_relativePath); 343 } 344 // HACK: to distinguish link parameters the link itself has to end with '&' or '?' 345 // another solution should be a kind of macro... 346 if (!TAG_PARAM.equals(tag.getTagName()) 347 || targetUri.endsWith(CmsRequestUtil.URL_DELIMITER) 348 || targetUri.endsWith(CmsRequestUtil.PARAMETER_DELIMITER)) { 349 if (internalUri != null) { 350 internalUri = rewriteUri(internalUri); 351 // this is an internal link 352 link = m_linkTable.addLink(type, internalUri, true); 353 // link management check 354 link.checkConsistency(m_cms); 355 356 if ("IMG".equals(tag.getTagName()) || TAG_AREA.equals(tag.getTagName())) { 357 // now ensure the image has the "alt" attribute set 358 setAltAttributeFromTitle(tag, internalUri); 359 } 360 } else { 361 // this is an external link 362 link = m_linkTable.addLink(type, targetUri, false); 363 } 364 } 365 if (link != null) { 366 tag.setAttribute(attr, CmsMacroResolver.formatMacro(link.getName())); 367 } 368 } 369 break; 370 default: // empty 371 } 372 } 373 374 /** 375 * Process a link tag.<p> 376 * 377 * @param tag the tag to process 378 */ 379 protected void processLinkTag(LinkTag tag) { 380 381 processLink(tag, ATTRIBUTE_HREF, CmsRelationType.valueOf(tag.getTagName())); 382 } 383 384 /** 385 * Process an object tag.<p> 386 * 387 * @param tag the tag to process 388 */ 389 protected void processObjectTag(ObjectTag tag) { 390 391 CmsRelationType type = CmsRelationType.valueOf(tag.getTagName()); 392 for (int i = 0; i < OBJECT_TAG_LINKED_ATTRIBS.length; i++) { 393 String attr = OBJECT_TAG_LINKED_ATTRIBS[i]; 394 processLink(tag, attr, type); 395 if ((i == 0) && (tag.getAttribute(attr) != null)) { 396 // if code base is available, the other attributes are relative to it, so do not process them 397 break; 398 } 399 } 400 SimpleNodeIterator itChildren = tag.children(); 401 while (itChildren.hasMoreNodes()) { 402 Node node = itChildren.nextNode(); 403 if (node instanceof Tag) { 404 Tag childTag = (Tag)node; 405 if (TAG_PARAM.equals(childTag.getTagName())) { 406 processLink(childTag, ATTRIBUTE_VALUE, type); 407 } 408 } 409 } 410 } 411 412 /** 413 * Ensures that the given tag has the "alt" attribute set.<p> 414 * 415 * if not set, it will be set from the title of the given resource.<p> 416 * 417 * @param tag the tag to set the alt attribute for 418 * @param internalUri the internal URI to get the title from 419 */ 420 protected void setAltAttributeFromTitle(Tag tag, String internalUri) { 421 422 boolean hasAltAttrib = (tag.getAttribute("alt") != null); 423 if (!hasAltAttrib) { 424 String value = null; 425 if ((internalUri != null) && (m_rootCms != null)) { 426 // internal image: try to read the "alt" text from the "Title" property 427 try { 428 value = m_rootCms.readPropertyObject( 429 internalUri, 430 CmsPropertyDefinition.PROPERTY_TITLE, 431 false).getValue(); 432 } catch (CmsException e) { 433 // property can't be read, ignore 434 } 435 } 436 // some editors add a "/" at the end of the tag, we must make sure to insert before that 437 @SuppressWarnings("unchecked") 438 Vector<Attribute> attrs = tag.getAttributesEx(); 439 // first element is always the tag name 440 attrs.add(1, new Attribute(" ")); 441 attrs.add(2, new Attribute("alt", value == null ? "" : value, '"')); 442 } 443 } 444 445 /** 446 * Use the {@link org.opencms.file.wrapper.CmsObjectWrapper} to restore the link in the VFS.<p> 447 * 448 * @param internalUri the internal URI to restore 449 * 450 * @return the restored URI 451 */ 452 private String rewriteUri(String internalUri) { 453 454 // if an object wrapper is used, rewrite the uri 455 if (m_cms != null) { 456 Object obj = m_cms.getRequestContext().getAttribute(CmsObjectWrapper.ATTRIBUTE_NAME); 457 if (obj != null) { 458 CmsObjectWrapper wrapper = (CmsObjectWrapper)obj; 459 return wrapper.restoreLink(internalUri); 460 } 461 } 462 463 return internalUri; 464 } 465}