/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software GmbH & Co. KG, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.opencms.xml.types;

import org.opencms.file.CmsObject;
import org.opencms.i18n.CmsEncoder;
import org.opencms.main.CmsLog;
import org.opencms.main.CmsRuntimeException;
import org.opencms.relations.CmsLink;
import org.opencms.relations.CmsLinkUpdateUtil;
import org.opencms.staticexport.CmsLinkProcessor;
import org.opencms.staticexport.CmsLinkTable;
import org.opencms.util.CmsHtmlConverter;
import org.opencms.util.CmsHtmlExtractor;
import org.opencms.util.CmsStringUtil;
import org.opencms.xml.CmsXmlGenericWrapper;
import org.opencms.xml.I_CmsXmlDocument;
import org.opencms.xml.page.CmsXmlPage;

import java.util.Iterator;
import java.util.Locale;

import org.apache.commons.logging.Log;

import org.dom4j.Attribute;
import org.dom4j.Element;
import org.htmlparser.util.ParserException;

/**
 * Describes the XML content type "OpenCmsHtml".<p>
 *
 * The value is stored in the XML element as a "links" node plus a "content" node
 * (see {@link CmsXmlPage#NODE_LINKS} and {@link CmsXmlPage#NODE_CONTENT}); the HTML itself is
 * written as CDATA into the content node, and the links found in the HTML are written to the links node.<p>
 *
 * Both the String value and the plain text value are cached per instance and are
 * re-calculated after {@link #setStringValue(CmsObject, String)} has been called.<p>
 *
 * @since 6.0.0
 */
public class CmsXmlHtmlValue extends A_CmsXmlContentValue {

    /** The name of this type as used in the XML schema. */
    public static final String TYPE_NAME = "OpenCmsHtml";

    /** The log object for this class. */
    private static final Log LOG = CmsLog.getLog(CmsXmlHtmlValue.class);

    /** The schema definition String is located in a text for easier editing. */
    private static String m_schemaDefinition;

    /** Sentinel marking a failed plain text extraction; compared by identity, never by content. */
    private static final String NULL_VALUE = "null";

    /** Base type for single type instances, required for XML pages. */
    private static final I_CmsXmlSchemaType TYPE_BASE = new CmsXmlHtmlValue("base", "1", "1");

    /** The cached plain text value of the element node, <code>null</code> if not calculated yet. */
    private String m_plainTextValue;

    /** The cached String value of the element node, <code>null</code> if not calculated yet. */
    private String m_stringValue;

    /**
     * Creates a new, empty schema type descriptor of type "OpenCmsHtml".<p>
     */
    public CmsXmlHtmlValue() {

        // empty constructor is required for class registration
    }

    /**
     * Creates a new XML content value of type "OpenCmsHtml".<p>
     *
     * @param document the XML content instance this value belongs to
     * @param element the XML element that contains this value
     * @param locale the locale this value is created for
     */
    public CmsXmlHtmlValue(I_CmsXmlDocument document, Element element, Locale locale) {

        super(document, element, locale, TYPE_BASE);
    }

    /**
     * Creates a new XML content value of type "OpenCmsHtml".<p>
     *
     * @param document the XML content instance this value belongs to
     * @param element the XML element that contains this value
     * @param locale the locale this value is created for
     * @param type the type instance to create the value for
     */
    public CmsXmlHtmlValue(I_CmsXmlDocument document, Element element, Locale locale, I_CmsXmlSchemaType type) {

        super(document, element, locale, type);
    }

    /**
     * Creates a new schema type descriptor for the type "OpenCmsHtml".<p>
     *
     * @param name the name of the XML node containing the value according to the XML schema
     * @param minOccurs minimum number of occurrences of this type according to the XML schema
     * @param maxOccurs maximum number of occurrences of this type according to the XML schema
     */
    public CmsXmlHtmlValue(String name, String minOccurs, String maxOccurs) {

        super(name, minOccurs, maxOccurs);
    }

    /**
     * @see org.opencms.xml.types.A_CmsXmlContentValue#createValue(I_CmsXmlDocument, org.dom4j.Element, Locale)
     */
    public I_CmsXmlContentValue createValue(I_CmsXmlDocument document, Element element, Locale locale) {

        return new CmsXmlHtmlValue(document, element, locale, this);
    }

    /**
     * Appends a new element for this type to the given root, including the empty
     * "links" and "content" child nodes, and fills in the default value if the
     * content handler of the document provides one.<p>
     *
     * @see org.opencms.xml.types.I_CmsXmlSchemaType#generateXml(org.opencms.file.CmsObject, org.opencms.xml.I_CmsXmlDocument, org.dom4j.Element, java.util.Locale)
     */
    @Override
    public Element generateXml(CmsObject cms, I_CmsXmlDocument document, Element root, Locale locale) {

        Element element = root.addElement(getName());
        // the name attribute is the node name plus the position among the equally named siblings
        int index = element.getParent().elements(element.getQName()).indexOf(element);
        element.addAttribute(CmsXmlPage.ATTRIBUTE_NAME, getName() + index);
        element.addElement(CmsXmlPage.NODE_LINKS);
        element.addElement(CmsXmlPage.NODE_CONTENT);

        // get the default value from the content handler
        String defaultValue = document.getHandler().getDefault(cms, this, locale);
        if (defaultValue != null) {
            try {
                I_CmsXmlContentValue value = createValue(document, element, locale);
                value.setStringValue(cms, defaultValue);
            } catch (CmsRuntimeException e) {
                // should not happen if default value is correct
                LOG.error(
                    Messages.get().getBundle().key(Messages.ERR_XMLCONTENT_INVALID_ELEM_DEFAULT_1, defaultValue),
                    e);
                element.clearContent();
            }
        }
        return element;
    }

    /**
     * Returns the link table of this XML page element.<p>
     *
     * The table is built from the "link" children of the "links" node; it is empty
     * if the element has no "links" node.<p>
     *
     * @return the link table of this XML page element
     */
    public CmsLinkTable getLinkTable() {

        CmsLinkTable linkTable = new CmsLinkTable();
        Element links = m_element.element(CmsXmlPage.NODE_LINKS);
        if (links != null) {
            Iterator<Element> itLinks = CmsXmlGenericWrapper.elementIterator(links, CmsXmlPage.NODE_LINK);
            while (itLinks.hasNext()) {
                Element lelem = itLinks.next();
                linkTable.addLink(new CmsLink(lelem));
            }
        }
        return linkTable;
    }

    /**
     * Returns the plain text extracted from the HTML of this value, or <code>null</code>
     * if the extraction failed.<p>
     *
     * The result (including a failure) is cached until the value is changed.<p>
     *
     * @see org.opencms.xml.types.I_CmsXmlContentValue#getPlainText(org.opencms.file.CmsObject)
     */
    @Override
    public String getPlainText(CmsObject cms) {

        if (m_plainTextValue == null) {
            try {
                m_plainTextValue = CmsHtmlExtractor.extractText(getStringValue(cms), m_document.getEncoding());
            } catch (Exception exc) {
                // extraction is best effort only: remember the failure so it is not retried on every call
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Unable to extract plain text from HTML value.", exc);
                }
                m_plainTextValue = NULL_VALUE;
            }
        }
        // intentional identity comparison: only the sentinel assigned above must match,
        // not an extracted text that merely reads "null"
        if (m_plainTextValue == NULL_VALUE) {
            return null;
        }
        return m_plainTextValue;
    }

    /**
     * @see org.opencms.xml.types.I_CmsXmlSchemaType#getSchemaDefinition()
     */
    public String getSchemaDefinition() {

        // the schema definition is located in a separate file for easier editing
        if (m_schemaDefinition == null) {
            m_schemaDefinition = readSchemaDefinition("org/opencms/xml/types/XmlHtmlValue.xsd");
        }
        return m_schemaDefinition;
    }

    /**
     * @see org.opencms.xml.types.I_CmsXmlContentValue#getStringValue(org.opencms.file.CmsObject)
     */
    public String getStringValue(CmsObject cms) {

        if (m_stringValue == null) {
            m_stringValue = createStringValue(cms, m_document);
        }

        return m_stringValue;
    }

    /**
     * @see org.opencms.xml.types.A_CmsXmlContentValue#getTypeName()
     */
    public String getTypeName() {

        return TYPE_NAME;
    }

    /**
     * @see org.opencms.xml.types.A_CmsXmlContentValue#newInstance(java.lang.String, java.lang.String, java.lang.String)
     */
    public I_CmsXmlSchemaType newInstance(String name, String minOccurs, String maxOccurs) {

        return new CmsXmlHtmlValue(name, minOccurs, maxOccurs);
    }

    /**
     * Stores the given HTML in this value.<p>
     *
     * The HTML is stripped of CDATA sections, adjusted to the document encoding, optionally
     * cleaned up by the HTML converter, and its links are replaced by macros; the result is
     * written as CDATA to the "content" node and the collected links to the "links" node.
     * Both cached values (String and plain text) are invalidated afterwards.<p>
     *
     * @throws CmsRuntimeException if replacing the links in the HTML fails
     *
     * @see org.opencms.xml.types.I_CmsXmlContentValue#setStringValue(org.opencms.file.CmsObject, java.lang.String)
     */
    public void setStringValue(CmsObject cms, String value) {

        Element content = m_element.element(CmsXmlPage.NODE_CONTENT);
        Element links = m_element.element(CmsXmlPage.NODE_LINKS);

        String encoding = m_document.getEncoding();
        // a fresh link table is used, so only links of the new content end up in the XML
        CmsLinkProcessor linkProcessor = m_document.getLinkProcessor(cms, new CmsLinkTable());

        String finalValue = value;
        if (finalValue != null) {
            // nested CDATA tags are not allowed, so replace CDATA tags with their contents
            finalValue = finalValue.replaceAll("(?s)// <!\\[CDATA\\[(.*?)// \\]\\]>", "$1"); // special case for embedded Javascript
            finalValue = finalValue.replaceAll("(?s)<!\\[CDATA\\[(.*?)\\]\\]>", "$1");
        }
        if (encoding != null) {
            // ensure all chars in the given content are valid chars for the selected charset
            finalValue = CmsEncoder.adjustHtmlEncoding(finalValue, encoding);
        }

        // remove unnecessary tags if required
        String contentConversion = m_document.getConversion();
        if (CmsHtmlConverter.isConversionEnabled(contentConversion)) {
            CmsHtmlConverter converter = new CmsHtmlConverter(encoding, contentConversion);
            finalValue = converter.convertToStringSilent(finalValue);
            finalValue = fixNullCharacters(finalValue);
        }
        if (linkProcessor != null) {
            try {
                // replace links in HTML by macros and fill link table
                finalValue = linkProcessor.replaceLinks(finalValue);
            } catch (Exception exc) {
                throw new CmsRuntimeException(Messages.get().container(Messages.ERR_HTML_DATA_PROCESSING_0), exc);
            }
        }

        content.clearContent();
        links.clearContent();

        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(finalValue)) {
            content.addCDATA(finalValue);
            if (linkProcessor != null) {
                // may be null in case of default value generation (i.e. setStringValue(String) was called)

                CmsLinkTable linkTable = linkProcessor.getLinkTable();
                for (Iterator<CmsLink> i = linkTable.iterator(); i.hasNext();) {
                    CmsLink link = i.next();
                    CmsLinkUpdateUtil.updateXmlForHtmlValue(
                        link,
                        link.getName(),
                        links.addElement(CmsXmlPage.NODE_LINK));
                }
            }
        }

        // ensure the String value and the plain text derived from it are re-calculated next time,
        // otherwise getPlainText would keep returning the text of the previous content
        m_stringValue = null;
        m_plainTextValue = null;
    }

    /**
     * JTidy sometimes erroneously produces HTML containing 'null' characters (Unicode code point 0), which are
     * invalid in an XML document. Until we find a way to prevent JTidy doing that, we remove the null characters
     * from the HTML, and log a warning.<p>
     *
     * @param jtidyOutput the JTidy output, may be <code>null</code>
     * @return the output with null characters removed, or <code>null</code> if the given output was <code>null</code>
     */
    protected String fixNullCharacters(String jtidyOutput) {

        if (jtidyOutput == null) {
            // nothing to clean up; avoids a NullPointerException when no content was given
            return null;
        }
        // literal replacement, no regular expression required
        String outputWithoutNullChars = jtidyOutput.replace("\u0000", "");
        if (jtidyOutput.length() != outputWithoutNullChars.length()) {
            String context = "";
            if (m_document.getFile() != null) {
                context = "(file=" + m_document.getFile().getRootPath() + ")";
            }
            LOG.warn("HTML cleanup produced invalid null characters in output. " + context);
            if (LOG.isDebugEnabled()) {
                // guarded: the message contains the complete HTML output
                LOG.debug("HTML cleanup output = " + jtidyOutput);
            }
        }
        return outputWithoutNullChars;
    }

    /**
     * Creates the String value for this HTML value element.<p>
     *
     * If the element has no "content" node yet, its text is first moved into a newly created
     * "links" / "content" node structure. The returned String is empty if the element is marked
     * as not enabled; otherwise it is the content text with the link macros replaced by the links
     * from the link table.<p>
     *
     * @param cms an initialized instance of a CmsObject
     * @param document the XML document this value belongs to
     *
     * @return the String value for this HTML value element
     */
    private String createStringValue(CmsObject cms, I_CmsXmlDocument document) {

        Element data = m_element.element(CmsXmlPage.NODE_CONTENT);
        if (data == null) {
            // no content node available: convert the plain element text to the links / content structure
            String content = m_element.getText();
            m_element.clearContent();
            int index = m_element.getParent().elements(m_element.getQName()).indexOf(m_element);
            m_element.addAttribute(CmsXmlPage.ATTRIBUTE_NAME, getName() + index);
            m_element.addElement(CmsXmlPage.NODE_LINKS);
            m_element.addElement(CmsXmlPage.NODE_CONTENT).addCDATA(content);
            data = m_element.element(CmsXmlPage.NODE_CONTENT);
        }
        Attribute enabled = m_element.attribute(CmsXmlPage.ATTRIBUTE_ENABLED);

        String content = "";
        // a missing "enabled" attribute counts as enabled
        if ((enabled == null) || Boolean.valueOf(enabled.getText()).booleanValue()) {

            content = data.getText();

            CmsLinkTable linkTable = getLinkTable();
            if (!linkTable.isEmpty()) {

                // link processing: replace macros with links
                CmsLinkProcessor linkProcessor = document.getLinkProcessor(cms, linkTable);
                try {
                    content = linkProcessor.processLinks(content);
                } catch (ParserException e) {
                    // should better not happen, the unprocessed content is returned in this case
                    LOG.error(Messages.get().getBundle().key(Messages.ERR_XMLCONTENT_LINK_PROCESS_FAILED_0), e);
                }
            }
        }
        return content;
    }
}