001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.xml.types;
029
030import org.opencms.file.CmsObject;
031import org.opencms.i18n.CmsEncoder;
032import org.opencms.main.CmsLog;
033import org.opencms.main.CmsRuntimeException;
034import org.opencms.relations.CmsLink;
035import org.opencms.relations.CmsLinkUpdateUtil;
036import org.opencms.staticexport.CmsLinkProcessor;
037import org.opencms.staticexport.CmsLinkTable;
038import org.opencms.util.CmsHtmlConverter;
039import org.opencms.util.CmsHtmlExtractor;
040import org.opencms.util.CmsStringUtil;
041import org.opencms.xml.CmsXmlGenericWrapper;
042import org.opencms.xml.I_CmsXmlDocument;
043import org.opencms.xml.page.CmsXmlPage;
044
045import java.util.Iterator;
046import java.util.Locale;
047
048import org.apache.commons.logging.Log;
049
050import org.dom4j.Attribute;
051import org.dom4j.Element;
052import org.htmlparser.util.ParserException;
053
054/**
055 * Describes the XML content type "OpenCmsHtml".<p>
056 *
057 * @since 6.0.0
058 */
059public class CmsXmlHtmlValue extends A_CmsXmlContentValue {
060
061    /** The name of this type as used in the XML schema. */
062    public static final String TYPE_NAME = "OpenCmsHtml";
063
064    /** The log object for this class. */
065    private static final Log LOG = CmsLog.getLog(CmsXmlHtmlValue.class);
066
067    /** The schema definition String is located in a text for easier editing. */
068    private static String m_schemaDefinition;
069
070    /** Null value for plain text extraction errors. */
071    private static final String NULL_VALUE = "null";
072
073    /** Base type for single type instances, required for XML pages. */
074    private static final I_CmsXmlSchemaType TYPE_BASE = new CmsXmlHtmlValue("base", "1", "1");
075
076    /** The plain text value of the element node. */
077    private String m_plainTextValue;
078
079    /** The String value of the element node. */
080    private String m_stringValue;
081
082    /**
083     * Creates a new, empty schema type descriptor of type "OpenCmsHtml".<p>
084     */
085    public CmsXmlHtmlValue() {
086
087        // empty constructor is required for class registration
088    }
089
090    /**
091     * Creates a new XML content value of type "OpenCmsHtml".<p>
092     *
093     * @param document the XML content instance this value belongs to
094     * @param element the XML element that contains this value
095     * @param locale the locale this value is created for
096     */
097    public CmsXmlHtmlValue(I_CmsXmlDocument document, Element element, Locale locale) {
098
099        super(document, element, locale, TYPE_BASE);
100    }
101
102    /**
103     * Creates a new XML content value of type "OpenCmsHtml".<p>
104     *
105     * @param document the XML content instance this value belongs to
106     * @param element the XML element that contains this value
107     * @param locale the locale this value is created for
108     * @param type the type instance to create the value for
109     */
110    public CmsXmlHtmlValue(I_CmsXmlDocument document, Element element, Locale locale, I_CmsXmlSchemaType type) {
111
112        super(document, element, locale, type);
113    }
114
115    /**
116     * Creates a new schema type descriptor for the type "OpenCmsHtml".<p>
117     *
118     * @param name the name of the XML node containing the value according to the XML schema
119     * @param minOccurs minimum number of occurrences of this type according to the XML schema
120     * @param maxOccurs maximum number of occurrences of this type according to the XML schema
121     */
122    public CmsXmlHtmlValue(String name, String minOccurs, String maxOccurs) {
123
124        super(name, minOccurs, maxOccurs);
125    }
126
127    /**
128     * @see org.opencms.xml.types.A_CmsXmlContentValue#createValue(I_CmsXmlDocument, org.dom4j.Element, Locale)
129     */
130    public I_CmsXmlContentValue createValue(I_CmsXmlDocument document, Element element, Locale locale) {
131
132        return new CmsXmlHtmlValue(document, element, locale, this);
133    }
134
135    /**
136     * @see org.opencms.xml.types.I_CmsXmlSchemaType#generateXml(org.opencms.file.CmsObject, org.opencms.xml.I_CmsXmlDocument, org.dom4j.Element, java.util.Locale)
137     */
138    @Override
139    public Element generateXml(CmsObject cms, I_CmsXmlDocument document, Element root, Locale locale) {
140
141        Element element = root.addElement(getName());
142        int index = element.getParent().elements(element.getQName()).indexOf(element);
143        element.addAttribute(CmsXmlPage.ATTRIBUTE_NAME, getName() + index);
144        element.addElement(CmsXmlPage.NODE_LINKS);
145        element.addElement(CmsXmlPage.NODE_CONTENT);
146
147        // get the default value from the content handler
148        String defaultValue = document.getHandler().getDefault(cms, this, locale);
149        if (defaultValue != null) {
150            try {
151                I_CmsXmlContentValue value = createValue(document, element, locale);
152                value.setStringValue(cms, defaultValue);
153            } catch (CmsRuntimeException e) {
154                // should not happen if default value is correct
155                LOG.error(
156                    Messages.get().getBundle().key(Messages.ERR_XMLCONTENT_INVALID_ELEM_DEFAULT_1, defaultValue),
157                    e);
158                element.clearContent();
159            }
160        }
161        return element;
162    }
163
164    /**
165     * Returns the link table of this XML page element.<p>
166     *
167     * @return the link table of this XML page element
168     */
169    public CmsLinkTable getLinkTable() {
170
171        CmsLinkTable linkTable = new CmsLinkTable();
172        Element links = m_element.element(CmsXmlPage.NODE_LINKS);
173        if (links != null) {
174            Iterator<Element> itLinks = CmsXmlGenericWrapper.elementIterator(links, CmsXmlPage.NODE_LINK);
175            while (itLinks.hasNext()) {
176                Element lelem = itLinks.next();
177                linkTable.addLink(new CmsLink(lelem));
178            }
179        }
180        return linkTable;
181    }
182
183    /**
184     * @see org.opencms.xml.types.I_CmsXmlContentValue#getPlainText(org.opencms.file.CmsObject)
185     */
186    @Override
187    public String getPlainText(CmsObject cms) {
188
189        if (m_plainTextValue == null) {
190            try {
191                m_plainTextValue = CmsHtmlExtractor.extractText(getStringValue(cms), m_document.getEncoding());
192            } catch (Exception exc) {
193                m_plainTextValue = NULL_VALUE;
194            }
195        }
196        if (m_plainTextValue == NULL_VALUE) {
197            return null;
198        }
199        return m_plainTextValue;
200    }
201
202    /**
203     * @see org.opencms.xml.types.I_CmsXmlSchemaType#getSchemaDefinition()
204     */
205    public String getSchemaDefinition() {
206
207        // the schema definition is located in a separate file for easier editing
208        if (m_schemaDefinition == null) {
209            m_schemaDefinition = readSchemaDefinition("org/opencms/xml/types/XmlHtmlValue.xsd");
210        }
211        return m_schemaDefinition;
212    }
213
214    /**
215     * @see org.opencms.xml.types.I_CmsXmlContentValue#getStringValue(org.opencms.file.CmsObject)
216     */
217    public String getStringValue(CmsObject cms) {
218
219        if (m_stringValue == null) {
220            m_stringValue = createStringValue(cms, m_document);
221        }
222
223        return m_stringValue;
224    }
225
226    /**
227     * @see org.opencms.xml.types.A_CmsXmlContentValue#getTypeName()
228     */
229    public String getTypeName() {
230
231        return TYPE_NAME;
232    }
233
234    /**
235     * @see org.opencms.xml.types.A_CmsXmlContentValue#newInstance(java.lang.String, java.lang.String, java.lang.String)
236     */
237    public I_CmsXmlSchemaType newInstance(String name, String minOccurs, String maxOccurs) {
238
239        return new CmsXmlHtmlValue(name, minOccurs, maxOccurs);
240    }
241
242    /**
243     * @see org.opencms.xml.types.I_CmsXmlContentValue#setStringValue(org.opencms.file.CmsObject, java.lang.String)
244     */
245    public void setStringValue(CmsObject cms, String value) {
246
247        Element content = m_element.element(CmsXmlPage.NODE_CONTENT);
248        Element links = m_element.element(CmsXmlPage.NODE_LINKS);
249        CmsLinkProcessor linkProcessor = null;
250
251        String encoding = m_document.getEncoding();
252        linkProcessor = m_document.getLinkProcessor(cms, new CmsLinkTable());
253
254        String finalValue = value;
255        if (finalValue != null) {
256            // nested CDATA tags are not allowed, so replace CDATA tags with their contents
257            finalValue = finalValue.replaceAll("(?s)// <!\\[CDATA\\[(.*?)// \\]\\]>", "$1"); // special case for embedded Javascript
258            finalValue = finalValue.replaceAll("(?s)<!\\[CDATA\\[(.*?)\\]\\]>", "$1");
259        }
260        if (encoding != null) {
261            // ensure all chars in the given content are valid chars for the selected charset
262            finalValue = CmsEncoder.adjustHtmlEncoding(finalValue, encoding);
263        }
264
265        // remove unnecessary tags if required
266        String contentConversion = m_document.getConversion();
267        if (CmsHtmlConverter.isConversionEnabled(contentConversion)) {
268            CmsHtmlConverter converter = new CmsHtmlConverter(encoding, contentConversion);
269            finalValue = converter.convertToStringSilent(finalValue);
270            finalValue = fixNullCharacters(finalValue);
271        }
272        if (linkProcessor != null) {
273            try {
274                // replace links in HTML by macros and fill link table
275                finalValue = linkProcessor.replaceLinks(finalValue);
276            } catch (Exception exc) {
277                throw new CmsRuntimeException(Messages.get().container(Messages.ERR_HTML_DATA_PROCESSING_0), exc);
278            }
279        }
280
281        content.clearContent();
282        links.clearContent();
283
284        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(finalValue)) {
285            content.addCDATA(finalValue);
286            if (linkProcessor != null) {
287                // may be null in case of default value generation (i.e. setStringValue(String) was called)
288
289                CmsLinkTable linkTable = linkProcessor.getLinkTable();
290                for (Iterator<CmsLink> i = linkTable.iterator(); i.hasNext();) {
291                    CmsLink link = i.next();
292                    CmsLinkUpdateUtil.updateXmlForHtmlValue(
293                        link,
294                        link.getName(),
295                        links.addElement(CmsXmlPage.NODE_LINK));
296                }
297            }
298        }
299
300        // ensure the String value is re-calculated next time
301        m_stringValue = null;
302    }
303
304    /**
305     * JTidy sometimes erroneouslsy produces HTML containing 'null' characters (Unicode code point 0), which are
306     * invalid in an XML document. Until we find a way to prevent JTidy doing that, we remove the null characters
307     * from the HTML, and log a warning.<p>
308     *
309     * @param jtidyOutput the JTidy output
310     * @return the output with null characters removed
311     */
312    protected String fixNullCharacters(String jtidyOutput) {
313
314        String outputWithoutNullChars = jtidyOutput.replaceAll("\u0000", "");
315        if (jtidyOutput.length() != outputWithoutNullChars.length()) {
316            String context = "";
317            if (m_document.getFile() != null) {
318                context = "(file=" + m_document.getFile().getRootPath() + ")";
319            }
320            LOG.warn("HTML cleanup produced invalid null characters in output. " + context);
321            LOG.debug("HTML cleanup output = " + jtidyOutput);
322        }
323        return outputWithoutNullChars;
324    }
325
326    /**
327     * Creates the String value for this HTML value element.<p>
328     *
329     * @param cms an initialized instance of a CmsObject
330     * @param document the XML document this value belongs to
331     *
332     * @return the String value for this HTML value element
333     */
334    private String createStringValue(CmsObject cms, I_CmsXmlDocument document) {
335
336        Element data = m_element.element(CmsXmlPage.NODE_CONTENT);
337        if (data == null) {
338            String content = m_element.getText();
339            m_element.clearContent();
340            int index = m_element.getParent().elements(m_element.getQName()).indexOf(m_element);
341            m_element.addAttribute(CmsXmlPage.ATTRIBUTE_NAME, getName() + index);
342            m_element.addElement(CmsXmlPage.NODE_LINKS);
343            m_element.addElement(CmsXmlPage.NODE_CONTENT).addCDATA(content);
344            data = m_element.element(CmsXmlPage.NODE_CONTENT);
345        }
346        Attribute enabled = m_element.attribute(CmsXmlPage.ATTRIBUTE_ENABLED);
347
348        String content = "";
349        if ((enabled == null) || Boolean.valueOf(enabled.getText()).booleanValue()) {
350
351            content = data.getText();
352
353            CmsLinkTable linkTable = getLinkTable();
354            if (!linkTable.isEmpty()) {
355
356                // link processing: replace macros with links
357                CmsLinkProcessor linkProcessor = document.getLinkProcessor(cms, linkTable);
358                try {
359                    content = linkProcessor.processLinks(content);
360                } catch (ParserException e) {
361                    // should better not happen
362                    LOG.error(Messages.get().getBundle().key(Messages.ERR_XMLCONTENT_LINK_PROCESS_FAILED_0), e);
363                }
364            }
365        }
366        return content;
367    }
368}