001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (C) Alkacon Software (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.util;
029
030import org.opencms.cache.CmsVfsMemoryObjectCache;
031import org.opencms.file.CmsFile;
032import org.opencms.file.CmsObject;
033import org.opencms.i18n.CmsEncoder;
034import org.opencms.main.CmsException;
035import org.opencms.main.CmsLog;
036
037import java.io.ByteArrayInputStream;
038import java.io.InputStream;
039import java.util.Collection;
040import java.util.HashSet;
041import java.util.Set;
042
043import org.apache.commons.logging.Log;
044
045import org.owasp.validator.html.AntiSamy;
046import org.owasp.validator.html.CleanResults;
047import org.owasp.validator.html.Policy;
048import org.owasp.validator.html.PolicyException;
049import org.owasp.validator.html.ScanException;
050
051/**
052 * This class is responsible for automatically escaping parameters in Flex requests. It keeps track
053 * of which parameters to escape (or not escape), and which parameters need to be processed by AntiSamy.<p>
054 */
055public class CmsParameterEscaper {
056
057    /** The logger instance for this class. */
058    private static final Log LOG = CmsLog.getLog(CmsParameterEscaper.class);
059
060    /** The file name of the default policy. */
061    public static final String DEFAULT_POLICY = "antisamy-opencms.xml";
062
063    /** The default policy, which is used when no policy path is given. */
064    protected static Policy defaultPolicy;
065
066    static {
067        try {
068            // Don't hardcode the resource path, use the package of this class as the location
069            String packageName = CmsParameterEscaper.class.getPackage().getName();
070            String resourceName = packageName.replace(".", "/") + "/" + DEFAULT_POLICY;
071            InputStream stream = CmsParameterEscaper.class.getClassLoader().getResourceAsStream(resourceName);
072            Policy policy = Policy.getInstance(stream);
073            defaultPolicy = policy;
074        } catch (PolicyException e) {
075            LOG.error(e.getLocalizedMessage(), e);
076        }
077    }
078
079    /** The names of parameters which shouldn't be escaped. */
080    private Set<String> m_exceptions = new HashSet<String>();
081
082    /** The names of parameters which need to be HTML-cleaned. */
083    private Set<String> m_cleanHtml = new HashSet<String>();
084
085    /** The AntiSamy instance for cleaning HTML. */
086    private AntiSamy m_antiSamy;
087
088    /**
089     * Helper method for reading an AntiSamy policy file from the VFS.<p>
090     *
091     * @param cms the current CMS context
092     * @param sitePath the site path of the policy file
093     *
094     * @return the policy object for the given path
095     */
096    public static Policy readPolicy(CmsObject cms, String sitePath) {
097
098        try {
099            CmsFile policyFile = cms.readFile(sitePath);
100            ByteArrayInputStream input = new ByteArrayInputStream(policyFile.getContents());
101
102            // we use the deprecated method here because it is the only way to load
103            // a policy directly from the VFS.
104            Policy policy = Policy.getInstance(input);
105            return policy;
106        } catch (CmsException e) {
107            LOG.error("Could not read Antisamy policy file");
108            LOG.error(e.getLocalizedMessage(), e);
109            return null;
110        } catch (PolicyException e) {
111            LOG.error("Invalid Antisamy policy read from " + sitePath);
112            LOG.error(e.getLocalizedMessage(), e);
113            return null;
114        }
115    }
116
117    /**
118     * Creates a new AntiSamy instance for a given policy path.<p>
119     *
120     * @param cms the current CMS context
121     * @param policyPath the policy site path
122     *
123     * @return the new AntiSamy instance
124     */
125    public AntiSamy createAntiSamy(CmsObject cms, String policyPath) {
126
127        String rootPath = cms.addSiteRoot(policyPath);
128        Policy policy = null;
129        if (policyPath != null) {
130            Object cacheValue = CmsVfsMemoryObjectCache.getVfsMemoryObjectCache().getCachedObject(cms, rootPath);
131            if (cacheValue == null) {
132                policy = readPolicy(cms, policyPath);
133                if (policy != null) {
134                    CmsVfsMemoryObjectCache.getVfsMemoryObjectCache().putCachedObject(cms, rootPath, policy);
135                }
136            } else {
137                policy = (Policy)cacheValue;
138            }
139        }
140        if (policy == null) {
141            policy = defaultPolicy;
142        }
143        if (policy != null) {
144            return new AntiSamy(policy);
145        }
146        return null;
147    }
148
149    /**
150     * Enables the AntiSamy HTML cleaning for some parameters.<p>
151     *
152     * @param cms the current CMS context
153     * @param policyPath the policy site path in the VFS
154     * @param params the parameters for which HTML cleaning should be  enabled
155     */
156    public void enableAntiSamy(CmsObject cms, String policyPath, Set<String> params) {
157
158        m_antiSamy = createAntiSamy(cms, policyPath);
159        m_cleanHtml = params;
160    }
161
162    /**
163     * Escapes a single parameter value.<p>
164     *
165     * @param name the name of the parameter
166     * @param html the value of the parameter
167     *
168     * @return the escaped parameter value
169     */
170    public String escape(String name, String html) {
171
172        if (html == null) {
173            return null;
174        }
175        if (m_exceptions.contains(name)) {
176            return html;
177        }
178        LOG.info("Escaping parameter '" + name + "' with value '" + html + "'");
179        if (m_cleanHtml.contains(name)) {
180            return filterAntiSamy(html);
181        }
182        return CmsEncoder.escapeXml(html);
183    }
184
185    /**
186     * Escapes an array of parameter values.<p>
187     *
188     * @param name the parameter name
189     * @param values the parameter values
190     *
191     * @return the escaped parameter values
192     */
193    public String[] escape(String name, String[] values) {
194
195        if (values == null) {
196            return null;
197        }
198        if (m_exceptions.contains(name)) {
199            return values;
200        }
201        boolean cleanHtml = m_cleanHtml.contains(name);
202        String[] result = new String[values.length];
203        for (int i = 0; i < values.length; i++) {
204            if (cleanHtml) {
205                result[i] = filterAntiSamy(values[i]);
206            } else {
207                result[i] = CmsEncoder.escapeXml(values[i]);
208            }
209        }
210        return result;
211    }
212
213    /**
214     * Filters HTML input using the internal AntiSamy instance.<p>
215     *
216     * @param html the HTML to filter
217     *
218     * @return the filtered HTML
219     */
220    public String filterAntiSamy(String html) {
221
222        if (m_antiSamy == null) {
223            LOG.warn("Antisamy policy invalid, using escapeXml as a fallback");
224            return CmsEncoder.escapeXml(html);
225        }
226        try {
227            CleanResults results = m_antiSamy.scan(html);
228            if (results.getNumberOfErrors() > 0) {
229                LOG.info("Antisamy error messages:");
230                for (Object message : results.getErrorMessages()) {
231                    LOG.info(message);
232                }
233            }
234            return results.getCleanHTML();
235        } catch (PolicyException e) {
236            LOG.error(e.getLocalizedMessage(), e);
237            return CmsEncoder.escapeXml(html);
238        } catch (ScanException e) {
239            LOG.error(e.getLocalizedMessage(), e);
240            return CmsEncoder.escapeXml(html);
241        }
242    }
243
244    /**
245     * Sets the set of names of parameters which shouldn't be escaped.<p>
246     *
247     * @param exceptions a set of parameter names
248     */
249    public void setExceptions(Collection<String> exceptions) {
250
251        m_exceptions = new HashSet<String>(exceptions);
252    }
253
254}