001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.util;
029
030import java.net.URI;
031import java.net.URISyntaxException;
032
/**
 * Splits an URI String into separate components.<p>
 *
 * An URI is split into a <code>prefix</code>, an <code>anchor</code> and a <code>query</code> part.
 * Additionally the <code>protocol</code> (scheme) is extracted if the URI contains one.<p>
 *
 * Two parsing modes are available: a 'strict' mode that relies on {@link java.net.URI},
 * and a relaxed 'simple' mode that only looks for the separator characters
 * <code>':'</code>, <code>'?'</code> and <code>'#'</code>.<p>
 */
public class CmsUriSplitter {

    /** The anchor part of the URI, for example <code>someanchor</code>. */
    private String m_anchor;

    /** Indicates if 'strict' URI parsing did produce an error. */
    private boolean m_errorFree;

    /** Indicates if 'strict' URI parsing was used. */
    private boolean m_isStrict;

    /** The URI protocol, for example <code>http</code> or <code>https</code>. */
    private String m_protocol;

    /** The prefix part of the URI, for example <code>http://www.opencms.org/some/path/</code>. */
    private String m_prefix;

    /** The query part of the URI, for example <code>a=b&c=d</code>. */
    private String m_query;

    /** The suffix part of the URI, lazily calculated by {@link #getSuffix()}. */
    private String m_suffix;

    /** The original URI String that was split. */
    private String m_uri;

    /**
     * Creates a split URI using the default (not strict) parsing mode.<p>
     *
     * @param uri the URI to split
     */
    public CmsUriSplitter(String uri) {

        this(uri, false);
    }

    /**
     * Creates a split URI using the given parsing mode.<p>
     *
     * Using 'strict' parsing mode, all requirements for an URI are checked.
     * If 'strict' is set to <code>false</code>, then only some simple parsing rules are applied,
     * in which case the result may not be 100% valid (but still usable).
     * If 'strict' parsing generates an error, then simple parsing is used as a fallback.<p>
     *
     * A <code>null</code> URI is accepted, in this case all components remain <code>null</code>.
     * In 'strict' mode a <code>null</code> URI is reported as a parsing error.<p>
     *
     * @param uri the URI to split
     * @param strict if <code>true</code>, then 'strict' parsing mode is used, otherwise a relaxed URI parsing is done
     */
    public CmsUriSplitter(String uri, boolean strict) {

        m_uri = uri;
        m_isStrict = strict;

        boolean strictlyParsed = strict && parseStrict(uri);
        // only a failed strict parsing counts as an error, simple parsing never fails
        m_errorFree = !strict || strictlyParsed;

        if (!strictlyParsed && (uri != null)) {
            // either simple mode was requested, or strict parsing failed and we fall back
            parseSimple(uri);
        }
    }

    /**
     * Checks if the given URI is well formed.<p>
     *
     * @param uri the URI to check
     *
     * @return <code>true</code> if the given URI is well formed,
     *      <code>false</code> if it is malformed or <code>null</code>
     */
    @SuppressWarnings("unused")
    public static boolean isValidUri(String uri) {

        if (uri == null) {
            return false;
        }
        try {
            // the instance is not needed, only the syntax check done by the constructor
            new URI(uri);
            return true;
        } catch (URISyntaxException e) {
            return false;
        }
    }

    /**
     * Null safe comparison of two Strings.<p>
     *
     * @param first the first String to compare
     * @param second the second String to compare
     *
     * @return <code>true</code> if both Strings are <code>null</code> or equal
     */
    private static boolean isEqual(String first, String second) {

        return (first == null) ? (second == null) : first.equals(second);
    }

    /**
     * Two URI splitters are equal if their protocol, prefix, anchor and query parts are equal.<p>
     *
     * @see java.lang.Object#equals(java.lang.Object)
     */
    @Override
    public boolean equals(Object obj) {

        if (obj == this) {
            return true;
        }
        if (!(obj instanceof CmsUriSplitter)) {
            return false;
        }
        CmsUriSplitter other = (CmsUriSplitter)obj;
        return isEqual(m_protocol, other.m_protocol)
            && isEqual(m_prefix, other.m_prefix)
            && isEqual(m_anchor, other.m_anchor)
            && isEqual(m_query, other.m_query);
    }

    /**
     * Returns the anchor part of the uri, for example <code>someanchor</code>,
     * or <code>null</code> if no anchor is available.<p>
     *
     * @return the anchor part of the uri
     */
    public String getAnchor() {

        return m_anchor;
    }

    /**
     * Returns the prefix part of the uri, for example <code>http://www.opencms.org/some/path/</code>,
     * or <code>null</code> if no prefix is available.<p>
     *
     * @return the prefix part of the uri
     */
    public String getPrefix() {

        return m_prefix;
    }

    /**
     * Returns the URI protocol, for example <code>http</code> or <code>https</code>,
     * or <code>null</code> if the URI does not contain a protocol.<p>
     *
     * @return the URI protocol
     */
    public String getProtocol() {

        return m_protocol;
    }

    /**
     * Returns the query part of the uri, for example <code>a=b&c=d</code>,
     * or <code>null</code> if no query is available.<p>
     *
     * @return the query part of the uri
     */
    public String getQuery() {

        return m_query;
    }

    /**
     * Returns the suffix part of the uri, a combination of query and anchor,
     * for example <code>?a=b&c=d#someanchor</code>,
     * or the empty String if no suffix is available.<p>
     *
     * @return the suffix part of the uri
     */
    public String getSuffix() {

        if (m_suffix == null) {
            // calculated on first access only and cached afterwards
            StringBuilder result = new StringBuilder();
            if (m_query != null) {
                result.append('?').append(m_query);
            }
            if (m_anchor != null) {
                result.append('#').append(m_anchor);
            }
            m_suffix = result.toString();
        }
        return m_suffix;
    }

    /**
     * Returns the URI String passed to this URI splitter.<p>
     *
     * @return the URI String passed to this URI splitter
     */
    public String getUri() {

        return m_uri;
    }

    /**
     * The hash code is calculated from the prefix, anchor and query parts.<p>
     *
     * @see java.lang.Object#hashCode()
     */
    @Override
    public int hashCode() {

        int hashCode = 0;
        if (m_prefix != null) {
            hashCode += m_prefix.hashCode();
        }
        if (m_anchor != null) {
            hashCode += m_anchor.hashCode();
        }
        if (m_query != null) {
            hashCode += m_query.hashCode();
        }
        return hashCode;
    }

    /**
     * Returns <code>true</code> if the URI was parsed error free in 'strict' mode,
     * or if the simple mode was used.<p>
     *
     * @return <code>true</code> if the URI was parsed error free in 'strict' mode,
     *      or if the simple mode was used
     */
    public boolean isErrorFree() {

        return m_errorFree;
    }

    /**
     * Returns an URI object created from the original input String.<p>
     *
     * This method will do a "best effort" to convert the original input String to a legal URI.
     * Most notably, it will be able to handle original input Strings that contain a space " "
     * and other usually illegal characters.<p>
     *
     * @return an URI object created from the original input String
     *
     * @throws URISyntaxException in case no URI object can be created from the original input String
     */
    public URI toURI() throws URISyntaxException {

        if (m_isStrict && m_errorFree) {
            // we have already verified that the URI contains no errors
            return new URI(m_uri);
        }
        // create a new URI from the components
        // using this constructor the input will be escaped if required
        // a missing prefix must be treated as empty, otherwise the literal text "null" ends up in the URI
        String schemeSpecificPart = (m_prefix != null) ? m_prefix : "";
        if (m_query != null) {
            schemeSpecificPart = schemeSpecificPart + "?" + m_query;
        }
        return new URI(null, schemeSpecificPart, m_anchor);
    }

    /**
     * Splits the given URI using the relaxed 'simple' parsing rules.<p>
     *
     * Everything up to the first <code>'#'</code> or <code>'?'</code> is the prefix.
     * A <code>'#'</code> starts the anchor and a <code>'?'</code> starts the query,
     * a repeated separator discards what was collected for that part before.
     * Parts that turn out to be empty are left <code>null</code>.<p>
     *
     * @param uri the URI to split, must not be <code>null</code>
     */
    private void parseSimple(String uri) {

        int len = uri.length();
        StringBuilder prefix = new StringBuilder(len);
        StringBuilder anchor = null;
        StringBuilder query = null;
        // the buffer of the part that is currently collected
        StringBuilder current = prefix;

        for (int i = 0; i < len; i++) {
            char c = uri.charAt(i);
            if ((c == ':') && (current == prefix) && (m_protocol == null)) {
                // only the first colon terminates the protocol,
                // later colons (for example in front of a port number) belong to the prefix
                m_protocol = prefix.toString();
            }
            if (c == '#') {
                // start of anchor
                anchor = new StringBuilder();
                current = anchor;
            } else if (c == '?') {
                // start of query
                // ensure a duplicate query part is 'flushed' (same behavior as strict parser)
                query = new StringBuilder();
                current = query;
            } else {
                current.append(c);
            }
        }

        if (prefix.length() > 0) {
            m_prefix = prefix.toString();
        }
        if ((anchor != null) && (anchor.length() > 0)) {
            m_anchor = anchor.toString();
        }
        if ((query != null) && (query.length() > 0)) {
            m_query = query.toString();
        }
    }

    /**
     * Splits the given URI using the 'strict' rules implemented by {@link java.net.URI}.<p>
     *
     * No member is modified in case the URI can not be parsed.<p>
     *
     * @param uri the URI to split, may be <code>null</code>
     *
     * @return <code>true</code> if the URI was parsed without errors,
     *      <code>false</code> if it is <code>null</code> or malformed
     */
    private boolean parseStrict(String uri) {

        if (uri == null) {
            return false;
        }
        URI u;
        try {
            u = new URI(uri);
        } catch (URISyntaxException e) {
            // the URI is invalid, the caller falls back to simple parsing
            return false;
        }

        m_protocol = u.getScheme();
        m_prefix = ((m_protocol != null) ? m_protocol + ":" : "") + u.getRawSchemeSpecificPart();
        m_anchor = u.getRawFragment();
        m_query = u.getRawQuery();

        // the scheme specific part still includes the query, so cut it off the prefix
        int pos = m_prefix.indexOf('?');
        if (pos != -1) {
            m_query = m_prefix.substring(pos + 1);
            m_prefix = m_prefix.substring(0, pos);
        }
        if (m_anchor != null) {
            // a '?' inside the fragment starts a new query that replaces the one found before
            pos = m_anchor.indexOf('?');
            if (pos != -1) {
                m_query = m_anchor.substring(pos + 1);
                m_anchor = m_anchor.substring(0, pos);
            }
        }
        return true;
    }
}