/**
 * Splits a URI String into separate components.<p>
 *
 * A URI is split into a <code>prefix</code>, an <code>anchor</code> and a <code>query</code> part.
 * Additionally the <code>protocol</code> (scheme) is extracted if one is present.<p>
 *
 * Example: <code>http://www.opencms.org/some/path/?a=b&amp;c=d#someanchor</code> results in
 * protocol <code>http</code>, prefix <code>http://www.opencms.org/some/path/</code>,
 * query <code>a=b&amp;c=d</code> and anchor <code>someanchor</code>.<p>
 */
public class CmsUriSplitter {

    /** The anchor part of the URI, for example <code>someanchor</code>. */
    private String m_anchor;

    /** Indicates if 'strict' URI parsing was error free (always <code>true</code> if simple parsing was requested). */
    private boolean m_errorFree;

    /** Indicates if 'strict' URI parsing was requested. */
    private boolean m_isStrict;

    /** The URI protocol, for example <code>http</code> or <code>https</code>. */
    private String m_protocol;

    /** The prefix part of the URI, for example <code>http://www.opencms.org/some/path/</code>. */
    private String m_prefix;

    /** The query part of the URI, for example <code>a=b&amp;c=d</code>. */
    private String m_query;

    /** The lazily created suffix part of the URI (query plus anchor). */
    private String m_suffix;

    /** The original URI String that was split. */
    private String m_uri;

    /**
     * Creates a split URI using the default (not strict) parsing mode.<p>
     *
     * @param uri the URI to split
     */
    public CmsUriSplitter(String uri) {

        this(uri, false);
    }

    /**
     * Creates a split URI using the given parsing mode.<p>
     *
     * Using 'strict' parsing mode, all requirements for a URI are checked.
     * If 'strict' is set to <code>false</code>, then only some simple parsing rules are applied,
     * in which case the result may not be 100% valid (but still usable).
     * If 'strict' parsing generates an error, then simple parsing is used as a fallback
     * and {@link #isErrorFree()} will return <code>false</code>.<p>
     *
     * A <code>null</code> URI is accepted, in this case all components remain <code>null</code>.<p>
     *
     * @param uri the URI to split
     * @param strict if <code>true</code>, then 'strict' parsing mode is used, otherwise a relaxed URI parsing is done
     */
    public CmsUriSplitter(String uri, boolean strict) {

        m_uri = uri;
        m_errorFree = true;
        m_isStrict = strict;

        boolean parsed = false;
        if (strict) {
            try {
                parseStrict(new URI(uri));
                parsed = true;
            } catch (Exception e) {
                // thrown by the URI constructor if the URI is invalid (or null),
                // remember the error and fall back to simple parsing
                m_errorFree = false;
            }
        }
        if (!parsed && (uri != null)) {
            parseSimple(uri);
        }
    }

    /**
     * Checks if the given URI is well formed.<p>
     *
     * @param uri the URI to check
     *
     * @return <code>true</code> if the given URI is well formed, <code>false</code> otherwise
     *      (also for a <code>null</code> input)
     */
    @SuppressWarnings("unused")
    public static boolean isValidUri(String uri) {

        try {
            // the object is only created to trigger the validation done by the constructor
            new URI(uri);
            return true;
        } catch (Exception e) {
            // URISyntaxException for malformed input, NullPointerException for null
            return false;
        }
    }

    /**
     * Null safe String comparison.<p>
     *
     * @param first the first String
     * @param second the second String
     *
     * @return <code>true</code> if both Strings are <code>null</code> or equal
     */
    private static boolean isEqual(String first, String second) {

        return (first == null) ? (second == null) : first.equals(second);
    }

    /**
     * Two splitters are equal if protocol, prefix, anchor and query are equal,
     * the parsing mode and the original input String are not compared.<p>
     *
     * @see java.lang.Object#equals(java.lang.Object)
     */
    @Override
    public boolean equals(Object obj) {

        if (obj == this) {
            return true;
        }
        if (!(obj instanceof CmsUriSplitter)) {
            return false;
        }
        CmsUriSplitter other = (CmsUriSplitter)obj;
        return isEqual(m_protocol, other.m_protocol)
            && isEqual(m_prefix, other.m_prefix)
            && isEqual(m_anchor, other.m_anchor)
            && isEqual(m_query, other.m_query);
    }

    /**
     * Returns the anchor part of the uri, for example <code>someanchor</code>,
     * or <code>null</code> if no anchor is available.<p>
     *
     * @return the anchor part of the uri
     */
    public String getAnchor() {

        return m_anchor;
    }

    /**
     * Returns the prefix part of the uri, for example <code>http://www.opencms.org/some/path/</code>,
     * or <code>null</code> if no prefix is available.<p>
     *
     * @return the prefix part of the uri
     */
    public String getPrefix() {

        return m_prefix;
    }

    /**
     * Returns the URI protocol, for example <code>http</code> or <code>https</code>,
     * or <code>null</code> if no protocol was found.<p>
     *
     * @return the URI protocol
     */
    public String getProtocol() {

        return m_protocol;
    }

    /**
     * Returns the query part of the uri, for example <code>a=b&amp;c=d</code>,
     * or <code>null</code> if no query is available.<p>
     *
     * @return the query part of the uri
     */
    public String getQuery() {

        return m_query;
    }

    /**
     * Returns the suffix part of the uri, a combination of query and anchor,
     * for example <code>?a=b&amp;c=d#someanchor</code>,
     * or the empty String if no suffix is available.<p>
     *
     * @return the suffix part of the uri
     */
    public String getSuffix() {

        if (m_suffix == null) {
            // the suffix is built on first access only and then reused
            StringBuilder result = new StringBuilder();
            if (m_query != null) {
                result.append('?').append(m_query);
            }
            if (m_anchor != null) {
                result.append('#').append(m_anchor);
            }
            m_suffix = result.toString();
        }
        return m_suffix;
    }

    /**
     * Returns the URI String passed to this URI splitter.<p>
     *
     * @return the URI String passed to this URI splitter
     */
    public String getUri() {

        return m_uri;
    }

    /**
     * The hash code is calculated from prefix, anchor and query,
     * which is consistent with {@link #equals(Object)}.<p>
     *
     * @see java.lang.Object#hashCode()
     */
    @Override
    public int hashCode() {

        int hashCode = 0;
        if (m_prefix != null) {
            hashCode += m_prefix.hashCode();
        }
        if (m_anchor != null) {
            hashCode += m_anchor.hashCode();
        }
        if (m_query != null) {
            hashCode += m_query.hashCode();
        }
        return hashCode;
    }

    /**
     * Returns <code>true</code> if the URI was parsed error free in 'strict' mode,
     * or if the simple mode was used.<p>
     *
     * @return <code>true</code> if the URI was parsed error free in 'strict' mode,
     *      or if the simple mode was used
     */
    public boolean isErrorFree() {

        return m_errorFree;
    }

    /**
     * Returns a URI object created from the original input String.<p>
     *
     * This method will do a "best effort" to convert the original input String to a legal URI.
     * Most notably, it will be able to handle original input Strings that contain a space " "
     * and other usually illegal characters.<p>
     *
     * @return a URI object created from the original input String
     *
     * @throws URISyntaxException in case no URI object can be created from the original input String
     */
    public URI toURI() throws URISyntaxException {

        if (m_isStrict && m_errorFree) {
            // we have already verified that the URI contains no errors
            return new URI(m_uri);
        }
        // a missing prefix must not end up as the literal text "null" in the result
        StringBuilder schemeSpecificPart = new StringBuilder();
        if (m_prefix != null) {
            schemeSpecificPart.append(m_prefix);
        }
        if (m_query != null) {
            schemeSpecificPart.append('?').append(m_query);
        }
        // create a new URI from the components
        // using this constructor the input will be escaped if required
        return new URI(null, schemeSpecificPart.toString(), m_anchor);
    }

    /**
     * Splits the given URI String using some simple rules without any validation.<p>
     *
     * Everything up to the first <code>#</code> or <code>?</code> is the prefix.
     * A <code>#</code> starts the anchor and a <code>?</code> starts the query, if one of these
     * characters occurs more than once then only the content after the last occurrence is kept
     * (same behavior as the strict parser).<p>
     *
     * @param uri the URI to split, must not be <code>null</code>
     */
    private void parseSimple(String uri) {

        int len = uri.length();
        StringBuilder prefix = new StringBuilder(len);
        StringBuilder anchor = new StringBuilder();
        StringBuilder query = new StringBuilder();

        // the part of the URI the current character belongs to
        StringBuilder current = prefix;
        boolean protocolFound = false;

        for (int i = 0; i < len; i++) {
            char c = uri.charAt(i);
            if (c == '#') {
                // start of anchor, a previously collected anchor is dropped
                anchor.setLength(0);
                current = anchor;
                continue;
            }
            if (c == '?') {
                // start of query, a previously collected query is dropped
                query.setLength(0);
                current = query;
                continue;
            }
            if ((c == ':') && (current == prefix) && !protocolFound) {
                // only the first colon delimits the protocol,
                // later colons (e.g. in front of a port number) must not overwrite it
                m_protocol = prefix.toString();
                protocolFound = true;
            }
            current.append(c);
        }

        // empty components are reported as null
        if (prefix.length() > 0) {
            m_prefix = prefix.toString();
        }
        if (anchor.length() > 0) {
            m_anchor = anchor.toString();
        }
        if (query.length() > 0) {
            m_query = query.toString();
        }
    }

    /**
     * Initializes the components from a successfully parsed URI object.<p>
     *
     * @param u the parsed URI
     */
    private void parseStrict(URI u) {

        m_protocol = u.getScheme();
        String prefix = ((m_protocol != null) ? m_protocol + ":" : "") + u.getRawSchemeSpecificPart();
        m_anchor = u.getRawFragment();
        m_query = u.getRawQuery();

        // the scheme specific part still contains the query,
        // for opaque URIs (e.g. "mailto:") this is the only place where the query can be found
        int pos = prefix.indexOf('?');
        if (pos != -1) {
            m_query = prefix.substring(pos + 1);
            prefix = prefix.substring(0, pos);
        }
        m_prefix = prefix;

        if (m_anchor != null) {
            // a '?' inside the fragment is treated as start of the query (same behavior as simple parser)
            pos = m_anchor.indexOf('?');
            if (pos != -1) {
                m_query = m_anchor.substring(pos + 1);
                m_anchor = m_anchor.substring(0, pos);
            }
        }
    }
}