uriparse/
query.rs

1//! Query Component
2//!
3//! See [[RFC3986, Section 3.4](https://tools.ietf.org/html/rfc3986#section-3.4)].
4//!
5//! This crate does not do query string parsing, it will simply make sure that it is a valid query
6//! string as defined by the RFC. You will need to use another crate (e.g.
7//! [queryst](https://github.com/rustless/queryst)) if you want it parsed.
8
9use std::borrow::Cow;
10use std::convert::{Infallible, TryFrom};
11use std::error::Error;
12use std::fmt::{self, Display, Formatter};
13use std::hash::{Hash, Hasher};
14use std::ops::Deref;
15use std::str;
16
17use crate::utility::{
18    get_percent_encoded_value, normalize_string, percent_encoded_equality, percent_encoded_hash,
19    UNRESERVED_CHAR_MAP,
20};
21
/// A map of byte characters that determines if a character is a valid query character.
///
/// The table is indexed by the byte value. An entry of `0` marks a byte that may not appear
/// literally in a query, while any other entry holds the byte itself. Note that `#` maps to `0`
/// and `%` maps to itself; `parse_query` gives both of them special treatment (end of the query
/// and start of a percent-encoding, respectively).
#[rustfmt::skip]
const QUERY_CHAR_MAP: [u8; 256] = [
 // 0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // 0
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // 1
    0, b'!',    0,    0, b'$', b'%', b'&',b'\'', b'(', b')', b'*', b'+', b',', b'-', b'.', b'/', // 2
 b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b':', b';',    0, b'=',    0, b'?', // 3
 b'@', b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', // 4
 b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X', b'Y', b'Z',    0,    0,    0,    0, b'_', // 5
    0, b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', // 6
 b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z',    0,    0,    0, b'~',    0, // 7
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // 8
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // 9
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // A
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // B
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // C
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // D
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // E
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // F
];
43
/// The query component as defined in
/// [[RFC3986, Section 3.4](https://tools.ietf.org/html/rfc3986#section-3.4)].
///
/// The query is case-sensitive. Furthermore, percent-encoding plays no role in equality checking
/// for characters in the unreserved character set meaning that `"query"` and `"que%72y"` are
/// identical. Both of these attributes are reflected in the equality and hash functions.
///
/// However, be aware that just because percent-encoding plays no role in equality checking does not
/// mean that the query is normalized. If the query needs to be normalized, use the
/// [`Query::normalize`] function.
// NOTE(review): with the `serde` feature the derived `Deserialize` presumably fills both fields
// directly, without going through `parse_query`. The `unsafe` code in this module relies on the
// text being validated US-ASCII, so confirm that deserialized data cannot break that invariant.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Query<'query> {
    /// Whether the query is normalized.
    ///
    /// Computed by `parse_query` while validating the input and set to `true` by
    /// [`Query::normalize`].
    normalized: bool,

    /// The internal query source that is either owned or borrowed.
    query: Cow<'query, str>,
}
63
64impl Query<'_> {
65    /// Returns a new query which is identical but has a lifetime tied to this query.
66    pub fn as_borrowed(&self) -> Query {
67        use self::Cow::*;
68
69        let query = match &self.query {
70            Borrowed(borrowed) => *borrowed,
71            Owned(owned) => owned.as_str(),
72        };
73
74        Query {
75            normalized: self.normalized,
76            query: Cow::Borrowed(query),
77        }
78    }
79
80    /// Returns a `str` representation of the query.
81    ///
82    /// # Examples
83    ///
84    /// ```
85    /// use std::convert::TryFrom;
86    ///
87    /// use uriparse::Query;
88    ///
89    /// let query = Query::try_from("query").unwrap();
90    /// assert_eq!(query.as_str(), "query");
91    /// ```
92    pub fn as_str(&self) -> &str {
93        &self.query
94    }
95
96    /// Converts the [`Query`] into an owned copy.
97    ///
98    /// If you construct the query from a source with a non-static lifetime, you may run into
99    /// lifetime problems due to the way the struct is designed. Calling this function will ensure
100    /// that the returned value has a static lifetime.
101    ///
102    /// This is different from just cloning. Cloning the query will just copy the references, and
103    /// thus the lifetime will remain the same.
104    pub fn into_owned(self) -> Query<'static> {
105        Query {
106            normalized: self.normalized,
107            query: Cow::from(self.query.into_owned()),
108        }
109    }
110
111    /// Returns whether the query is normalized.
112    ///
113    /// A normalized query will have no bytes that are in the unreserved character set
114    /// percent-encoded and all alphabetical characters in percent-encodings will be uppercase.
115    ///
116    /// This function runs in constant-time.
117    ///
118    /// # Examples
119    ///
120    /// ```
121    /// use std::convert::TryFrom;
122    ///
123    /// use uriparse::Query;
124    ///
125    /// let query = Query::try_from("query").unwrap();
126    /// assert!(query.is_normalized());
127    ///
128    /// let mut query = Query::try_from("%ff%ff").unwrap();
129    /// assert!(!query.is_normalized());
130    /// query.normalize();
131    /// assert!(query.is_normalized());
132    /// ```
133    pub fn is_normalized(&self) -> bool {
134        self.normalized
135    }
136
137    /// Normalizes the query such that it will have no bytes that are in the unreserved character
138    /// set percent-encoded and all alphabetical characters in percent-encodings will be uppercase.
139    ///
140    /// If the query is already normalized, the function will return immediately. Otherwise, if the
141    /// query is not owned, this function will perform an allocation to clone it. The normalization
142    /// itself though, is done in-place with no extra memory allocations required.
143    ///
144    /// # Examples
145    ///
146    /// ```
147    /// use std::convert::TryFrom;
148    ///
149    /// use uriparse::Query;
150    ///
151    /// let mut query = Query::try_from("query").unwrap();
152    /// query.normalize();
153    /// assert_eq!(query, "query");
154    ///
155    /// let mut query = Query::try_from("%ff%41").unwrap();
156    /// assert_eq!(query, "%ff%41");
157    /// query.normalize();
158    /// assert_eq!(query, "%FFA");
159    /// ```
160    pub fn normalize(&mut self) {
161        if !self.normalized {
162            // Unsafe: Queries must be valid ASCII-US, so this is safe.
163            unsafe { normalize_string(&mut self.query.to_mut(), true) };
164            self.normalized = true;
165        }
166    }
167}
168
169impl AsRef<[u8]> for Query<'_> {
170    fn as_ref(&self) -> &[u8] {
171        self.query.as_bytes()
172    }
173}
174
175impl AsRef<str> for Query<'_> {
176    fn as_ref(&self) -> &str {
177        &self.query
178    }
179}
180
181impl Deref for Query<'_> {
182    type Target = str;
183
184    fn deref(&self) -> &Self::Target {
185        &self.query
186    }
187}
188
189impl Display for Query<'_> {
190    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
191        formatter.write_str(&self.query)
192    }
193}
194
// Marker impl: equality between queries is the percent-encoding-aware comparison provided by the
// `PartialEq` impls, and is declared here to be a full equivalence relation.
impl Eq for Query<'_> {}
196
197impl<'query> From<Query<'query>> for String {
198    fn from(value: Query<'query>) -> Self {
199        value.to_string()
200    }
201}
202
203impl Hash for Query<'_> {
204    fn hash<H>(&self, state: &mut H)
205    where
206        H: Hasher,
207    {
208        percent_encoded_hash(self.query.as_bytes(), state, true);
209    }
210}
211
212impl PartialEq for Query<'_> {
213    fn eq(&self, other: &Query) -> bool {
214        *self == *other.as_bytes()
215    }
216}
217
218impl PartialEq<[u8]> for Query<'_> {
219    fn eq(&self, other: &[u8]) -> bool {
220        percent_encoded_equality(self.query.as_bytes(), other, true)
221    }
222}
223
224impl<'query> PartialEq<Query<'query>> for [u8] {
225    fn eq(&self, other: &Query<'query>) -> bool {
226        other == self
227    }
228}
229
230impl<'a> PartialEq<&'a [u8]> for Query<'_> {
231    fn eq(&self, other: &&'a [u8]) -> bool {
232        self == *other
233    }
234}
235
236impl<'a, 'query> PartialEq<Query<'query>> for &'a [u8] {
237    fn eq(&self, other: &Query<'query>) -> bool {
238        other == *self
239    }
240}
241
242impl PartialEq<str> for Query<'_> {
243    fn eq(&self, other: &str) -> bool {
244        self == other.as_bytes()
245    }
246}
247
248impl<'query> PartialEq<Query<'query>> for str {
249    fn eq(&self, other: &Query<'query>) -> bool {
250        other == self.as_bytes()
251    }
252}
253
254impl<'a> PartialEq<&'a str> for Query<'_> {
255    fn eq(&self, other: &&'a str) -> bool {
256        self == other.as_bytes()
257    }
258}
259
260impl<'a, 'query> PartialEq<Query<'query>> for &'a str {
261    fn eq(&self, other: &Query<'query>) -> bool {
262        other == self.as_bytes()
263    }
264}
265
266impl<'query> TryFrom<&'query [u8]> for Query<'query> {
267    type Error = QueryError;
268
269    fn try_from(value: &'query [u8]) -> Result<Self, Self::Error> {
270        let (query, rest) = parse_query(value)?;
271
272        if rest.is_empty() {
273            Ok(query)
274        } else {
275            Err(QueryError::InvalidCharacter)
276        }
277    }
278}
279
280impl<'query> TryFrom<&'query str> for Query<'query> {
281    type Error = QueryError;
282
283    fn try_from(value: &'query str) -> Result<Self, Self::Error> {
284        Query::try_from(value.as_bytes())
285    }
286}
287
/// An error representing an invalid query.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[non_exhaustive]
pub enum QueryError {
    /// The query contained an invalid character.
    InvalidCharacter,

    /// The query contained an invalid percent encoding (e.g. `"%ZZ"`).
    InvalidPercentEncoding,
}

impl Display for QueryError {
    /// Writes a short, lowercase description of the error.
    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
        let message = match self {
            QueryError::InvalidCharacter => "invalid query character",
            QueryError::InvalidPercentEncoding => "invalid query percent encoding",
        };

        formatter.write_str(message)
    }
}

impl Error for QueryError {}

impl From<Infallible> for QueryError {
    /// Exists only to satisfy trait bounds involving infallible conversions.
    ///
    /// `Infallible` has no values, so this function can never actually be called; the empty
    /// `match` states that directly instead of returning an arbitrary variant.
    fn from(value: Infallible) -> Self {
        match value {}
    }
}
317
318/// Parses the query from the given byte string.
319pub(crate) fn parse_query(value: &[u8]) -> Result<(Query, &[u8]), QueryError> {
320    let mut bytes = value.iter();
321    let mut end_index = 0;
322    let mut normalized = true;
323
324    while let Some(&byte) = bytes.next() {
325        match QUERY_CHAR_MAP[byte as usize] {
326            0 if byte == b'#' => break,
327            0 => return Err(QueryError::InvalidCharacter),
328            b'%' => match get_percent_encoded_value(bytes.next().cloned(), bytes.next().cloned()) {
329                Ok((hex_value, uppercase)) => {
330                    if !uppercase || UNRESERVED_CHAR_MAP[hex_value as usize] != 0 {
331                        normalized = false;
332                    }
333
334                    end_index += 3;
335                }
336                Err(_) => return Err(QueryError::InvalidPercentEncoding),
337            },
338            _ => end_index += 1,
339        }
340    }
341
342    let (value, rest) = value.split_at(end_index);
343
344    // Unsafe: The loop above makes sure the byte string is valid ASCII-US.
345    let query = Query {
346        normalized,
347        query: Cow::from(unsafe { str::from_utf8_unchecked(value) }),
348    };
349    Ok((query, rest))
350}
351
#[cfg(test)]
mod test {
    use super::*;

    /// Normalizing must uppercase percent-encodings and decode unreserved characters.
    #[test]
    fn test_query_normalize() {
        let cases = [("", ""), ("%ff", "%FF"), ("%41", "A")];

        for &(value, expected) in &cases {
            let mut query = Query::try_from(value).unwrap();
            query.normalize();
            assert_eq!(query, expected);
        }
    }

    /// Valid inputs must parse to themselves; invalid inputs must yield the matching error.
    #[test]
    fn test_query_parse() {
        let valid = ["", "query", "qUeRy", "%ff%ff%ff%41"];

        for &value in &valid {
            assert_eq!(Query::try_from(value).unwrap(), value);
        }

        let invalid = [
            (" ", QueryError::InvalidCharacter),
            ("#", QueryError::InvalidCharacter),
            ("%", QueryError::InvalidPercentEncoding),
            ("%f", QueryError::InvalidPercentEncoding),
            ("%zz", QueryError::InvalidPercentEncoding),
        ];

        for &(value, expected) in &invalid {
            assert_eq!(Query::try_from(value), Err(expected));
        }
    }
}
384}