uriparse/
fragment.rs

1//! Fragment Component
2//!
3//! See [[RFC3986, Section 3.5](https://tools.ietf.org/html/rfc3986#section-3.5)].
4
5use std::borrow::Cow;
6use std::convert::{Infallible, TryFrom};
7use std::error::Error;
8use std::fmt::{self, Display, Formatter};
9use std::hash::{Hash, Hasher};
10use std::ops::Deref;
11use std::str;
12
13use crate::utility::{
14    get_percent_encoded_value, normalize_string, percent_encoded_equality, percent_encoded_hash,
15    UNRESERVED_CHAR_MAP,
16};
17
/// A map of byte characters that determines if a character is a valid fragment character.
///
/// The table is indexed by the byte value. An entry of `0` marks a byte that may not appear in a
/// fragment; every other entry holds the byte itself. The non-zero entries are the ASCII letters
/// and digits, `-`, `.`, `_`, `~`, `!`, `$`, `&`, `'`, `(`, `)`, `*`, `+`, `,`, `;`, `=`, `:`,
/// `@`, `/`, `?` and `%`. All bytes at or above `0x80` are rejected.
///
/// The `%` entry only says that the byte may start a percent-encoding; the two bytes that follow
/// it are checked separately by the parser.
#[rustfmt::skip]
const FRAGMENT_CHAR_MAP: [u8; 256] = [
 // 0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // 0
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // 1
    0, b'!',    0,    0, b'$', b'%', b'&',b'\'', b'(', b')', b'*', b'+', b',', b'-', b'.', b'/', // 2
 b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b':', b';',    0, b'=',    0, b'?', // 3
 b'@', b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', // 4
 b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X', b'Y', b'Z',    0,    0,    0,    0, b'_', // 5
    0, b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', // 6
 b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z',    0,    0,    0, b'~',    0, // 7
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // 8
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // 9
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // A
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // B
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // C
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // D
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // E
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // F
];
39
/// The fragment component as defined in
/// [[RFC3986, Section 3.5](https://tools.ietf.org/html/rfc3986#section-3.5)].
///
/// The fragment is case-sensitive. Furthermore, percent-encoding plays no role in equality checking
/// for characters in the unreserved character set, meaning that `"fragment"` and `"fr%61gment"`
/// are identical. Both of these attributes are reflected in the equality and hash functions.
///
/// However, be aware that just because percent-encoding plays no role in equality checking does not
/// mean that the fragment is normalized. If the fragment needs to be normalized, use the
/// [`Fragment::normalize`] function.
// NOTE(review): with the `serde` feature enabled, the derived `Deserialize` fills both fields
// directly and does not appear to run the validation performed by the `TryFrom` impls. Confirm
// that untrusted input cannot reach the `unsafe` code that relies on the fragment being ASCII.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Fragment<'fragment> {
    /// The internal fragment source that is either owned or borrowed.
    ///
    /// It holds the text exactly as it was parsed; percent-encodings are kept as written until
    /// the fragment is normalized.
    fragment: Cow<'fragment, str>,

    /// Whether the fragment is normalized.
    ///
    /// Computed once while parsing and set to `true` by [`Fragment::normalize`], so reading it is
    /// a plain field access.
    normalized: bool,
}
59
60impl Fragment<'_> {
61    /// Returns a new fragment which is identical but has a lifetime tied to this fragment.
62    pub fn as_borrowed(&self) -> Fragment {
63        use self::Cow::*;
64
65        let fragment = match &self.fragment {
66            Borrowed(borrowed) => *borrowed,
67            Owned(owned) => owned.as_str(),
68        };
69
70        Fragment {
71            fragment: Cow::Borrowed(fragment),
72            normalized: self.normalized,
73        }
74    }
75
76    /// Returns a `str` representation of the fragment.
77    ///
78    /// # Examples
79    ///
80    /// ```
81    /// use std::convert::TryFrom;
82    ///
83    /// use uriparse::Fragment;
84    ///
85    /// let fragment = Fragment::try_from("fragment").unwrap();
86    /// assert_eq!(fragment.as_str(), "fragment");
87    /// ```
88    pub fn as_str(&self) -> &str {
89        &self.fragment
90    }
91
92    /// Converts the [`Fragment`] into an owned copy.
93    ///
94    /// If you construct the fragment from a source with a non-static lifetime, you may run into
95    /// lifetime problems due to the way the struct is designed. Calling this function will ensure
96    /// that the returned value has a static lifetime.
97    ///
98    /// This is different from just cloning. Cloning the fragment will just copy the references, and
99    /// thus the lifetime will remain the same.
100    pub fn into_owned(self) -> Fragment<'static> {
101        Fragment {
102            fragment: Cow::from(self.fragment.into_owned()),
103            normalized: self.normalized,
104        }
105    }
106
107    /// Returns whether the fragment is normalized.
108    ///
109    /// A normalized fragment will have no bytes that are in the unreserved character set
110    /// percent-encoded and all alphabetical characters in percent-encodings will be uppercase.
111    ///
112    /// This function runs in constant-time.
113    ///
114    /// # Examples
115    ///
116    /// ```
117    /// use std::convert::TryFrom;
118    ///
119    /// use uriparse::Fragment;
120    ///
121    /// let fragment = Fragment::try_from("fragment").unwrap();
122    /// assert!(fragment.is_normalized());
123    ///
124    /// let mut fragment = Fragment::try_from("%ff%ff").unwrap();
125    /// assert!(!fragment.is_normalized());
126    /// fragment.normalize();
127    /// assert!(fragment.is_normalized());
128    /// ```
129    pub fn is_normalized(&self) -> bool {
130        self.normalized
131    }
132
133    /// Normalizes the fragment such that it will have no bytes that are in the unreserved character
134    /// set percent-encoded and all alphabetical characters in percent-encodings will be uppercase.
135    ///
136    /// If the fragment is already normalized, the function will return immediately. Otherwise, if
137    /// the fragment is not owned, this function will perform an allocation to clone it. The
138    /// normalization itself though, is done in-place with no extra memory allocations required.
139    ///
140    /// # Examples
141    ///
142    /// ```
143    /// use std::convert::TryFrom;
144    ///
145    /// use uriparse::Fragment;
146    ///
147    /// let mut fragment = Fragment::try_from("fragment").unwrap();
148    /// fragment.normalize();
149    /// assert_eq!(fragment, "fragment");
150    ///
151    /// let mut fragment = Fragment::try_from("%ff%41").unwrap();
152    /// assert_eq!(fragment, "%ff%41");
153    /// fragment.normalize();
154    /// assert_eq!(fragment, "%FFA");
155    /// ```
156    pub fn normalize(&mut self) {
157        if !self.normalized {
158            // Unsafe: Fragments must be valid ASCII-US, so this is safe.
159            unsafe { normalize_string(&mut self.fragment.to_mut(), true) };
160            self.normalized = true;
161        }
162    }
163}
164
165impl AsRef<[u8]> for Fragment<'_> {
166    fn as_ref(&self) -> &[u8] {
167        self.fragment.as_bytes()
168    }
169}
170
171impl AsRef<str> for Fragment<'_> {
172    fn as_ref(&self) -> &str {
173        &self.fragment
174    }
175}
176
177impl Deref for Fragment<'_> {
178    type Target = str;
179
180    fn deref(&self) -> &Self::Target {
181        &self.fragment
182    }
183}
184
185impl Display for Fragment<'_> {
186    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
187        formatter.write_str(&self.fragment)
188    }
189}
190
// Marker impl: fragment equality (see the `PartialEq` impl) is treated as a full equivalence
// relation.
impl Eq for Fragment<'_> {}
192
193impl<'fragment> From<Fragment<'fragment>> for String {
194    fn from(value: Fragment<'fragment>) -> Self {
195        value.to_string()
196    }
197}
198
199impl Hash for Fragment<'_> {
200    fn hash<H>(&self, state: &mut H)
201    where
202        H: Hasher,
203    {
204        percent_encoded_hash(self.fragment.as_bytes(), state, true);
205    }
206}
207
208impl PartialEq for Fragment<'_> {
209    fn eq(&self, other: &Fragment) -> bool {
210        *self == *other.as_bytes()
211    }
212}
213
214impl PartialEq<[u8]> for Fragment<'_> {
215    fn eq(&self, other: &[u8]) -> bool {
216        percent_encoded_equality(self.fragment.as_bytes(), other, true)
217    }
218}
219
220impl<'fragment> PartialEq<Fragment<'fragment>> for [u8] {
221    fn eq(&self, other: &Fragment<'fragment>) -> bool {
222        other == self
223    }
224}
225
226impl<'a> PartialEq<&'a [u8]> for Fragment<'_> {
227    fn eq(&self, other: &&'a [u8]) -> bool {
228        self == *other
229    }
230}
231
232impl<'a, 'fragment> PartialEq<Fragment<'fragment>> for &'a [u8] {
233    fn eq(&self, other: &Fragment<'fragment>) -> bool {
234        other == *self
235    }
236}
237
238impl PartialEq<str> for Fragment<'_> {
239    fn eq(&self, other: &str) -> bool {
240        self == other.as_bytes()
241    }
242}
243
244impl<'fragment> PartialEq<Fragment<'fragment>> for str {
245    fn eq(&self, other: &Fragment<'fragment>) -> bool {
246        other == self.as_bytes()
247    }
248}
249
250impl<'a> PartialEq<&'a str> for Fragment<'_> {
251    fn eq(&self, other: &&'a str) -> bool {
252        self == other.as_bytes()
253    }
254}
255
256impl<'a, 'fragment> PartialEq<Fragment<'fragment>> for &'a str {
257    fn eq(&self, other: &Fragment<'fragment>) -> bool {
258        other == self.as_bytes()
259    }
260}
261
262impl<'fragment> TryFrom<&'fragment [u8]> for Fragment<'fragment> {
263    type Error = FragmentError;
264
265    fn try_from(value: &'fragment [u8]) -> Result<Self, Self::Error> {
266        let mut bytes = value.iter();
267        let mut normalized = true;
268
269        while let Some(&byte) = bytes.next() {
270            match FRAGMENT_CHAR_MAP[byte as usize] {
271                0 => return Err(FragmentError::InvalidCharacter),
272                b'%' => {
273                    match get_percent_encoded_value(bytes.next().cloned(), bytes.next().cloned()) {
274                        Ok((hex_value, uppercase)) => {
275                            if !uppercase || UNRESERVED_CHAR_MAP[hex_value as usize] != 0 {
276                                normalized = false;
277                            }
278                        }
279                        Err(_) => return Err(FragmentError::InvalidPercentEncoding),
280                    }
281                }
282                _ => (),
283            }
284        }
285
286        // Unsafe: The loop above makes sure the byte string is valid ASCII-US.
287        Ok(Fragment {
288            fragment: Cow::from(unsafe { str::from_utf8_unchecked(value) }),
289            normalized,
290        })
291    }
292}
293
294impl<'fragment> TryFrom<&'fragment str> for Fragment<'fragment> {
295    type Error = FragmentError;
296
297    fn try_from(value: &'fragment str) -> Result<Self, Self::Error> {
298        Fragment::try_from(value.as_bytes())
299    }
300}
301
/// An error representing an invalid fragment.
///
/// Returned by the `TryFrom` conversions that parse a [`Fragment`]. The enum is marked
/// `#[non_exhaustive]`, so code outside this crate must include a wildcard arm when matching.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[non_exhaustive]
pub enum FragmentError {
    /// The fragment contained an invalid character.
    ///
    /// For example, a space (`" "`) is rejected with this error.
    InvalidCharacter,

    /// The fragment contained an invalid percent encoding (e.g. `"%ZZ"`).
    ///
    /// A `%` that is not followed by two characters (e.g. `"%"` or `"%f"`) is reported the same
    /// way.
    InvalidPercentEncoding,
}
312
313impl Display for FragmentError {
314    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
315        use self::FragmentError::*;
316
317        match self {
318            InvalidCharacter => write!(formatter, "invalid fragment character"),
319            InvalidPercentEncoding => write!(formatter, "invalid fragment percent encoding"),
320        }
321    }
322}
323
// Uses the default `Error` methods; the message comes from the `Display` impl.
impl Error for FragmentError {}
325
326impl From<Infallible> for FragmentError {
327    fn from(_: Infallible) -> Self {
328        FragmentError::InvalidCharacter
329    }
330}
331
#[cfg(test)]
mod test {
    use super::*;

    /// Normalizing uppercases percent-encodings and decodes encoded unreserved characters.
    #[test]
    fn test_fragment_normalize() {
        let cases = [("", ""), ("%ff", "%FF"), ("%41", "A")];

        for &(input, expected) in &cases {
            let mut fragment = Fragment::try_from(input).unwrap();
            fragment.normalize();
            assert_eq!(fragment, expected);
        }
    }

    /// Valid inputs parse to a fragment equal to the input; invalid inputs yield the matching
    /// error.
    #[test]
    fn test_fragment_parse() {
        use self::FragmentError::*;

        let accepted = ["", "fragment", "fRaGmEnT", "%ff%ff%ff%41"];

        for &input in &accepted {
            assert_eq!(Fragment::try_from(input).unwrap(), input);
        }

        let rejected = [
            (" ", InvalidCharacter),
            ("%", InvalidPercentEncoding),
            ("%f", InvalidPercentEncoding),
            ("%zz", InvalidPercentEncoding),
        ];

        for &(input, expected) in &rejected {
            assert_eq!(Fragment::try_from(input), Err(expected));
        }
    }
}
363}