compact_str/lib.rs
1#![doc = include_str!("../README.md")]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3#![no_std]
4
5#[cfg(feature = "std")]
6#[macro_use]
7extern crate std;
8
9#[cfg_attr(test, macro_use)]
10extern crate alloc;
11
12use alloc::borrow::Cow;
13use alloc::boxed::Box;
14use alloc::string::String;
15#[doc(hidden)] // Referenced in macros.
16pub use core;
17use core::borrow::{Borrow, BorrowMut};
18use core::cmp::Ordering;
19use core::hash::{Hash, Hasher};
20use core::iter::FusedIterator;
21use core::ops::{Add, AddAssign, Bound, Deref, DerefMut, RangeBounds};
22use core::str::{FromStr, Utf8Error};
23use core::{fmt, mem, slice};
24#[cfg(feature = "std")]
25use std::ffi::OsStr;
26
27mod features;
28mod macros;
29mod unicode_data;
30
31mod repr;
32use repr::Repr;
33
34mod traits;
35pub use traits::{CompactStringExt, ToCompactString};
36
37#[cfg(test)]
38mod tests;
39
40/// A [`CompactString`] is a compact string type that can be used almost anywhere a
41/// [`String`] or [`str`] can be used.
42///
43/// ## Using `CompactString`
44/// ```
45/// use compact_str::CompactString;
46/// # use std::collections::HashMap;
47///
48/// // CompactString auto derefs into a str so you can use all methods from `str`
49/// // that take a `&self`
50/// if CompactString::new("hello world!").is_ascii() {
51/// println!("we're all ASCII")
52/// }
53///
54/// // You can use a CompactString in collections like you would a String or &str
55/// let mut map: HashMap<CompactString, CompactString> = HashMap::new();
56///
57/// // directly construct a new `CompactString`
58/// map.insert(CompactString::new("nyc"), CompactString::new("empire state building"));
59/// // create a `CompactString` from a `&str`
60/// map.insert("sf".into(), "transamerica pyramid".into());
61/// // create a `CompactString` from a `String`
62/// map.insert(String::from("sea").into(), String::from("space needle").into());
63///
64/// fn wrapped_print<T: AsRef<str>>(text: T) {
65/// println!("{}", text.as_ref());
66/// }
67///
68/// // CompactString impls AsRef<str> and Borrow<str>, so it can be used anywhere
69/// // that expects a generic string
70/// if let Some(building) = map.get("nyc") {
71/// wrapped_print(building);
72/// }
73///
74/// // CompactString can also be directly compared to a String or &str
75/// assert_eq!(CompactString::new("chicago"), "chicago");
76/// assert_eq!(CompactString::new("houston"), String::from("houston"));
77/// ```
78///
79/// # Converting from a `String`
80/// It's important that a `CompactString` interops well with `String`, so you can easily use both in
81/// your code base.
82///
83/// `CompactString` implements `From<String>` and operates in the following manner:
84/// - Eagerly inlines the string, possibly dropping excess capacity
85/// - Otherwise re-uses the same underlying buffer from `String`
86///
87/// ```
88/// use compact_str::CompactString;
89///
90/// // eagerly inlining
91/// let short = String::from("hello world");
92/// let short_c = CompactString::from(short);
93/// assert!(!short_c.is_heap_allocated());
94///
95/// // dropping excess capacity
96/// let mut excess = String::with_capacity(256);
97/// excess.push_str("abc");
98///
99/// let excess_c = CompactString::from(excess);
100/// assert!(!excess_c.is_heap_allocated());
101/// assert!(excess_c.capacity() < 256);
102///
103/// // re-using the same buffer
104/// let long = String::from("this is a longer string that will be heap allocated");
105///
106/// let long_ptr = long.as_ptr();
107/// let long_len = long.len();
108/// let long_cap = long.capacity();
109///
110/// let mut long_c = CompactString::from(long);
111/// assert!(long_c.is_heap_allocated());
112///
113/// let cpt_ptr = long_c.as_ptr();
114/// let cpt_len = long_c.len();
115/// let cpt_cap = long_c.capacity();
116///
117/// // the original String and the CompactString point to the same place in memory, buffer re-use!
118/// assert_eq!(cpt_ptr, long_ptr);
119/// assert_eq!(cpt_len, long_len);
120/// assert_eq!(cpt_cap, long_cap);
121/// ```
122///
123/// ### Prevent Eagerly Inlining
124/// A consequence of eagerly inlining is you then need to de-allocate the existing buffer, which
125/// might not always be desirable if you're converting a very large amount of `String`s. If your
126/// code is very sensitive to allocations, consider the [`CompactString::from_string_buffer`] API.
// `repr(transparent)`: this newtype has exactly one field, so its layout is the layout of
// the wrapped `Repr`. The field is private; the `impl` blocks form the public surface.
#[repr(transparent)]
pub struct CompactString(Repr);
129
130impl CompactString {
131 /// Creates a new [`CompactString`] from any type that implements `AsRef<str>`.
132 /// If the string is short enough, then it will be inlined on the stack!
133 ///
134 /// In a `static` or `const` context you can use the method [`CompactString::const_new()`].
135 ///
136 /// # Examples
137 ///
138 /// ### Inlined
139 /// ```
140 /// # use compact_str::CompactString;
141 /// // We can inline strings up to 12 characters long on 32-bit architectures...
142 /// #[cfg(target_pointer_width = "32")]
143 /// let s = "i'm 12 chars";
144 /// // ...and up to 24 characters on 64-bit architectures!
145 /// #[cfg(target_pointer_width = "64")]
146 /// let s = "i am 24 characters long!";
147 ///
148 /// let compact = CompactString::new(&s);
149 ///
150 /// assert_eq!(compact, s);
151 /// // we are not allocated on the heap!
152 /// assert!(!compact.is_heap_allocated());
153 /// ```
154 ///
155 /// ### Heap
156 /// ```
157 /// # use compact_str::CompactString;
158 /// // For longer strings though, we get allocated on the heap
159 /// let long = "I am a longer string that will be allocated on the heap";
160 /// let compact = CompactString::new(long);
161 ///
162 /// assert_eq!(compact, long);
163 /// // we are allocated on the heap!
164 /// assert!(compact.is_heap_allocated());
165 /// ```
166 ///
167 /// ### Creation
168 /// ```
169 /// use compact_str::CompactString;
170 ///
171 /// // Using a `&'static str`
172 /// let s = "hello world!";
173 /// let hello = CompactString::new(&s);
174 ///
175 /// // Using a `String`
176 /// let u = String::from("🦄🌈");
177 /// let unicorn = CompactString::new(u);
178 ///
179 /// // Using a `Box<str>`
180 /// let b: Box<str> = String::from("🦀🦀🦀").into_boxed_str();
181 /// let boxed = CompactString::new(&b);
182 /// ```
183 #[inline]
184 #[track_caller]
185 pub fn new<T: AsRef<str>>(text: T) -> Self {
186 Self::try_new(text).unwrap_with_msg()
187 }
188
189 /// Fallible version of [`CompactString::new()`]
190 ///
191 /// This method won't panic if the system is out-of-memory, but returns a [`ReserveError`].
192 /// Otherwise it behaves the same as [`CompactString::new()`].
193 #[inline]
194 pub fn try_new<T: AsRef<str>>(text: T) -> Result<Self, ReserveError> {
195 Repr::new(text.as_ref()).map(CompactString)
196 }
197
198 /// Creates a new inline [`CompactString`] from `&'static str` at compile time.
199 /// Complexity: O(1). As an optimization, short strings get inlined.
200 ///
201 /// In a dynamic context you can use the method [`CompactString::new()`].
202 ///
203 /// # Examples
204 /// ```
205 /// use compact_str::CompactString;
206 ///
207 /// const DEFAULT_NAME: CompactString = CompactString::const_new("untitled");
208 /// ```
209 #[inline]
210 pub const fn const_new(text: &'static str) -> Self {
211 CompactString(Repr::const_new(text))
212 }
213
214 /// Get back the `&'static str` constructed by [`CompactString::const_new`].
215 ///
216 /// If the string was short enough that it could be inlined, then it was inline, and
217 /// this method will return `None`.
218 ///
219 /// # Examples
220 /// ```
221 /// use compact_str::CompactString;
222 ///
223 /// const DEFAULT_NAME: CompactString =
224 /// CompactString::const_new("That is not dead which can eternal lie.");
225 /// assert_eq!(
226 /// DEFAULT_NAME.as_static_str().unwrap(),
227 /// "That is not dead which can eternal lie.",
228 /// );
229 /// ```
230 #[inline]
231 #[rustversion::attr(since(1.64), const)]
232 pub fn as_static_str(&self) -> Option<&'static str> {
233 self.0.as_static_str()
234 }
235
236 /// Creates a new empty [`CompactString`] with the capacity to fit at least `capacity` bytes.
237 ///
238 /// A `CompactString` will inline strings on the stack, if they're small enough. Specifically,
239 /// if the string has a length less than or equal to `std::mem::size_of::<String>` bytes
240 /// then it will be inlined. This also means that `CompactString`s have a minimum capacity
241 /// of `std::mem::size_of::<String>`.
242 ///
243 /// # Panics
244 ///
245 /// This method panics if the system is out-of-memory.
246 /// Use [`CompactString::try_with_capacity()`] if you want to handle such a problem manually.
247 ///
248 /// # Examples
249 ///
250 /// ### "zero" Capacity
251 /// ```
252 /// # use compact_str::CompactString;
253 /// // Creating a CompactString with a capacity of 0 will create
254 /// // one with capacity of std::mem::size_of::<String>();
255 /// let empty = CompactString::with_capacity(0);
256 /// let min_size = std::mem::size_of::<String>();
257 ///
258 /// assert_eq!(empty.capacity(), min_size);
259 /// assert_ne!(0, min_size);
260 /// assert!(!empty.is_heap_allocated());
261 /// ```
262 ///
263 /// ### Max Inline Size
264 /// ```
265 /// # use compact_str::CompactString;
266 /// // Creating a CompactString with a capacity of std::mem::size_of::<String>()
267 /// // will not heap allocate.
268 /// let str_size = std::mem::size_of::<String>();
269 /// let empty = CompactString::with_capacity(str_size);
270 ///
271 /// assert_eq!(empty.capacity(), str_size);
272 /// assert!(!empty.is_heap_allocated());
273 /// ```
274 ///
275 /// ### Heap Allocating
276 /// ```
277 /// # use compact_str::CompactString;
278 /// // If you create a `CompactString` with a capacity greater than
279 /// // `std::mem::size_of::<String>`, it will be heap allocated. For heap
280 /// // allocated strings we have a minimum capacity
281 ///
282 /// const MIN_HEAP_CAPACITY: usize = std::mem::size_of::<usize>() * 4;
283 ///
284 /// let heap_size = std::mem::size_of::<String>() + 1;
285 /// let empty = CompactString::with_capacity(heap_size);
286 ///
287 /// assert_eq!(empty.capacity(), MIN_HEAP_CAPACITY);
288 /// assert!(empty.is_heap_allocated());
289 /// ```
290 #[inline]
291 #[track_caller]
292 pub fn with_capacity(capacity: usize) -> Self {
293 Self::try_with_capacity(capacity).unwrap_with_msg()
294 }
295
296 /// Fallible version of [`CompactString::with_capacity()`]
297 ///
298 /// This method won't panic if the system is out-of-memory, but returns a [`ReserveError`].
299 /// Otherwise it behaves the same as [`CompactString::with_capacity()`].
300 #[inline]
301 pub fn try_with_capacity(capacity: usize) -> Result<Self, ReserveError> {
302 Repr::with_capacity(capacity).map(CompactString)
303 }
304
305 /// Convert a slice of bytes into a [`CompactString`].
306 ///
307 /// A [`CompactString`] is a contiguous collection of bytes (`u8`s) that is valid [`UTF-8`](https://en.wikipedia.org/wiki/UTF-8).
308 /// This method converts from an arbitrary contiguous collection of bytes into a
309 /// [`CompactString`], failing if the provided bytes are not `UTF-8`.
310 ///
311 /// Note: If you want to create a [`CompactString`] from a non-contiguous collection of bytes,
312 /// enable the `bytes` feature of this crate, and see `CompactString::from_utf8_buf`
313 ///
314 /// # Examples
315 /// ### Valid UTF-8
316 /// ```
317 /// # use compact_str::CompactString;
318 /// let bytes = vec![240, 159, 166, 128, 240, 159, 146, 175];
319 /// let compact = CompactString::from_utf8(bytes).expect("valid UTF-8");
320 ///
321 /// assert_eq!(compact, "🦀💯");
322 /// ```
323 ///
324 /// ### Invalid UTF-8
325 /// ```
326 /// # use compact_str::CompactString;
327 /// let bytes = vec![255, 255, 255];
328 /// let result = CompactString::from_utf8(bytes);
329 ///
330 /// assert!(result.is_err());
331 /// ```
332 #[inline]
333 pub fn from_utf8<B: AsRef<[u8]>>(buf: B) -> Result<Self, Utf8Error> {
334 Repr::from_utf8(buf).map(CompactString)
335 }
336
337 /// Converts a vector of bytes to a [`CompactString`] without checking that the string contains
338 /// valid UTF-8.
339 ///
340 /// See the safe version, [`CompactString::from_utf8`], for more details.
341 ///
342 /// # Safety
343 ///
344 /// * The contents passed to this method must be valid UTF-8.
345 ///
346 /// It's very important that this constraint is upheld because the internals of a
347 /// [`CompactString`] (e.g. determining an inline string versus a heap allocated string) rely on
348 /// the [`CompactString`] containing valid UTF-8. If this constraint is violated any further
349 /// use of the returned [`CompactString`] (including dropping it) can cause undefined behavior.
350 ///
351 /// # Examples
352 ///
353 /// Basic usage:
354 ///
355 /// ```
356 /// # use compact_str::CompactString;
357 /// // some bytes, in a vector
358 /// let sparkle_heart = vec![240, 159, 146, 150];
359 ///
360 /// let sparkle_heart = unsafe {
361 /// CompactString::from_utf8_unchecked(sparkle_heart)
362 /// };
363 ///
364 /// assert_eq!("💖", sparkle_heart);
365 /// ```
366 #[inline]
367 #[must_use]
368 #[track_caller]
369 pub unsafe fn from_utf8_unchecked<B: AsRef<[u8]>>(buf: B) -> Self {
370 Repr::from_utf8_unchecked(buf)
371 .map(CompactString)
372 .unwrap_with_msg()
373 }
374
375 /// Decode a [`UTF-16`](https://en.wikipedia.org/wiki/UTF-16) slice of bytes into a
376 /// [`CompactString`], returning an [`Err`] if the slice contains any invalid data.
377 ///
378 /// # Examples
379 /// ### Valid UTF-16
380 /// ```
381 /// # use compact_str::CompactString;
382 /// let buf: &[u16] = &[0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0x0069, 0x0063];
383 /// let compact = CompactString::from_utf16(buf).unwrap();
384 ///
385 /// assert_eq!(compact, "𝄞music");
386 /// ```
387 ///
388 /// ### Invalid UTF-16
389 /// ```
390 /// # use compact_str::CompactString;
391 /// let buf: &[u16] = &[0xD834, 0xDD1E, 0x006d, 0x0075, 0xD800, 0x0069, 0x0063];
392 /// let res = CompactString::from_utf16(buf);
393 ///
394 /// assert!(res.is_err());
395 /// ```
396 #[inline]
397 pub fn from_utf16<B: AsRef<[u16]>>(buf: B) -> Result<Self, Utf16Error> {
398 // Note: we don't use collect::<Result<_, _>>() because that fails to pre-allocate a buffer,
399 // even though the size of our iterator, `buf`, is known ahead of time.
400 //
401 // rustlang issue #48994 is tracking the fix
402
403 let buf = buf.as_ref();
404 let mut ret = CompactString::with_capacity(buf.len());
405 for c in core::char::decode_utf16(buf.iter().copied()) {
406 if let Ok(c) = c {
407 ret.push(c);
408 } else {
409 return Err(Utf16Error(()));
410 }
411 }
412 Ok(ret)
413 }
414
415 /// Decode a UTF-16โencoded slice `v` into a `CompactString`, replacing invalid data with
416 /// the replacement character (`U+FFFD`), ๏ฟฝ.
417 ///
418 /// # Examples
419 ///
420 /// Basic usage:
421 ///
422 /// ```
423 /// # use compact_str::CompactString;
424 /// // 𝄞mus<invalid>ic<invalid>
425 /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
426 /// 0x0073, 0xDD1E, 0x0069, 0x0063,
427 /// 0xD834];
428 ///
429 /// assert_eq!(CompactString::from("𝄞mus\u{FFFD}ic\u{FFFD}"),
430 /// CompactString::from_utf16_lossy(v));
431 /// ```
432 #[inline]
433 pub fn from_utf16_lossy<B: AsRef<[u16]>>(buf: B) -> Self {
434 let buf = buf.as_ref();
435 let mut ret = CompactString::with_capacity(buf.len());
436 for c in core::char::decode_utf16(buf.iter().copied()) {
437 match c {
438 Ok(c) => ret.push(c),
439 Err(_) => ret.push_str("๏ฟฝ"),
440 }
441 }
442 ret
443 }
444
445 /// Returns the length of the [`CompactString`] in `bytes`, not [`char`]s or graphemes.
446 ///
447 /// When using `UTF-8` encoding (which all strings in Rust do) a single character will be 1 to 4
448 /// bytes long, therefore the return value of this method might not be what a human considers
449 /// the length of the string.
450 ///
451 /// # Examples
452 /// ```
453 /// # use compact_str::CompactString;
454 /// let ascii = CompactString::new("hello world");
455 /// assert_eq!(ascii.len(), 11);
456 ///
457 /// let emoji = CompactString::new("🐱");
458 /// assert_eq!(emoji.len(), 4);
459 /// ```
460 #[inline]
461 pub fn len(&self) -> usize {
462 self.0.len()
463 }
464
465 /// Returns `true` if the [`CompactString`] has a length of 0, `false` otherwise
466 ///
467 /// # Examples
468 /// ```
469 /// # use compact_str::CompactString;
470 /// let mut msg = CompactString::new("");
471 /// assert!(msg.is_empty());
472 ///
473 /// // add some characters
474 /// msg.push_str("hello reader!");
475 /// assert!(!msg.is_empty());
476 /// ```
477 #[inline]
478 pub fn is_empty(&self) -> bool {
479 self.0.is_empty()
480 }
481
482 /// Returns the capacity of the [`CompactString`], in bytes.
483 ///
484 /// # Note
485 /// * A `CompactString` will always have a capacity of at least `std::mem::size_of::<String>()`
486 ///
487 /// # Examples
488 /// ### Minimum Size
489 /// ```
490 /// # use compact_str::CompactString;
491 /// let min_size = std::mem::size_of::<String>();
492 /// let compact = CompactString::new("");
493 ///
494 /// assert!(compact.capacity() >= min_size);
495 /// ```
496 ///
497 /// ### Heap Allocated
498 /// ```
499 /// # use compact_str::CompactString;
500 /// let compact = CompactString::with_capacity(128);
501 /// assert_eq!(compact.capacity(), 128);
502 /// ```
503 #[inline]
504 pub fn capacity(&self) -> usize {
505 self.0.capacity()
506 }
507
508 /// Ensures that this [`CompactString`]'s capacity is at least `additional` bytes longer than
509 /// its length. The capacity may be increased by more than `additional` bytes if it chooses,
510 /// to prevent frequent reallocations.
511 ///
512 /// # Note
513 /// * A `CompactString` will always have at least a capacity of `std::mem::size_of::<String>()`
514 /// * Reserving additional bytes may cause the `CompactString` to become heap allocated
515 ///
516 /// # Panics
517 /// This method panics if the new capacity overflows `usize` or if the system is out-of-memory.
518 /// Use [`CompactString::try_reserve()`] if you want to handle such a problem manually.
519 ///
520 /// # Examples
521 /// ```
522 /// # use compact_str::CompactString;
523 ///
524 /// const WORD: usize = std::mem::size_of::<usize>();
525 /// let mut compact = CompactString::default();
526 /// assert!(compact.capacity() >= (WORD * 3) - 1);
527 ///
528 /// compact.reserve(200);
529 /// assert!(compact.is_heap_allocated());
530 /// assert!(compact.capacity() >= 200);
531 /// ```
532 #[inline]
533 #[track_caller]
534 pub fn reserve(&mut self, additional: usize) {
535 self.try_reserve(additional).unwrap_with_msg()
536 }
537
538 /// Fallible version of [`CompactString::reserve()`]
539 ///
540 /// This method won't panic if the system is out-of-memory, but returns a [`ReserveError`].
541 /// Otherwise it behaves the same as [`CompactString::reserve()`].
542 #[inline]
543 pub fn try_reserve(&mut self, additional: usize) -> Result<(), ReserveError> {
544 self.0.reserve(additional)
545 }
546
547 /// Returns a string slice containing the entire [`CompactString`].
548 ///
549 /// # Examples
550 /// ```
551 /// # use compact_str::CompactString;
552 /// let s = CompactString::new("hello");
553 ///
554 /// assert_eq!(s.as_str(), "hello");
555 /// ```
556 #[inline]
557 pub fn as_str(&self) -> &str {
558 self.0.as_str()
559 }
560
561 /// Returns a mutable string slice containing the entire [`CompactString`].
562 ///
563 /// # Examples
564 /// ```
565 /// # use compact_str::CompactString;
566 /// let mut s = CompactString::new("hello");
567 /// s.as_mut_str().make_ascii_uppercase();
568 ///
569 /// assert_eq!(s.as_str(), "HELLO");
570 /// ```
571 #[inline]
572 pub fn as_mut_str(&mut self) -> &mut str {
573 let len = self.len();
574 unsafe { core::str::from_utf8_unchecked_mut(&mut self.0.as_mut_buf()[..len]) }
575 }
576
577 unsafe fn spare_capacity_mut(&mut self) -> &mut [mem::MaybeUninit<u8>] {
578 let buf = self.0.as_mut_buf();
579 let ptr = buf.as_mut_ptr();
580 let cap = buf.len();
581 let len = self.len();
582
583 slice::from_raw_parts_mut(ptr.add(len) as *mut mem::MaybeUninit<u8>, cap - len)
584 }
585
586 /// Returns a byte slice of the [`CompactString`]'s contents.
587 ///
588 /// # Examples
589 /// ```
590 /// # use compact_str::CompactString;
591 /// let s = CompactString::new("hello");
592 ///
593 /// assert_eq!(&[104, 101, 108, 108, 111], s.as_bytes());
594 /// ```
595 #[inline]
596 pub fn as_bytes(&self) -> &[u8] {
597 self.0.as_slice()
598 }
599
600 // TODO: Implement a `try_as_mut_slice(...)` that will fail if it results in cloning?
601 //
602 /// Provides a mutable reference to the underlying buffer of bytes.
603 ///
604 /// # Safety
605 /// * All Rust strings, including `CompactString`, must be valid UTF-8. The caller must
606 /// guarantee that any modifications made to the underlying buffer are valid UTF-8.
607 ///
608 /// # Examples
609 /// ```
610 /// # use compact_str::CompactString;
611 /// let mut s = CompactString::new("hello");
612 ///
613 /// let slice = unsafe { s.as_mut_bytes() };
614 /// // copy bytes into our string
615 /// slice[5..11].copy_from_slice(" world".as_bytes());
616 /// // set the len of the string
617 /// unsafe { s.set_len(11) };
618 ///
619 /// assert_eq!(s, "hello world");
620 /// ```
621 #[inline]
622 pub unsafe fn as_mut_bytes(&mut self) -> &mut [u8] {
623 self.0.as_mut_buf()
624 }
625
626 /// Appends the given [`char`] to the end of this [`CompactString`].
627 ///
628 /// # Examples
629 /// ```
630 /// # use compact_str::CompactString;
631 /// let mut s = CompactString::new("foo");
632 ///
633 /// s.push('b');
634 /// s.push('a');
635 /// s.push('r');
636 ///
637 /// assert_eq!("foobar", s);
638 /// ```
639 pub fn push(&mut self, ch: char) {
640 self.push_str(ch.encode_utf8(&mut [0; 4]));
641 }
642
643 /// Removes the last character from the [`CompactString`] and returns it.
644 /// Returns `None` if this [`CompactString`] is empty.
645 ///
646 /// # Examples
647 /// ```
648 /// # use compact_str::CompactString;
649 /// let mut s = CompactString::new("abc");
650 ///
651 /// assert_eq!(s.pop(), Some('c'));
652 /// assert_eq!(s.pop(), Some('b'));
653 /// assert_eq!(s.pop(), Some('a'));
654 ///
655 /// assert_eq!(s.pop(), None);
656 /// ```
657 #[inline]
658 pub fn pop(&mut self) -> Option<char> {
659 self.0.pop()
660 }
661
662 /// Appends a given string slice onto the end of this [`CompactString`]
663 ///
664 /// # Examples
665 /// ```
666 /// # use compact_str::CompactString;
667 /// let mut s = CompactString::new("abc");
668 ///
669 /// s.push_str("123");
670 ///
671 /// assert_eq!("abc123", s);
672 /// ```
673 #[inline]
674 pub fn push_str(&mut self, s: &str) {
675 self.0.push_str(s)
676 }
677
678 /// Removes a [`char`] from this [`CompactString`] at a byte position and returns it.
679 ///
680 /// This is an *O*(*n*) operation, as it requires copying every element in the
681 /// buffer.
682 ///
683 /// # Panics
684 ///
685 /// Panics if `idx` is larger than or equal to the [`CompactString`]'s length,
686 /// or if it does not lie on a [`char`] boundary.
687 ///
688 /// # Examples
689 ///
690 /// ### Basic usage:
691 ///
692 /// ```
693 /// # use compact_str::CompactString;
694 /// let mut c = CompactString::from("hello world");
695 ///
696 /// assert_eq!(c.remove(0), 'h');
697 /// assert_eq!(c, "ello world");
698 ///
699 /// assert_eq!(c.remove(5), 'w');
700 /// assert_eq!(c, "ello orld");
701 /// ```
702 ///
703 /// ### Past total length:
704 ///
705 /// ```should_panic
706 /// # use compact_str::CompactString;
707 /// let mut c = CompactString::from("hello there!");
708 /// c.remove(100);
709 /// ```
710 ///
711 /// ### Not on char boundary:
712 ///
713 /// ```should_panic
714 /// # use compact_str::CompactString;
715 /// let mut c = CompactString::from("🦄");
716 /// c.remove(1);
717 /// ```
718 #[inline]
719 pub fn remove(&mut self, idx: usize) -> char {
720 let len = self.len();
721 let substr = &mut self.as_mut_str()[idx..];
722
723 // get the char we want to remove
724 let ch = substr
725 .chars()
726 .next()
727 .expect("cannot remove a char from the end of a string");
728 let ch_len = ch.len_utf8();
729
730 // shift everything back one character
731 let num_bytes = substr.len() - ch_len;
732 let ptr = substr.as_mut_ptr();
733
734 // SAFETY: Both src and dest are valid for reads of `num_bytes` amount of bytes,
735 // and are properly aligned
736 unsafe {
737 core::ptr::copy(ptr.add(ch_len) as *const u8, ptr, num_bytes);
738 self.set_len(len - ch_len);
739 }
740
741 ch
742 }
743
744 /// Forces the length of the [`CompactString`] to `new_len`.
745 ///
746 /// This is a low-level operation that maintains none of the normal invariants for
747 /// `CompactString`. If you want to modify the `CompactString` you should use methods like
748 /// `push`, `push_str` or `pop`.
749 ///
750 /// # Safety
751 /// * `new_len` must be less than or equal to `capacity()`
752 /// * The elements at `old_len..new_len` must be initialized
753 #[inline]
754 pub unsafe fn set_len(&mut self, new_len: usize) {
755 self.0.set_len(new_len)
756 }
757
758 /// Returns whether or not the [`CompactString`] is heap allocated.
759 ///
760 /// # Examples
761 /// ### Inlined
762 /// ```
763 /// # use compact_str::CompactString;
764 /// let hello = CompactString::new("hello world");
765 ///
766 /// assert!(!hello.is_heap_allocated());
767 /// ```
768 ///
769 /// ### Heap Allocated
770 /// ```
771 /// # use compact_str::CompactString;
772 /// let msg = CompactString::new("this message will self destruct in 5, 4, 3, 2, 1 💥");
773 ///
774 /// assert!(msg.is_heap_allocated());
775 /// ```
776 #[inline]
777 pub fn is_heap_allocated(&self) -> bool {
778 self.0.is_heap_allocated()
779 }
780
781 /// Ensure that the given range is inside the set data, and that no codepoints are split.
782 ///
783 /// Returns the range `start..end` as a tuple.
784 #[inline]
785 fn ensure_range(&self, range: impl RangeBounds<usize>) -> (usize, usize) {
786 #[cold]
787 #[inline(never)]
788 fn illegal_range() -> ! {
789 panic!("illegal range");
790 }
791
792 let start = match range.start_bound() {
793 Bound::Included(&n) => n,
794 Bound::Excluded(&n) => match n.checked_add(1) {
795 Some(n) => n,
796 None => illegal_range(),
797 },
798 Bound::Unbounded => 0,
799 };
800 let end = match range.end_bound() {
801 Bound::Included(&n) => match n.checked_add(1) {
802 Some(n) => n,
803 None => illegal_range(),
804 },
805 Bound::Excluded(&n) => n,
806 Bound::Unbounded => self.len(),
807 };
808 if end < start {
809 illegal_range();
810 }
811
812 let s = self.as_str();
813 if !s.is_char_boundary(start) || !s.is_char_boundary(end) {
814 illegal_range();
815 }
816
817 (start, end)
818 }
819
820 /// Removes the specified range in the [`CompactString`],
821 /// and replaces it with the given string.
822 /// The given string doesn't need to be the same length as the range.
823 ///
824 /// # Panics
825 ///
826 /// Panics if the starting point or end point do not lie on a [`char`]
827 /// boundary, or if they're out of bounds.
828 ///
829 /// # Examples
830 ///
831 /// Basic usage:
832 ///
833 /// ```
834 /// # use compact_str::CompactString;
835 /// let mut s = CompactString::new("Hello, world!");
836 ///
837 /// s.replace_range(7..12, "WORLD");
838 /// assert_eq!(s, "Hello, WORLD!");
839 ///
840 /// s.replace_range(7..=11, "you");
841 /// assert_eq!(s, "Hello, you!");
842 ///
843 /// s.replace_range(5.., "! Is it me you're looking for?");
844 /// assert_eq!(s, "Hello! Is it me you're looking for?");
845 /// ```
846 #[inline]
847 pub fn replace_range(&mut self, range: impl RangeBounds<usize>, replace_with: &str) {
848 let (start, end) = self.ensure_range(range);
849 let dest_len = end - start;
850 match dest_len.cmp(&replace_with.len()) {
851 Ordering::Equal => unsafe { self.replace_range_same_size(start, end, replace_with) },
852 Ordering::Greater => unsafe { self.replace_range_shrink(start, end, replace_with) },
853 Ordering::Less => unsafe { self.replace_range_grow(start, end, replace_with) },
854 }
855 }
856
857 /// Replace into the same size.
858 unsafe fn replace_range_same_size(&mut self, start: usize, end: usize, replace_with: &str) {
859 core::ptr::copy_nonoverlapping(
860 replace_with.as_ptr(),
861 self.as_mut_ptr().add(start),
862 end - start,
863 );
864 }
865
866 /// Replace, so self.len() gets smaller.
867 unsafe fn replace_range_shrink(&mut self, start: usize, end: usize, replace_with: &str) {
868 let total_len = self.len();
869 let dest_len = end - start;
870 let new_len = total_len - (dest_len - replace_with.len());
871 let amount = total_len - end;
872 let data = self.as_mut_ptr();
873 // first insert the replacement string, overwriting the current content
874 core::ptr::copy_nonoverlapping(replace_with.as_ptr(), data.add(start), replace_with.len());
875 // then move the tail of the CompactString forward to its new place, filling the gap
876 core::ptr::copy(
877 data.add(total_len - amount),
878 data.add(new_len - amount),
879 amount,
880 );
881 // and lastly we set the new length
882 self.set_len(new_len);
883 }
884
885 /// Replace, so self.len() gets bigger.
886 unsafe fn replace_range_grow(&mut self, start: usize, end: usize, replace_with: &str) {
887 let dest_len = end - start;
888 self.reserve(replace_with.len() - dest_len);
889 let total_len = self.len();
890 let new_len = total_len + (replace_with.len() - dest_len);
891 let amount = total_len - end;
892 // first grow the string, so MIRI knows that the full range is usable
893 self.set_len(new_len);
894 let data = self.as_mut_ptr();
895 // then move the tail of the CompactString back to its new place
896 core::ptr::copy(
897 data.add(total_len - amount),
898 data.add(new_len - amount),
899 amount,
900 );
901 // and lastly insert the replacement string
902 core::ptr::copy_nonoverlapping(replace_with.as_ptr(), data.add(start), replace_with.len());
903 }
904
905 /// Creates a new [`CompactString`] by repeating a string `n` times.
906 ///
907 /// # Panics
908 ///
909 /// This function will panic if the capacity would overflow.
910 ///
911 /// # Examples
912 ///
913 /// Basic usage:
914 ///
915 /// ```
916 /// use compact_str::CompactString;
917 /// assert_eq!(CompactString::new("abc").repeat(4), CompactString::new("abcabcabcabc"));
918 /// ```
919 ///
920 /// A panic upon overflow:
921 ///
922 /// ```should_panic
923 /// use compact_str::CompactString;
924 ///
925 /// // this will panic at runtime
926 /// let huge = CompactString::new("0123456789abcdef").repeat(usize::MAX);
927 /// ```
928 #[must_use]
929 pub fn repeat(&self, n: usize) -> Self {
930 if n == 0 || self.is_empty() {
931 Self::const_new("")
932 } else if n == 1 {
933 self.clone()
934 } else {
935 let mut out = Self::with_capacity(self.len() * n);
936 (0..n).for_each(|_| out.push_str(self));
937 out
938 }
939 }
940
941 /// Truncate the [`CompactString`] to a shorter length.
942 ///
943 /// If the length of the [`CompactString`] is less or equal to `new_len`, the call is a no-op.
944 ///
945 /// Calling this function does not change the capacity of the [`CompactString`].
946 ///
947 /// # Panics
948 ///
949 /// Panics if the new end of the string does not lie on a [`char`] boundary.
950 ///
951 /// # Examples
952 ///
953 /// Basic usage:
954 ///
955 /// ```
956 /// # use compact_str::CompactString;
957 /// let mut s = CompactString::new("Hello, world!");
958 /// s.truncate(5);
959 /// assert_eq!(s, "Hello");
960 /// ```
961 pub fn truncate(&mut self, new_len: usize) {
962 let s = self.as_str();
963 if new_len >= s.len() {
964 return;
965 }
966
967 assert!(
968 s.is_char_boundary(new_len),
969 "new_len must lie on char boundary",
970 );
971 unsafe { self.set_len(new_len) };
972 }
973
974 /// Converts a [`CompactString`] to a raw pointer.
975 #[inline]
976 pub fn as_ptr(&self) -> *const u8 {
977 self.0.as_slice().as_ptr()
978 }
979
980 /// Converts a mutable [`CompactString`] to a raw pointer.
981 #[inline]
982 pub fn as_mut_ptr(&mut self) -> *mut u8 {
983 unsafe { self.0.as_mut_buf().as_mut_ptr() }
984 }
985
    /// Insert a string slice at a byte index.
987 ///
988 /// # Examples
989 ///
990 /// Basic usage:
991 ///
992 /// ```
993 /// # use compact_str::CompactString;
994 /// let mut s = CompactString::new("Hello!");
995 /// s.insert_str(5, ", world");
996 /// assert_eq!(s, "Hello, world!");
997 /// ```
998 pub fn insert_str(&mut self, idx: usize, string: &str) {
999 assert!(self.is_char_boundary(idx), "idx must lie on char boundary");
1000
1001 let new_len = self.len() + string.len();
1002 self.reserve(string.len());
1003
1004 // SAFETY: We just checked that we may split self at idx.
1005 // We set the length only after reserving the memory.
1006 // We fill the gap with valid UTF-8 data.
1007 unsafe {
1008 // first move the tail to the new back
1009 let data = self.as_mut_ptr();
1010 core::ptr::copy(
1011 data.add(idx),
1012 data.add(idx + string.len()),
1013 new_len - idx - string.len(),
1014 );
1015
1016 // then insert the new bytes
1017 core::ptr::copy_nonoverlapping(string.as_ptr(), data.add(idx), string.len());
1018
1019 // and lastly resize the string
1020 self.set_len(new_len);
1021 }
1022 }
1023
1024 /// Insert a character at an index.
1025 ///
1026 /// # Examples
1027 ///
1028 /// Basic usage:
1029 ///
1030 /// ```
1031 /// # use compact_str::CompactString;
1032 /// let mut s = CompactString::new("Hello world!");
1033 /// s.insert(5, ',');
1034 /// assert_eq!(s, "Hello, world!");
1035 /// ```
1036 pub fn insert(&mut self, idx: usize, ch: char) {
1037 self.insert_str(idx, ch.encode_utf8(&mut [0; 4]));
1038 }
1039
1040 /// Reduces the length of the [`CompactString`] to zero.
1041 ///
1042 /// Calling this function does not change the capacity of the [`CompactString`].
1043 ///
1044 /// ```
1045 /// # use compact_str::CompactString;
1046 /// let mut s = CompactString::new("Rust is the most loved language on Stackoverflow!");
1047 /// assert_eq!(s.capacity(), 49);
1048 ///
1049 /// s.clear();
1050 ///
1051 /// assert_eq!(s, "");
1052 /// assert_eq!(s.capacity(), 49);
1053 /// ```
    pub fn clear(&mut self) {
        // Only the length is reset; no buffer is released here.
        // SAFETY: NOTE(review): presumably `set_len` requires the new length to delimit a
        // valid UTF-8 prefix of the buffer; a length of zero satisfies that trivially.
        unsafe { self.set_len(0) };
    }
1057
    /// Split the [`CompactString`] into two at the given byte index.
1059 ///
1060 /// Calling this function does not change the capacity of the [`CompactString`], unless the
1061 /// [`CompactString`] is backed by a `&'static str`.
1062 ///
1063 /// # Panics
1064 ///
1065 /// Panics if `at` does not lie on a [`char`] boundary.
1066 ///
1067 /// Basic usage:
1068 ///
1069 /// ```
1070 /// # use compact_str::CompactString;
1071 /// let mut s = CompactString::const_new("Hello, world!");
1072 /// let w = s.split_off(5);
1073 ///
1074 /// assert_eq!(w, ", world!");
1075 /// assert_eq!(s, "Hello");
1076 /// ```
1077 pub fn split_off(&mut self, at: usize) -> Self {
1078 if let Some(s) = self.as_static_str() {
1079 let result = Self::const_new(&s[at..]);
1080 // SAFETY: the previous line `self[at...]` would have panicked if `at` was invalid
1081 unsafe { self.set_len(at) };
1082 result
1083 } else {
1084 let result = self[at..].into();
1085 // SAFETY: the previous line `self[at...]` would have panicked if `at` was invalid
1086 unsafe { self.set_len(at) };
1087 result
1088 }
1089 }
1090
1091 /// Remove a range from the [`CompactString`], and return it as an iterator.
1092 ///
1093 /// Calling this function does not change the capacity of the [`CompactString`].
1094 ///
1095 /// # Panics
1096 ///
1097 /// Panics if the start or end of the range does not lie on a [`char`] boundary.
1098 ///
1099 /// # Examples
1100 ///
1101 /// Basic usage:
1102 ///
1103 /// ```
1104 /// # use compact_str::CompactString;
1105 /// let mut s = CompactString::new("Hello, world!");
1106 ///
1107 /// let mut d = s.drain(5..12);
1108 /// assert_eq!(d.next(), Some(',')); // iterate over the extracted data
1109 /// assert_eq!(d.as_str(), " world"); // or get the whole data as &str
1110 ///
1111 /// // The iterator keeps a reference to `s`, so you have to drop() the iterator,
1112 /// // before you can access `s` again.
1113 /// drop(d);
1114 /// assert_eq!(s, "Hello!");
1115 /// ```
1116 pub fn drain(&mut self, range: impl RangeBounds<usize>) -> Drain<'_> {
1117 let (start, end) = self.ensure_range(range);
1118 Drain {
1119 compact_string: self as *mut Self,
1120 start,
1121 end,
1122 chars: self[start..end].chars(),
1123 }
1124 }
1125
1126 /// Shrinks the capacity of this [`CompactString`] with a lower bound.
1127 ///
    /// The resulting capacity is never less than the size of 3×[`usize`],
    /// i.e. the capacity that can be inlined.
1130 ///
1131 /// # Examples
1132 ///
1133 /// Basic usage:
1134 ///
1135 /// ```
1136 /// # use compact_str::CompactString;
1137 /// let mut s = CompactString::with_capacity(100);
1138 /// assert_eq!(s.capacity(), 100);
1139 ///
1140 /// // if the capacity was already bigger than the argument, the call is a no-op
1141 /// s.shrink_to(100);
1142 /// assert_eq!(s.capacity(), 100);
1143 ///
1144 /// s.shrink_to(50);
1145 /// assert_eq!(s.capacity(), 50);
1146 ///
1147 /// // if the string can be inlined, it is
1148 /// s.shrink_to(10);
1149 /// assert_eq!(s.capacity(), 3 * std::mem::size_of::<usize>());
1150 /// ```
    #[inline]
    pub fn shrink_to(&mut self, min_capacity: usize) {
        // Pure delegation: the inner representation decides how far it can shrink.
        self.0.shrink_to(min_capacity);
    }
1155
1156 /// Shrinks the capacity of this [`CompactString`] to match its length.
1157 ///
    /// The resulting capacity is never less than the size of 3×[`usize`],
    /// i.e. the capacity that can be inlined.
    ///
    /// This method is effectively the same as calling [`string.shrink_to(0)`](Self::shrink_to).
1162 ///
1163 /// # Examples
1164 ///
1165 /// Basic usage:
1166 ///
1167 /// ```
1168 /// # use compact_str::CompactString;
1169 /// let mut s = CompactString::from("This is a string with more than 24 characters.");
1170 ///
1171 /// s.reserve(100);
1172 /// assert!(s.capacity() >= 100);
1173 ///
1174 /// s.shrink_to_fit();
1175 /// assert_eq!(s.len(), s.capacity());
1176 /// ```
1177 ///
1178 /// ```
1179 /// # use compact_str::CompactString;
1180 /// let mut s = CompactString::from("short string");
1181 ///
1182 /// s.reserve(100);
1183 /// assert!(s.capacity() >= 100);
1184 ///
1185 /// s.shrink_to_fit();
1186 /// assert_eq!(s.capacity(), 3 * std::mem::size_of::<usize>());
1187 /// ```
1188 #[inline]
1189 pub fn shrink_to_fit(&mut self) {
1190 self.0.shrink_to(0);
1191 }
1192
1193 /// Retains only the characters specified by the predicate.
1194 ///
1195 /// The method iterates over the characters in the string and calls the `predicate`.
1196 ///
1197 /// If the `predicate` returns `false`, then the character gets removed.
1198 /// If the `predicate` returns `true`, then the character is kept.
1199 ///
1200 /// # Examples
1201 ///
1202 /// ```
1203 /// # use compact_str::CompactString;
    /// let mut s = CompactString::from("äb𝄞d€");
    ///
    /// let keep = [false, true, true, false, true];
    /// let mut iter = keep.iter();
    /// s.retain(|_| *iter.next().unwrap());
    ///
    /// assert_eq!(s, "b𝄞€");
1211 /// ```
    pub fn retain(&mut self, mut predicate: impl FnMut(char) -> bool) {
        // We iterate over the string, and copy character by character.
        //
        // `src_idx` is the read position and `dst_idx` the write position; both only ever
        // advance by whole characters, and `dst_idx <= src_idx` holds at all times.

        // Guard that stores the final length when it goes out of scope. Because the length
        // is written in `Drop`, it is also written if `predicate` panics and unwinds.
        struct SetLenOnDrop<'a> {
            self_: &'a mut CompactString,
            src_idx: usize,
            dst_idx: usize,
        }

        let mut g = SetLenOnDrop {
            self_: self,
            src_idx: 0,
            dst_idx: 0,
        };
        let s = g.self_.as_mut_str();
        // Decode the next character at the read position until the input is exhausted.
        while let Some(ch) = s[g.src_idx..].chars().next() {
            let ch_len = ch.len_utf8();
            if predicate(ch) {
                // SAFETY: We know that both indices are valid, and that we don't split a char.
                // `copy` (not `copy_nonoverlapping`) is used because source and destination
                // may overlap, or be identical when nothing has been removed yet.
                unsafe {
                    let p = s.as_mut_ptr();
                    core::ptr::copy(p.add(g.src_idx), p.add(g.dst_idx), ch_len);
                }
                g.dst_idx += ch_len;
            }
            g.src_idx += ch_len;
        }

        impl Drop for SetLenOnDrop<'_> {
            fn drop(&mut self) {
                // SAFETY: We know that the index is a valid position to break the string.
                unsafe { self.self_.set_len(self.dst_idx) };
            }
        }
        // Dropping the guard explicitly commits the new length on the normal path.
        drop(g);
    }
1248
1249 /// Decode a bytes slice as UTF-8 string, replacing any illegal codepoints
1250 ///
1251 /// # Examples
1252 ///
1253 /// ```
1254 /// # use compact_str::CompactString;
    /// let chess_knight = b"\xf0\x9f\xa8\x84";
    ///
    /// assert_eq!(
    ///     "🨄",
    ///     CompactString::from_utf8_lossy(chess_knight),
    /// );
    ///
    /// // For valid UTF-8 slices, this is the same as:
    /// assert_eq!(
    ///     "🨄",
    ///     CompactString::new(std::str::from_utf8(chess_knight).unwrap()),
1266 /// );
1267 /// ```
1268 ///
1269 /// Incorrect bytes:
1270 ///
1271 /// ```
1272 /// # use compact_str::CompactString;
    /// let broken = b"\xf0\x9f\xc8\x84";
    ///
    /// assert_eq!(
    ///     "\u{FFFD}Ȅ",
    ///     CompactString::from_utf8_lossy(broken),
    /// );
    ///
    /// // For invalid UTF-8 slices, this is an optimized implementation of:
    /// assert_eq!(
    ///     "\u{FFFD}Ȅ",
    ///     CompactString::from(String::from_utf8_lossy(broken)),
1284 /// );
1285 /// ```
    pub fn from_utf8_lossy(v: &[u8]) -> Self {
        /// Decodes the next item from `iter`.
        ///
        /// Returns `None` once `iter` is exhausted. Otherwise returns either the bytes of
        /// one well-formed UTF-8 sequence (copied into `buf`), or the encoded replacement
        /// character if the sequence is malformed or cut short.
        ///
        /// A byte that fails a check is not consumed (the iterator is only advanced past
        /// accepted bytes), so decoding resumes at that byte on the next call.
        fn next_char<'a>(
            iter: &mut <&[u8] as IntoIterator>::IntoIter,
            buf: &'a mut [u8; 4],
        ) -> Option<&'a [u8]> {
            const REPLACEMENT: &[u8] = "\u{FFFD}".as_bytes();

            // Peeks at the next byte: if it matches `$range` it is stored in `buf[$idx]` and
            // consumed; otherwise `next_char` returns the replacement without consuming it.
            macro_rules! ensure_range {
                ($idx:literal, $range:pat) => {{
                    let mut i = iter.clone();
                    match i.next() {
                        Some(&c) if matches!(c, $range) => {
                            buf[$idx] = c;
                            *iter = i;
                        }
                        _ => return Some(REPLACEMENT),
                    }
                }};
            }

            // Same as `ensure_range!`, for an arbitrary UTF-8 continuation byte.
            macro_rules! ensure_cont {
                ($idx:literal) => {{
                    ensure_range!($idx, 0x80..=0xBF);
                }};
            }

            let c = *iter.next()?;
            buf[0] = c;

            // The lead byte decides the sequence length and the range of the second byte.
            match c {
                0x00..=0x7F => {
                    // simple ASCII: push as is
                    Some(&buf[..1])
                }
                0xC2..=0xDF => {
                    // two bytes
                    ensure_cont!(1);
                    Some(&buf[..2])
                }
                0xE0..=0xEF => {
                    // three bytes
                    match c {
                        // 0x80..=0x9F would be an overlong encoding of a two byte codepoint
                        0xE0 => ensure_range!(1, 0xA0..=0xBF),
                        // 0xA0..=0xBF encodes surrogate half
                        0xED => ensure_range!(1, 0x80..=0x9F),
                        // all UTF-8 continuation bytes are valid
                        _ => ensure_cont!(1),
                    }
                    ensure_cont!(2);
                    Some(&buf[..3])
                }
                0xF0..=0xF4 => {
                    // four bytes
                    match c {
                        // 0x80..=0x8F encodes overlong three byte codepoint
                        0xF0 => ensure_range!(1, 0x90..=0xBF),
                        // 0x90..=0xBF encodes codepoint > U+10FFFF
                        0xF4 => ensure_range!(1, 0x80..=0x8F),
                        // all UTF-8 continuation bytes are valid
                        _ => ensure_cont!(1),
                    }
                    ensure_cont!(2);
                    ensure_cont!(3);
                    Some(&buf[..4])
                }
                | 0x80..=0xBF // unicode continuation, invalid
                | 0xC0..=0xC1 // overlong one byte character
                | 0xF5..=0xF7 // four bytes that encode > U+10FFFF
                | 0xF8..=0xFB // five bytes, invalid
                | 0xFC..=0xFD // six bytes, invalid
                | 0xFE..=0xFF => Some(REPLACEMENT), // always invalid
            }
        }

        let mut buf = [0; 4];
        // Reserve one byte per input byte up front; the string can still grow if
        // replacement characters make the output longer than the input.
        let mut result = Self::with_capacity(v.len());
        let mut iter = v.iter();
        while let Some(s) = next_char(&mut iter, &mut buf) {
            // SAFETY: next_char() only returns valid strings
            let s = unsafe { core::str::from_utf8_unchecked(s) };
            result.push_str(s);
        }
        result
    }
1371
1372 fn from_utf16x(
1373 v: &[u8],
1374 from_int: impl Fn(u16) -> u16,
1375 from_bytes: impl Fn([u8; 2]) -> u16,
1376 ) -> Result<Self, Utf16Error> {
1377 if v.len() % 2 != 0 {
1378 // Input had an odd number of bytes.
1379 return Err(Utf16Error(()));
1380 }
1381
1382 // Note: we don't use collect::<Result<_, _>>() because that fails to pre-allocate a buffer,
1383 // even though the size of our iterator, `v`, is known ahead of time.
1384 //
1385 // rustlang issue #48994 is tracking the fix
1386 let mut result = CompactString::with_capacity(v.len() / 2);
1387
1388 // SAFETY: `u8` and `u16` are `Copy`, so if the alignment fits, we can transmute a
1389 // `[u8; 2*N]` to `[u16; N]`. `slice::align_to()` checks if the alignment is right.
1390 match unsafe { v.align_to::<u16>() } {
1391 (&[], v, &[]) => {
1392 // Input is correctly aligned.
1393 for c in core::char::decode_utf16(v.iter().copied().map(from_int)) {
1394 result.push(c.map_err(|_| Utf16Error(()))?);
1395 }
1396 }
1397 _ => {
1398 // Input's alignment is off.
1399 // SAFETY: we can always reinterpret a `[u8; 2*N]` slice as `[[u8; 2]; N]`
1400 let v = unsafe { slice::from_raw_parts(v.as_ptr().cast(), v.len() / 2) };
1401 for c in core::char::decode_utf16(v.iter().copied().map(from_bytes)) {
1402 result.push(c.map_err(|_| Utf16Error(()))?);
1403 }
1404 }
1405 }
1406
1407 Ok(result)
1408 }
1409
1410 fn from_utf16x_lossy(
1411 v: &[u8],
1412 from_int: impl Fn(u16) -> u16,
1413 from_bytes: impl Fn([u8; 2]) -> u16,
1414 ) -> Self {
1415 // Notice: We write the string "๏ฟฝ" instead of the character '๏ฟฝ', so the character does not
1416 // have to be formatted before it can be appended.
1417
1418 let (trailing_extra_byte, v) = match v.len() % 2 != 0 {
1419 true => (true, &v[..v.len() - 1]),
1420 false => (false, v),
1421 };
1422 let mut result = CompactString::with_capacity(v.len() / 2);
1423
1424 // SAFETY: `u8` and `u16` are `Copy`, so if the alignment fits, we can transmute a
1425 // `[u8; 2*N]` to `[u16; N]`. `slice::align_to()` checks if the alignment is right.
1426 match unsafe { v.align_to::<u16>() } {
1427 (&[], v, &[]) => {
1428 // Input is correctly aligned.
1429 for c in core::char::decode_utf16(v.iter().copied().map(from_int)) {
1430 match c {
1431 Ok(c) => result.push(c),
1432 Err(_) => result.push_str("๏ฟฝ"),
1433 }
1434 }
1435 }
1436 _ => {
1437 // Input's alignment is off.
1438 // SAFETY: we can always reinterpret a `[u8; 2*N]` slice as `[[u8; 2]; N]`
1439 let v = unsafe { slice::from_raw_parts(v.as_ptr().cast(), v.len() / 2) };
1440 for c in core::char::decode_utf16(v.iter().copied().map(from_bytes)) {
1441 match c {
1442 Ok(c) => result.push(c),
1443 Err(_) => result.push_str("๏ฟฝ"),
1444 }
1445 }
1446 }
1447 }
1448
1449 if trailing_extra_byte {
1450 result.push_str("๏ฟฝ");
1451 }
1452 result
1453 }
1454
1455 /// Decode a slice of bytes as UTF-16 encoded string, in little endian.
1456 ///
1457 /// # Errors
1458 ///
1459 /// If the slice has an odd number of bytes, or if it did not contain valid UTF-16 characters,
1460 /// a [`Utf16Error`] is returned.
1461 ///
1462 /// # Examples
1463 ///
1464 /// ```
1465 /// # use compact_str::CompactString;
1466 /// const DANCING_MEN: &[u8] = b"\x3d\xd8\x6f\xdc\x0d\x20\x42\x26\x0f\xfe";
1467 /// let dancing_men = CompactString::from_utf16le(DANCING_MEN).unwrap();
    /// assert_eq!(dancing_men, "👯\u{200d}♂\u{fe0f}");
1469 /// ```
1470 #[inline]
1471 pub fn from_utf16le(v: impl AsRef<[u8]>) -> Result<Self, Utf16Error> {
1472 CompactString::from_utf16x(v.as_ref(), u16::from_le, u16::from_le_bytes)
1473 }
1474
1475 /// Decode a slice of bytes as UTF-16 encoded string, in big endian.
1476 ///
1477 /// # Errors
1478 ///
1479 /// If the slice has an odd number of bytes, or if it did not contain valid UTF-16 characters,
1480 /// a [`Utf16Error`] is returned.
1481 ///
1482 /// # Examples
1483 ///
1484 /// ```
1485 /// # use compact_str::CompactString;
1486 /// const DANCING_WOMEN: &[u8] = b"\xd8\x3d\xdc\x6f\x20\x0d\x26\x40\xfe\x0f";
1487 /// let dancing_women = CompactString::from_utf16be(DANCING_WOMEN).unwrap();
    /// assert_eq!(dancing_women, "👯\u{200d}♀\u{fe0f}");
1489 /// ```
1490 #[inline]
1491 pub fn from_utf16be(v: impl AsRef<[u8]>) -> Result<Self, Utf16Error> {
1492 CompactString::from_utf16x(v.as_ref(), u16::from_be, u16::from_be_bytes)
1493 }
1494
1495 /// Lossy decode a slice of bytes as UTF-16 encoded string, in little endian.
1496 ///
1497 /// In this context "lossy" means that any broken characters in the input are replaced by the
1498 /// \<REPLACEMENT CHARACTER\> `'๏ฟฝ'`. Please notice that, unlike UTF-8, UTF-16 is not self
1499 /// synchronizing. I.e. if a byte in the input is dropped, all following data is broken.
1500 ///
1501 /// # Examples
1502 ///
1503 /// ```
1504 /// # use compact_str::CompactString;
1505 /// // A "random" bit was flipped in the 4th byte:
1506 /// const DANCING_MEN: &[u8] = b"\x3d\xd8\x6f\xfc\x0d\x20\x42\x26\x0f\xfe";
1507 /// let dancing_men = CompactString::from_utf16le_lossy(DANCING_MEN);
    /// assert_eq!(dancing_men, "\u{fffd}\u{fc6f}\u{200d}♂\u{fe0f}");
1509 /// ```
1510 #[inline]
1511 pub fn from_utf16le_lossy(v: impl AsRef<[u8]>) -> Self {
1512 CompactString::from_utf16x_lossy(v.as_ref(), u16::from_le, u16::from_le_bytes)
1513 }
1514
1515 /// Lossy decode a slice of bytes as UTF-16 encoded string, in big endian.
1516 ///
1517 /// In this context "lossy" means that any broken characters in the input are replaced by the
1518 /// \<REPLACEMENT CHARACTER\> `'๏ฟฝ'`. Please notice that, unlike UTF-8, UTF-16 is not self
1519 /// synchronizing. I.e. if a byte in the input is dropped, all following data is broken.
1520 ///
1521 /// # Examples
1522 ///
1523 /// ```
1524 /// # use compact_str::CompactString;
1525 /// // A "random" bit was flipped in the 9th byte:
1526 /// const DANCING_WOMEN: &[u8] = b"\xd8\x3d\xdc\x6f\x20\x0d\x26\x40\xde\x0f";
1527 /// let dancing_women = CompactString::from_utf16be_lossy(DANCING_WOMEN);
    /// assert_eq!(dancing_women, "👯\u{200d}♀\u{fffd}");
1529 /// ```
1530 #[inline]
1531 pub fn from_utf16be_lossy(v: impl AsRef<[u8]>) -> Self {
1532 CompactString::from_utf16x_lossy(v.as_ref(), u16::from_be, u16::from_be_bytes)
1533 }
1534
1535 /// Convert the [`CompactString`] into a [`String`].
1536 ///
1537 /// # Examples
1538 ///
1539 /// ```
1540 /// # use compact_str::CompactString;
1541 /// let s = CompactString::new("Hello world");
1542 /// let s = s.into_string();
1543 /// assert_eq!(s, "Hello world");
1544 /// ```
1545 pub fn into_string(self) -> String {
1546 self.0.into_string()
1547 }
1548
1549 /// Convert a [`String`] into a [`CompactString`] _without inlining_.
1550 ///
1551 /// Note: You probably don't need to use this method, instead you should use `From<String>`
1552 /// which is implemented for [`CompactString`].
1553 ///
    /// This method exists in case your code is very sensitive to memory allocations. Normally when
1555 /// converting a [`String`] to a [`CompactString`] we'll inline short strings onto the stack.
1556 /// But this results in [`Drop`]-ing the original [`String`], which causes memory it owned on
1557 /// the heap to be deallocated. Instead when using this method, we always reuse the buffer that
1558 /// was previously owned by the [`String`], so no trips to the allocator are needed.
1559 ///
1560 /// # Examples
1561 ///
1562 /// ### Short Strings
1563 /// ```
1564 /// use compact_str::CompactString;
1565 ///
1566 /// let short = "hello world".to_string();
1567 /// let c_heap = CompactString::from_string_buffer(short);
1568 ///
1569 /// // using CompactString::from_string_buffer, we'll re-use the String's underlying buffer
1570 /// assert!(c_heap.is_heap_allocated());
1571 ///
1572 /// // note: when Clone-ing a short heap allocated string, we'll eagerly inline at that point
1573 /// let c_inline = c_heap.clone();
1574 /// assert!(!c_inline.is_heap_allocated());
1575 ///
1576 /// assert_eq!(c_heap, c_inline);
1577 /// ```
1578 ///
1579 /// ### Longer Strings
1580 /// ```
1581 /// use compact_str::CompactString;
1582 ///
1583 /// let x = "longer string that will be on the heap".to_string();
1584 /// let c1 = CompactString::from(x);
1585 ///
1586 /// let y = "longer string that will be on the heap".to_string();
1587 /// let c2 = CompactString::from_string_buffer(y);
1588 ///
1589 /// // for longer strings, we re-use the underlying String's buffer in both cases
1590 /// assert!(c1.is_heap_allocated());
1591 /// assert!(c2.is_heap_allocated());
1592 /// ```
1593 ///
1594 /// ### Buffer Re-use
1595 /// ```
1596 /// use compact_str::CompactString;
1597 ///
1598 /// let og = "hello world".to_string();
1599 /// let og_addr = og.as_ptr();
1600 ///
1601 /// let mut c = CompactString::from_string_buffer(og);
1602 /// let ex_addr = c.as_ptr();
1603 ///
1604 /// // When converting to/from String and CompactString with from_string_buffer we always re-use
1605 /// // the same underlying allocated memory/buffer
1606 /// assert_eq!(og_addr, ex_addr);
1607 ///
1608 /// let long = "this is a long string that will be on the heap".to_string();
1609 /// let long_addr = long.as_ptr();
1610 ///
1611 /// let mut long_c = CompactString::from(long);
1612 /// let long_ex_addr = long_c.as_ptr();
1613 ///
1614 /// // When converting to/from String and CompactString with From<String>, we'll also re-use the
1615 /// // underlying buffer, if the string is long, otherwise when converting to CompactString we
1616 /// // eagerly inline
1617 /// assert_eq!(long_addr, long_ex_addr);
1618 /// ```
1619 #[inline]
1620 #[track_caller]
1621 pub fn from_string_buffer(s: String) -> Self {
1622 let repr = Repr::from_string(s, false).unwrap_with_msg();
1623 CompactString(repr)
1624 }
1625
1626 /// Returns a copy of this string where each character is mapped to its
1627 /// ASCII lower case equivalent.
1628 ///
1629 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1630 /// but non-ASCII letters are unchanged.
1631 ///
1632 /// To lowercase the value in-place, use [`str::make_ascii_lowercase`].
1633 ///
1634 /// To lowercase ASCII characters in addition to non-ASCII characters, use
1635 /// [`CompactString::to_lowercase`].
1636 ///
1637 /// # Examples
1638 ///
1639 /// ```
1640 /// use compact_str::CompactString;
    /// let s = CompactString::new("Grüße, Jürgen ❤");
    ///
    /// assert_eq!("grüße, jürgen ❤", s.to_ascii_lowercase());
1644 /// ```
1645 #[must_use = "to lowercase the value in-place, use `make_ascii_lowercase()`"]
1646 #[inline]
1647 pub fn to_ascii_lowercase(&self) -> Self {
1648 let mut s = self.clone();
1649 s.make_ascii_lowercase();
1650 s
1651 }
1652
1653 /// Returns a copy of this string where each character is mapped to its
1654 /// ASCII upper case equivalent.
1655 ///
1656 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1657 /// but non-ASCII letters are unchanged.
1658 ///
1659 /// To uppercase the value in-place, use [`str::make_ascii_uppercase`].
1660 ///
1661 /// To uppercase ASCII characters in addition to non-ASCII characters, use
1662 /// [`CompactString::to_uppercase`].
1663 ///
1664 /// # Examples
1665 ///
1666 /// ```
1667 /// use compact_str::CompactString;
    /// let s = CompactString::new("Grüße, Jürgen ❤");
    ///
    /// assert_eq!("GRüßE, JüRGEN ❤", s.to_ascii_uppercase());
1671 /// ```
1672 #[must_use = "to uppercase the value in-place, use `make_ascii_uppercase()`"]
1673 #[inline]
1674 pub fn to_ascii_uppercase(&self) -> Self {
1675 let mut s = self.clone();
1676 s.make_ascii_uppercase();
1677 s
1678 }
1679
1680 /// Returns the lowercase equivalent of this string slice, as a new [`CompactString`].
1681 ///
1682 /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
1683 /// `Lowercase`.
1684 ///
1685 /// Since some characters can expand into multiple characters when changing
1686 /// the case, this function returns a [`CompactString`] instead of modifying the
1687 /// parameter in-place.
1688 ///
1689 /// # Examples
1690 ///
1691 /// Basic usage:
1692 ///
1693 /// ```
1694 /// use compact_str::CompactString;
1695 /// let s = CompactString::new("HELLO");
1696 ///
1697 /// assert_eq!("hello", s.to_lowercase());
1698 /// ```
1699 ///
1700 /// A tricky example, with sigma:
1701 ///
1702 /// ```
1703 /// use compact_str::CompactString;
    /// let sigma = CompactString::new("Σ");
    ///
    /// assert_eq!("σ", sigma.to_lowercase());
    ///
    /// // but at the end of a word, it's ς, not σ:
    /// let odysseus = CompactString::new("ὈΔΥΣΣΕΎΣ");
    ///
    /// assert_eq!("ὀδυσσεύς", odysseus.to_lowercase());
1712 /// ```
1713 ///
1714 /// Languages without case are not changed:
1715 ///
1716 /// ```
1717 /// use compact_str::CompactString;
    /// let new_year = CompactString::new("农历新年");
1719 ///
1720 /// assert_eq!(new_year, new_year.to_lowercase());
1721 /// ```
1722 #[must_use = "this returns the lowercase string as a new CompactString, \
1723 without modifying the original"]
1724 pub fn to_lowercase(&self) -> Self {
1725 Self::from_str_to_lowercase(self.as_str())
1726 }
1727
1728 /// Returns the lowercase equivalent of this string slice, as a new [`CompactString`].
1729 ///
1730 /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
1731 /// `Lowercase`.
1732 ///
1733 /// Since some characters can expand into multiple characters when changing
1734 /// the case, this function returns a [`CompactString`] instead of modifying the
1735 /// parameter in-place.
1736 ///
1737 /// # Examples
1738 ///
1739 /// Basic usage:
1740 ///
1741 /// ```
1742 /// use compact_str::CompactString;
1743 ///
1744 /// assert_eq!("hello", CompactString::from_str_to_lowercase("HELLO"));
1745 /// ```
1746 ///
1747 /// A tricky example, with sigma:
1748 ///
1749 /// ```
1750 /// use compact_str::CompactString;
1751 ///
    /// assert_eq!("σ", CompactString::from_str_to_lowercase("Σ"));
    ///
    /// // but at the end of a word, it's ς, not σ:
    /// assert_eq!("ὀδυσσεύς", CompactString::from_str_to_lowercase("ὈΔΥΣΣΕΎΣ"));
1756 /// ```
1757 ///
1758 /// Languages without case are not changed:
1759 ///
1760 /// ```
1761 /// use compact_str::CompactString;
1762 ///
    /// let new_year = "农历新年";
1764 /// assert_eq!(new_year, CompactString::from_str_to_lowercase(new_year));
1765 /// ```
1766 #[must_use = "this returns the lowercase string as a new CompactString, \
1767 without modifying the original"]
1768 pub fn from_str_to_lowercase(input: &str) -> Self {
1769 let mut s = convert_while_ascii(input.as_bytes(), u8::to_ascii_lowercase);
1770
1771 // Safety: we know this is a valid char boundary since
1772 // out.len() is only progressed if ascii bytes are found
1773 let rest = unsafe { input.get_unchecked(s.len()..) };
1774
1775 for (i, c) in rest.char_indices() {
1776 if c == 'ฮฃ' {
1777 // ฮฃ maps to ฯ, except at the end of a word where it maps to ฯ.
1778 // This is the only conditional (contextual) but language-independent mapping
1779 // in `SpecialCasing.txt`,
1780 // so hard-code it rather than have a generic "condition" mechanism.
1781 // See https://github.com/rust-lang/rust/issues/26035
1782 map_uppercase_sigma(rest, i, &mut s)
1783 } else {
1784 s.extend(c.to_lowercase());
1785 }
1786 }
1787 return s;
1788
1789 fn map_uppercase_sigma(from: &str, i: usize, to: &mut CompactString) {
1790 // See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
1791 // for the definition of `Final_Sigma`.
1792 debug_assert!('ฮฃ'.len_utf8() == 2);
1793 let is_word_final = case_ignorable_then_cased(from[..i].chars().rev())
1794 && !case_ignorable_then_cased(from[i + 2..].chars());
1795 to.push_str(if is_word_final { "ฯ" } else { "ฯ" });
1796 }
1797
1798 fn case_ignorable_then_cased<I: Iterator<Item = char>>(mut iter: I) -> bool {
1799 use unicode_data::case_ignorable::lookup as Case_Ignorable;
1800 use unicode_data::cased::lookup as Cased;
1801 match iter.find(|&c| !Case_Ignorable(c)) {
1802 Some(c) => Cased(c),
1803 None => false,
1804 }
1805 }
1806 }
1807
1808 /// Returns the uppercase equivalent of this string slice, as a new [`CompactString`].
1809 ///
1810 /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
1811 /// `Uppercase`.
1812 ///
1813 /// Since some characters can expand into multiple characters when changing
1814 /// the case, this function returns a [`CompactString`] instead of modifying the
1815 /// parameter in-place.
1816 ///
1817 /// # Examples
1818 ///
1819 /// Basic usage:
1820 ///
1821 /// ```
1822 /// use compact_str::CompactString;
1823 /// let s = CompactString::new("hello");
1824 ///
1825 /// assert_eq!("HELLO", s.to_uppercase());
1826 /// ```
1827 ///
1828 /// Scripts without case are not changed:
1829 ///
1830 /// ```
1831 /// use compact_str::CompactString;
    /// let new_year = CompactString::new("农历新年");
1833 ///
1834 /// assert_eq!(new_year, new_year.to_uppercase());
1835 /// ```
1836 ///
1837 /// One character can become multiple:
1838 /// ```
1839 /// use compact_str::CompactString;
    /// let s = CompactString::new("tschüß");
    ///
    /// assert_eq!("TSCHÜSS", s.to_uppercase());
1843 /// ```
1844 #[must_use = "this returns the uppercase string as a new CompactString, \
1845 without modifying the original"]
1846 pub fn to_uppercase(&self) -> Self {
1847 Self::from_str_to_uppercase(self.as_str())
1848 }
1849
1850 /// Returns the uppercase equivalent of this string slice, as a new [`CompactString`].
1851 ///
1852 /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
1853 /// `Uppercase`.
1854 ///
1855 /// Since some characters can expand into multiple characters when changing
1856 /// the case, this function returns a [`CompactString`] instead of modifying the
1857 /// parameter in-place.
1858 ///
1859 /// # Examples
1860 ///
1861 /// Basic usage:
1862 ///
1863 /// ```
1864 /// use compact_str::CompactString;
1865 ///
1866 /// assert_eq!("HELLO", CompactString::from_str_to_uppercase("hello"));
1867 /// ```
1868 ///
1869 /// Scripts without case are not changed:
1870 ///
1871 /// ```
1872 /// use compact_str::CompactString;
1873 ///
    /// let new_year = "农历新年";
1875 /// assert_eq!(new_year, CompactString::from_str_to_uppercase(new_year));
1876 /// ```
1877 ///
1878 /// One character can become multiple:
1879 /// ```
1880 /// use compact_str::CompactString;
1881 ///
    /// assert_eq!("TSCHÜSS", CompactString::from_str_to_uppercase("tschüß"));
1883 /// ```
1884 #[must_use = "this returns the uppercase string as a new CompactString, \
1885 without modifying the original"]
1886 pub fn from_str_to_uppercase(input: &str) -> Self {
1887 let mut out = convert_while_ascii(input.as_bytes(), u8::to_ascii_uppercase);
1888
1889 // Safety: we know this is a valid char boundary since
1890 // out.len() is only progressed if ascii bytes are found
1891 let rest = unsafe { input.get_unchecked(out.len()..) };
1892
1893 for c in rest.chars() {
1894 out.extend(c.to_uppercase());
1895 }
1896
1897 out
1898 }
1899}
1900
/// Converts the bytes while the bytes are still ascii.
/// For better average performance, this happens in chunks of `2*size_of::<usize>()`.
/// Returns a [`CompactString`] with the converted bytes.
///
/// Only whole chunks are converted: the conversion stops at the first chunk that contains
/// a non-ASCII byte, or when fewer bytes than one chunk remain. The length of the returned
/// string tells the caller how many bytes of `b` were handled.
///
/// Copied from https://doc.rust-lang.org/nightly/src/alloc/str.rs.html#623-666
#[inline]
fn convert_while_ascii(b: &[u8], convert: fn(&u8) -> u8) -> CompactString {
    // Room for every input byte, so each chunk written below is within capacity.
    let mut out = CompactString::with_capacity(b.len());

    const USIZE_SIZE: usize = mem::size_of::<usize>();
    const MAGIC_UNROLL: usize = 2;
    // Number of bytes handled per loop iteration.
    const N: usize = USIZE_SIZE * MAGIC_UNROLL;
    // A word with the high bit of every byte set; ASCII bytes never have that bit set.
    const NONASCII_MASK: usize = usize::from_ne_bytes([0x80; USIZE_SIZE]);

    // Number of bytes converted so far; also the offset of the next chunk.
    let mut i = 0;
    unsafe {
        while i + N <= b.len() {
            // Safety: the loop condition guarantees `i + N <= b.len()`, and `out` was created
            // with a capacity of `b.len()`, so both chunk ranges are in bounds. The length of
            // `out` is not changed inside the loop, so its spare capacity is indexed with the
            // same offsets as `b`.
            let in_chunk = b.get_unchecked(i..i + N);
            let out_chunk = out.spare_capacity_mut().get_unchecked_mut(i..i + N);

            let mut bits = 0;
            for j in 0..MAGIC_UNROLL {
                // read the bytes 1 usize at a time (unaligned since we haven't checked the
                // alignment) safety: in_chunk is valid bytes in the range
                bits |= in_chunk.as_ptr().cast::<usize>().add(j).read_unaligned();
            }
            // if our chunks aren't ascii, then return only the prior bytes as init
            if bits & NONASCII_MASK != 0 {
                break;
            }

            // perform the case conversions on N bytes (gets heavily autovec'd)
            for j in 0..N {
                // safety: in_chunk and out_chunk is valid bytes in the range
                let out = out_chunk.get_unchecked_mut(j);
                out.write(convert(in_chunk.get_unchecked(j)));
            }

            // mark these bytes as initialised
            i += N;
        }
        // Publish the converted bytes: only ASCII was written, so the prefix is valid UTF-8.
        out.set_len(i);
    }

    out
}
1948
1949impl Clone for CompactString {
1950 #[inline]
1951 fn clone(&self) -> Self {
1952 Self(self.0.clone())
1953 }
1954
1955 #[inline]
1956 fn clone_from(&mut self, source: &Self) {
1957 self.0.clone_from(&source.0)
1958 }
1959}
1960
1961impl Default for CompactString {
1962 #[inline]
1963 fn default() -> Self {
1964 CompactString::new("")
1965 }
1966}
1967
1968impl Deref for CompactString {
1969 type Target = str;
1970
1971 #[inline]
1972 fn deref(&self) -> &str {
1973 self.as_str()
1974 }
1975}
1976
1977impl DerefMut for CompactString {
1978 #[inline]
1979 fn deref_mut(&mut self) -> &mut str {
1980 self.as_mut_str()
1981 }
1982}
1983
1984impl AsRef<str> for CompactString {
1985 #[inline]
1986 fn as_ref(&self) -> &str {
1987 self.as_str()
1988 }
1989}
1990
/// Cheap view of the contents as an [`OsStr`] (only with the `std` feature).
#[cfg(feature = "std")]
impl AsRef<OsStr> for CompactString {
    #[inline]
    fn as_ref(&self) -> &OsStr {
        let text: &str = self.as_str();
        OsStr::new(text)
    }
}
1998
1999impl AsRef<[u8]> for CompactString {
2000 #[inline]
2001 fn as_ref(&self) -> &[u8] {
2002 self.as_bytes()
2003 }
2004}
2005
2006impl Borrow<str> for CompactString {
2007 #[inline]
2008 fn borrow(&self) -> &str {
2009 self.as_str()
2010 }
2011}
2012
2013impl BorrowMut<str> for CompactString {
2014 #[inline]
2015 fn borrow_mut(&mut self) -> &mut str {
2016 self.as_mut_str()
2017 }
2018}
2019
// Marker impl: the `PartialEq` implementations in this file compare the `str` contents,
// and `str` equality is a full equivalence relation.
impl Eq for CompactString {}
2021
2022impl<T: AsRef<str> + ?Sized> PartialEq<T> for CompactString {
2023 fn eq(&self, other: &T) -> bool {
2024 self.as_str() == other.as_ref()
2025 }
2026}
2027
2028impl PartialEq<CompactString> for &CompactString {
2029 fn eq(&self, other: &CompactString) -> bool {
2030 self.as_str() == other.as_str()
2031 }
2032}
2033
2034impl PartialEq<CompactString> for String {
2035 fn eq(&self, other: &CompactString) -> bool {
2036 self.as_str() == other.as_str()
2037 }
2038}
2039
2040impl PartialEq<&CompactString> for String {
2041 fn eq(&self, other: &&CompactString) -> bool {
2042 self.as_str() == other.as_str()
2043 }
2044}
2045
2046impl PartialEq<CompactString> for &String {
2047 fn eq(&self, other: &CompactString) -> bool {
2048 self.as_str() == other.as_str()
2049 }
2050}
2051
2052impl PartialEq<CompactString> for str {
2053 fn eq(&self, other: &CompactString) -> bool {
2054 self == other.as_str()
2055 }
2056}
2057
2058impl PartialEq<&'_ CompactString> for str {
2059 fn eq(&self, other: &&CompactString) -> bool {
2060 self == other.as_str()
2061 }
2062}
2063
2064impl PartialEq<CompactString> for &str {
2065 fn eq(&self, other: &CompactString) -> bool {
2066 *self == other.as_str()
2067 }
2068}
2069
2070impl PartialEq<CompactString> for &&str {
2071 fn eq(&self, other: &CompactString) -> bool {
2072 **self == other.as_str()
2073 }
2074}
2075
2076impl PartialEq<CompactString> for Cow<'_, str> {
2077 fn eq(&self, other: &CompactString) -> bool {
2078 *self == other.as_str()
2079 }
2080}
2081
2082impl PartialEq<CompactString> for &Cow<'_, str> {
2083 fn eq(&self, other: &CompactString) -> bool {
2084 *self == other.as_str()
2085 }
2086}
2087
2088impl PartialEq<String> for &CompactString {
2089 fn eq(&self, other: &String) -> bool {
2090 self.as_str() == other.as_str()
2091 }
2092}
2093
2094impl PartialEq<Cow<'_, str>> for &CompactString {
2095 fn eq(&self, other: &Cow<'_, str>) -> bool {
2096 self.as_str() == other
2097 }
2098}
2099
2100impl Ord for CompactString {
2101 fn cmp(&self, other: &Self) -> Ordering {
2102 self.as_str().cmp(other.as_str())
2103 }
2104}
2105
2106impl PartialOrd for CompactString {
2107 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
2108 Some(self.cmp(other))
2109 }
2110}
2111
2112impl Hash for CompactString {
2113 fn hash<H: Hasher>(&self, state: &mut H) {
2114 self.as_str().hash(state)
2115 }
2116}
2117
2118impl<'a> From<&'a str> for CompactString {
2119 #[inline]
2120 #[track_caller]
2121 fn from(s: &'a str) -> Self {
2122 CompactString::new(s)
2123 }
2124}
2125
2126impl From<String> for CompactString {
2127 #[inline]
2128 #[track_caller]
2129 fn from(s: String) -> Self {
2130 let repr = Repr::from_string(s, true).unwrap_with_msg();
2131 CompactString(repr)
2132 }
2133}
2134
2135impl<'a> From<&'a String> for CompactString {
2136 #[inline]
2137 #[track_caller]
2138 fn from(s: &'a String) -> Self {
2139 CompactString::new(s)
2140 }
2141}
2142
2143impl<'a> From<Cow<'a, str>> for CompactString {
2144 fn from(cow: Cow<'a, str>) -> Self {
2145 match cow {
2146 Cow::Borrowed(s) => s.into(),
2147 // we separate these two so we can re-use the underlying buffer in the owned case
2148 Cow::Owned(s) => s.into(),
2149 }
2150 }
2151}
2152
2153impl From<Box<str>> for CompactString {
2154 #[inline]
2155 #[track_caller]
2156 fn from(b: Box<str>) -> Self {
2157 let s = b.into_string();
2158 let repr = Repr::from_string(s, true).unwrap_with_msg();
2159 CompactString(repr)
2160 }
2161}
2162
2163impl From<CompactString> for String {
2164 #[inline]
2165 fn from(s: CompactString) -> Self {
2166 s.into_string()
2167 }
2168}
2169
2170impl From<CompactString> for Cow<'_, str> {
2171 #[inline]
2172 fn from(s: CompactString) -> Self {
2173 if let Some(s) = s.as_static_str() {
2174 Self::Borrowed(s)
2175 } else {
2176 Self::Owned(s.into_string())
2177 }
2178 }
2179}
2180
2181impl<'a> From<&'a CompactString> for Cow<'a, str> {
2182 #[inline]
2183 fn from(s: &'a CompactString) -> Self {
2184 Self::Borrowed(s)
2185 }
2186}
2187
2188#[cfg(target_has_atomic = "ptr")]
2189impl From<CompactString> for alloc::sync::Arc<str> {
2190 fn from(value: CompactString) -> Self {
2191 Self::from(value.as_str())
2192 }
2193}
2194
2195impl From<CompactString> for alloc::rc::Rc<str> {
2196 fn from(value: CompactString) -> Self {
2197 Self::from(value.as_str())
2198 }
2199}
2200
/// Turns a [`CompactString`] into a boxed error whose message is the string itself.
#[cfg(feature = "std")]
impl From<CompactString> for Box<dyn std::error::Error + Send + Sync> {
    fn from(value: CompactString) -> Self {
        /// Private error type wrapping the message.
        struct StringError(CompactString);

        impl fmt::Display for StringError {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let Self(message) = self;
                fmt::Display::fmt(message, f)
            }
        }

        // Deliberately transparent: print the message, not "StringError(..)".
        impl fmt::Debug for StringError {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let Self(message) = self;
                fmt::Debug::fmt(message, f)
            }
        }

        impl std::error::Error for StringError {
            #[allow(deprecated)]
            fn description(&self) -> &str {
                self.0.as_str()
            }
        }

        let error = StringError(value);
        Box::new(error)
    }
}
2229
/// Turns a [`CompactString`] into a boxed error by way of the `Send + Sync` conversion.
#[cfg(feature = "std")]
impl From<CompactString> for Box<dyn std::error::Error> {
    fn from(value: CompactString) -> Self {
        // Build the `Send + Sync` flavour first; returning it drops the auto-trait bounds.
        let thread_safe: Box<dyn std::error::Error + Send + Sync> = From::from(value);
        thread_safe
    }
}
2238
2239impl From<CompactString> for Box<str> {
2240 fn from(value: CompactString) -> Self {
2241 if value.is_heap_allocated() {
2242 value.into_string().into_boxed_str()
2243 } else {
2244 Box::from(value.as_str())
2245 }
2246 }
2247}
2248
/// Converts into an `OsString` by way of [`String`].
#[cfg(feature = "std")]
impl From<CompactString> for std::ffi::OsString {
    fn from(value: CompactString) -> Self {
        let owned: String = value.into_string();
        Self::from(owned)
    }
}
2255
/// Converts into a `PathBuf` by way of `OsString`.
#[cfg(feature = "std")]
impl From<CompactString> for std::path::PathBuf {
    fn from(value: CompactString) -> Self {
        let os_string = std::ffi::OsString::from(value);
        Self::from(os_string)
    }
}
2262
/// Cheap view of the contents as a `Path` (only with the `std` feature).
#[cfg(feature = "std")]
impl AsRef<std::path::Path> for CompactString {
    fn as_ref(&self) -> &std::path::Path {
        let text: &str = self.as_str();
        std::path::Path::new(text)
    }
}
2269
2270impl From<CompactString> for alloc::vec::Vec<u8> {
2271 fn from(value: CompactString) -> Self {
2272 if value.is_heap_allocated() {
2273 value.into_string().into_bytes()
2274 } else {
2275 value.as_bytes().to_vec()
2276 }
2277 }
2278}
2279
2280impl FromStr for CompactString {
2281 type Err = core::convert::Infallible;
2282 fn from_str(s: &str) -> Result<CompactString, Self::Err> {
2283 Ok(CompactString::from(s))
2284 }
2285}
2286
2287impl fmt::Debug for CompactString {
2288 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2289 fmt::Debug::fmt(self.as_str(), f)
2290 }
2291}
2292
2293impl fmt::Display for CompactString {
2294 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2295 fmt::Display::fmt(self.as_str(), f)
2296 }
2297}
2298
2299impl FromIterator<char> for CompactString {
2300 fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
2301 let repr = iter.into_iter().collect();
2302 CompactString(repr)
2303 }
2304}
2305
2306impl<'a> FromIterator<&'a char> for CompactString {
2307 fn from_iter<T: IntoIterator<Item = &'a char>>(iter: T) -> Self {
2308 let repr = iter.into_iter().collect();
2309 CompactString(repr)
2310 }
2311}
2312
2313impl<'a> FromIterator<&'a str> for CompactString {
2314 fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
2315 let repr = iter.into_iter().collect();
2316 CompactString(repr)
2317 }
2318}
2319
2320impl FromIterator<Box<str>> for CompactString {
2321 fn from_iter<T: IntoIterator<Item = Box<str>>>(iter: T) -> Self {
2322 let repr = iter.into_iter().collect();
2323 CompactString(repr)
2324 }
2325}
2326
2327impl<'a> FromIterator<Cow<'a, str>> for CompactString {
2328 fn from_iter<T: IntoIterator<Item = Cow<'a, str>>>(iter: T) -> Self {
2329 let repr = iter.into_iter().collect();
2330 CompactString(repr)
2331 }
2332}
2333
2334impl FromIterator<String> for CompactString {
2335 fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self {
2336 let repr = iter.into_iter().collect();
2337 CompactString(repr)
2338 }
2339}
2340
2341impl FromIterator<CompactString> for CompactString {
2342 fn from_iter<T: IntoIterator<Item = CompactString>>(iter: T) -> Self {
2343 let repr = iter.into_iter().collect();
2344 CompactString(repr)
2345 }
2346}
2347
2348impl FromIterator<CompactString> for String {
2349 fn from_iter<T: IntoIterator<Item = CompactString>>(iter: T) -> Self {
2350 let mut iterator = iter.into_iter();
2351 match iterator.next() {
2352 None => String::new(),
2353 Some(buf) => {
2354 let mut buf = buf.into_string();
2355 buf.extend(iterator);
2356 buf
2357 }
2358 }
2359 }
2360}
2361
2362impl FromIterator<CompactString> for Cow<'_, str> {
2363 fn from_iter<T: IntoIterator<Item = CompactString>>(iter: T) -> Self {
2364 String::from_iter(iter).into()
2365 }
2366}
2367
2368impl Extend<char> for CompactString {
2369 fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
2370 self.0.extend(iter)
2371 }
2372}
2373
2374impl<'a> Extend<&'a char> for CompactString {
2375 fn extend<T: IntoIterator<Item = &'a char>>(&mut self, iter: T) {
2376 self.0.extend(iter)
2377 }
2378}
2379
2380impl<'a> Extend<&'a str> for CompactString {
2381 fn extend<T: IntoIterator<Item = &'a str>>(&mut self, iter: T) {
2382 self.0.extend(iter)
2383 }
2384}
2385
2386impl Extend<Box<str>> for CompactString {
2387 fn extend<T: IntoIterator<Item = Box<str>>>(&mut self, iter: T) {
2388 self.0.extend(iter)
2389 }
2390}
2391
2392impl<'a> Extend<Cow<'a, str>> for CompactString {
2393 fn extend<T: IntoIterator<Item = Cow<'a, str>>>(&mut self, iter: T) {
2394 iter.into_iter().for_each(move |s| self.push_str(&s));
2395 }
2396}
2397
2398impl Extend<String> for CompactString {
2399 fn extend<T: IntoIterator<Item = String>>(&mut self, iter: T) {
2400 self.0.extend(iter)
2401 }
2402}
2403
2404impl Extend<CompactString> for String {
2405 fn extend<T: IntoIterator<Item = CompactString>>(&mut self, iter: T) {
2406 for s in iter {
2407 self.push_str(&s);
2408 }
2409 }
2410}
2411
2412impl Extend<CompactString> for CompactString {
2413 fn extend<T: IntoIterator<Item = CompactString>>(&mut self, iter: T) {
2414 for s in iter {
2415 self.push_str(&s);
2416 }
2417 }
2418}
2419
2420impl Extend<CompactString> for Cow<'_, str> {
2421 fn extend<T: IntoIterator<Item = CompactString>>(&mut self, iter: T) {
2422 self.to_mut().extend(iter);
2423 }
2424}
2425
2426impl fmt::Write for CompactString {
2427 fn write_str(&mut self, s: &str) -> fmt::Result {
2428 self.push_str(s);
2429 Ok(())
2430 }
2431
2432 fn write_fmt(mut self: &mut Self, args: fmt::Arguments<'_>) -> fmt::Result {
2433 match args.as_str() {
2434 Some(s) => {
2435 if self.is_empty() && !self.is_heap_allocated() {
2436 // Since self is currently an empty inline variant or
2437 // an empty `StaticStr` variant, constructing a new one
2438 // with `Self::const_new` is more efficient since
2439 // it is guaranteed to be O(1).
2440 *self = Self::const_new(s);
2441 } else {
2442 self.push_str(s);
2443 }
2444 Ok(())
2445 }
2446 None => fmt::write(&mut self, args),
2447 }
2448 }
2449}
2450
2451impl Add<&str> for CompactString {
2452 type Output = Self;
2453 fn add(mut self, rhs: &str) -> Self::Output {
2454 self.push_str(rhs);
2455 self
2456 }
2457}
2458
2459impl AddAssign<&str> for CompactString {
2460 fn add_assign(&mut self, rhs: &str) {
2461 self.push_str(rhs);
2462 }
2463}
2464
/// A possible error value when converting a [`CompactString`] from a UTF-16 byte slice.
///
/// This type is the error type for the [`from_utf16`] method on [`CompactString`].
///
/// [`from_utf16`]: CompactString::from_utf16
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// # use compact_str::CompactString;
/// // 𝄞mu<invalid>ic
/// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
///           0xD800, 0x0069, 0x0063];
///
/// assert!(CompactString::from_utf16(v).is_err());
/// ```
#[derive(Copy, Clone, Debug)]
pub struct Utf16Error(());
2484
2485impl fmt::Display for Utf16Error {
2486 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2487 fmt::Display::fmt("invalid utf-16: lone surrogate found", f)
2488 }
2489}
2490
/// An iterator over the extracted data by [`CompactString::drain()`].
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Drain<'a> {
    // The string being drained; dereferenced in `Drop` to remove the range.
    compact_string: *mut CompactString,
    // Start of the range handed to `replace_range_shrink` on drop.
    start: usize,
    // End of the range handed to `replace_range_shrink` on drop.
    end: usize,
    // Iterator over the characters that have not been yielded yet.
    chars: core::str::Chars<'a>,
}
2499
// SAFETY: Drain keeps the lifetime of the CompactString it belongs to.
// The raw pointer field is the reason these impls have to be written by hand.
unsafe impl Send for Drain<'_> {}
// SAFETY: same reasoning as for `Send` above.
unsafe impl Sync for Drain<'_> {}
2503
2504impl fmt::Debug for Drain<'_> {
2505 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2506 f.debug_tuple("Drain").field(&self.as_str()).finish()
2507 }
2508}
2509
2510impl fmt::Display for Drain<'_> {
2511 #[inline]
2512 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2513 f.write_str(self.as_str())
2514 }
2515}
2516
/// Dropping the iterator removes the drained range from the string, whether or not the
/// iterator was fully consumed.
impl Drop for Drain<'_> {
    #[inline]
    fn drop(&mut self) {
        // SAFETY: Drain keeps a mutable reference to compact_string, so no one else can access
        //         the CompactString, but this function right now. CompactString::drain() ensured
        //         that the new extracted range does not split a UTF-8 character.
        unsafe { (*self.compact_string).replace_range_shrink(self.start, self.end, "") };
    }
}
2526
2527impl Drain<'_> {
2528 /// The remaining, unconsumed characters of the extracted substring.
2529 #[inline]
2530 pub fn as_str(&self) -> &str {
2531 self.chars.as_str()
2532 }
2533}
2534
2535impl Deref for Drain<'_> {
2536 type Target = str;
2537
2538 #[inline]
2539 fn deref(&self) -> &Self::Target {
2540 self.as_str()
2541 }
2542}
2543
2544impl Iterator for Drain<'_> {
2545 type Item = char;
2546
2547 #[inline]
2548 fn next(&mut self) -> Option<char> {
2549 self.chars.next()
2550 }
2551
2552 #[inline]
2553 fn count(self) -> usize {
2554 // <Chars as Iterator>::count() is specialized, and cloning is trivial.
2555 self.chars.clone().count()
2556 }
2557
2558 fn size_hint(&self) -> (usize, Option<usize>) {
2559 self.chars.size_hint()
2560 }
2561
2562 #[inline]
2563 fn last(mut self) -> Option<char> {
2564 self.chars.next_back()
2565 }
2566}
2567
2568impl DoubleEndedIterator for Drain<'_> {
2569 #[inline]
2570 fn next_back(&mut self) -> Option<char> {
2571 self.chars.next_back()
2572 }
2573}
2574
// `Drain::next` forwards to `core::str::Chars`, which is itself a `FusedIterator`.
impl FusedIterator for Drain<'_> {}
2576
/// A possible error value if allocating or resizing a [`CompactString`] failed.
///
/// The private unit field means values of this type cannot be constructed outside this crate.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ReserveError(());
2580
2581impl fmt::Display for ReserveError {
2582 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2583 f.write_str("Cannot allocate memory to hold CompactString")
2584 }
2585}
2586
// `ReserveError` wraps no other error, so the default `Error` methods are sufficient.
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
impl std::error::Error for ReserveError {}
2590
/// A possible error value if [`ToCompactString::try_to_compact_string()`] failed.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must include a wildcard arm
/// when matching on it.
#[derive(Debug, Clone, Copy, PartialEq)]
#[non_exhaustive]
pub enum ToCompactStringError {
    /// Cannot allocate memory to hold CompactString
    Reserve(ReserveError),
    /// [`Display::fmt()`][core::fmt::Display::fmt] returned an error
    Fmt(fmt::Error),
}
2600
2601impl fmt::Display for ToCompactStringError {
2602 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2603 match self {
2604 ToCompactStringError::Reserve(err) => err.fmt(f),
2605 ToCompactStringError::Fmt(err) => err.fmt(f),
2606 }
2607 }
2608}
2609
2610impl From<ReserveError> for ToCompactStringError {
2611 #[inline]
2612 fn from(value: ReserveError) -> Self {
2613 Self::Reserve(value)
2614 }
2615}
2616
2617impl From<fmt::Error> for ToCompactStringError {
2618 #[inline]
2619 fn from(value: fmt::Error) -> Self {
2620 Self::Fmt(value)
2621 }
2622}
2623
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
impl std::error::Error for ToCompactStringError {
    /// Returns the wrapped error; every variant has one.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = match self {
            ToCompactStringError::Reserve(err) => err,
            ToCompactStringError::Fmt(err) => err,
        };
        Some(cause)
    }
}
2634
/// Crate-private extension trait: unwrap a value, producing the success type `T`.
///
/// The implementation for `Result` in this file panics with the error's `Display` text.
trait UnwrapWithMsg {
    /// The type produced by a successful unwrap.
    type T;

    /// Returns the contained value (see the implementations for the failure behaviour).
    fn unwrap_with_msg(self) -> Self::T;
}
2640
2641impl<T, E: fmt::Display> UnwrapWithMsg for Result<T, E> {
2642 type T = T;
2643
2644 #[inline(always)]
2645 #[track_caller]
2646 fn unwrap_with_msg(self) -> T {
2647 match self {
2648 Ok(value) => value,
2649 Err(err) => unwrap_with_msg_fail(err),
2650 }
2651 }
2652}
2653
/// Cold, never-inlined panic path: panics with the `Display` text of `error`.
#[inline(never)]
#[cold]
#[track_caller]
fn unwrap_with_msg_fail<E: fmt::Display>(error: E) -> ! {
    panic!("{}", error)
}
2660
// Compile-time layout checks: a `CompactString` is exactly as large as a `String`, and
// wrapping it in `Option` does not make it any larger.
static_assertions::assert_eq_size!(CompactString, String);
static_assertions::assert_eq_size!(Option<CompactString>, CompactString);