1use core::fmt;
2use std::path::{Component, Path};
3use std::str::FromStr;
4use std::sync::Arc;
5
/// An owned path that is either a local filesystem path or a cloud URI.
///
/// [`PlPathRef`] is the borrowed, `Copy` counterpart.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum PlPath {
    /// A local filesystem path.
    Local(Arc<Path>),
    /// A URI tagged with the [`CloudScheme`] it was parsed with.
    Cloud(PlCloudPath),
}
14
/// A borrowed, `Copy` view of a local path or cloud URI.
///
/// [`PlPath`] is the owned counterpart.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum PlPathRef<'a> {
    /// A borrowed local filesystem path.
    Local(&'a Path),
    /// A borrowed URI tagged with its [`CloudScheme`].
    Cloud(PlCloudPathRef<'a>),
}
21
/// An owned cloud URI together with its parsed scheme.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct PlCloudPath {
    /// The scheme of `uri`.
    scheme: CloudScheme,
    /// The URI text. As produced by `PlPathRef::new` this is the full URI,
    /// including the leading `scheme://`; `strip_scheme` relies on that prefix.
    uri: Arc<str>,
}
31
/// A borrowed, `Copy` view of a cloud URI together with its parsed scheme.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PlCloudPathRef<'a> {
    /// The scheme of `uri`.
    scheme: CloudScheme,
    /// The URI text. As produced by `PlPathRef::new` this is the full URI,
    /// including the leading `scheme://`; `strip_scheme` relies on that prefix.
    uri: &'a str,
}
39
40impl<'a> fmt::Display for PlCloudPathRef<'a> {
41 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42 f.write_str(self.uri())
43 }
44}
45
46impl fmt::Display for PlCloudPath {
47 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
48 self.as_ref().fmt(f)
49 }
50}
51
52impl PlCloudPath {
53 pub fn as_ref(&self) -> PlCloudPathRef<'_> {
54 PlCloudPathRef {
55 scheme: self.scheme,
56 uri: self.uri.as_ref(),
57 }
58 }
59
60 pub fn strip_scheme(&self) -> &str {
61 &self.uri[self.scheme.as_str().len() + 3..]
62 }
63}
64
65impl PlCloudPathRef<'_> {
66 pub fn into_owned(self) -> PlCloudPath {
67 PlCloudPath {
68 scheme: self.scheme,
69 uri: self.uri.into(),
70 }
71 }
72
73 pub fn scheme(&self) -> CloudScheme {
74 self.scheme
75 }
76
77 pub fn uri(&self) -> &str {
78 self.uri
79 }
80
81 pub fn strip_scheme(&self) -> &str {
82 &self.uri[self.scheme.as_str().len() + "://".len()..]
83 }
84}
85
/// Display adapter returned by `PlPathRef::display` and `PlPath::display`.
///
/// Its `Display` impl formats local paths via [`Path::display`] and cloud
/// paths as their URI.
pub struct AddressDisplay<'a> {
    /// The path being displayed.
    addr: PlPathRef<'a>,
}
89
90impl<'a> fmt::Display for AddressDisplay<'a> {
91 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
92 match self.addr {
93 PlPathRef::Local(p) => p.display().fmt(f),
94 PlPathRef::Cloud(p) => p.fmt(f),
95 }
96 }
97}
98
/// Generates the `CloudScheme` enum plus its string conversions from a list of
/// `Variant = "literal",` pairs (the trailing comma is required).
macro_rules! impl_scheme {
    ($($variant:ident = $name:literal,)+) => {
        /// A URI scheme; one variant per pair passed to `impl_scheme!`.
        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
        #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
        #[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
        pub enum CloudScheme {
            $($variant,)+
        }

        impl FromStr for CloudScheme {
            type Err = ();

            /// Parses the exact scheme literal; any other input is `Err(())`.
            fn from_str(s: &str) -> Result<Self, Self::Err> {
                $(
                    if s == $name {
                        return Ok(Self::$variant);
                    }
                )+
                Err(())
            }
        }

        impl CloudScheme {
            /// Returns the scheme literal this variant was declared with.
            pub fn as_str(&self) -> &'static str {
                match *self {
                    $(Self::$variant => $name,)+
                }
            }
        }
    };
}
128
// Declares `CloudScheme` and its `FromStr` / `as_str` conversions.
//
// Keep this list in sync with `CLOUD_SCHEME_REGEX`: `PlPathRef::new` unwraps
// `CloudScheme::from_str` on whatever that regex accepts.
// Variant order matters because the generated enum derives `PartialOrd`/`Ord`.
impl_scheme! {
    S3 = "s3",
    S3a = "s3a",
    Gs = "gs",
    Gcs = "gcs",
    File = "file",
    Abfs = "abfs",
    Abfss = "abfss",
    Azure = "azure",
    Az = "az",
    Adl = "adl",
    Http = "http",
    Https = "https",
    Hf = "hf",
}
144
145impl fmt::Display for CloudScheme {
146 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
147 f.write_str(self.as_str())
148 }
149}
150
// Matches a complete scheme name (the text before `://`) that `PlPathRef::new`
// treats as a cloud URI. The alternatives must stay in sync with the variants
// declared through `impl_scheme!`, because `PlPathRef::new` unwraps
// `CloudScheme::from_str` on every string this accepts.
crate::regex_cache::cached_regex! {
    static CLOUD_SCHEME_REGEX = r"^(s3a?|gs|gcs|file|abfss?|azure|az|adl|https?|hf)$";
}
154
155impl<'a> PlPathRef<'a> {
156 pub fn scheme(&self) -> Option<CloudScheme> {
157 match self {
158 Self::Local(_) => None,
159 Self::Cloud(p) => Some(p.scheme),
160 }
161 }
162
163 pub fn is_local(&self) -> bool {
164 matches!(self, Self::Local(_))
165 }
166
167 pub fn is_cloud_url(&self) -> bool {
168 matches!(self, Self::Cloud(_))
169 }
170
171 pub fn as_local_path(&self) -> Option<&Path> {
172 match self {
173 Self::Local(p) => Some(p),
174 Self::Cloud(_) => None,
175 }
176 }
177
178 pub fn as_cloud_addr(&self) -> Option<PlCloudPathRef<'_>> {
179 match self {
180 Self::Local(_) => None,
181 Self::Cloud(p) => Some(*p),
182 }
183 }
184
185 pub fn join(&self, other: impl AsRef<str>) -> PlPath {
186 let other = other.as_ref();
187 if other.is_empty() {
188 return self.into_owned();
189 }
190
191 match self {
192 Self::Local(p) => PlPath::Local(p.join(other).into()),
193 Self::Cloud(p) => {
194 let needs_slash = !p.uri.ends_with('/') && !other.starts_with('/');
195
196 let mut out =
197 String::with_capacity(p.uri.len() + usize::from(needs_slash) + other.len());
198
199 out.push_str(p.uri);
200 if needs_slash {
201 out.push('/');
202 }
203 out.push_str(other);
208
209 let uri = out.into();
210 PlPath::Cloud(PlCloudPath {
211 scheme: p.scheme,
212 uri,
213 })
214 },
215 }
216 }
217
218 pub fn display(&self) -> AddressDisplay<'_> {
219 AddressDisplay { addr: *self }
220 }
221
222 pub fn from_local_path(path: &'a Path) -> Self {
223 Self::Local(path)
224 }
225
226 pub fn new(uri: &'a str) -> Self {
227 if let Some(i) = uri.find([':', '/']) {
228 if uri[i..].starts_with("://") && CLOUD_SCHEME_REGEX.is_match(&uri[..i]) {
229 let scheme = CloudScheme::from_str(&uri[..i]).unwrap();
230 return Self::Cloud(PlCloudPathRef { scheme, uri });
231 }
232 }
233
234 Self::from_local_path(Path::new(uri))
235 }
236
237 pub fn into_owned(self) -> PlPath {
238 match self {
239 Self::Local(p) => PlPath::Local(p.into()),
240 Self::Cloud(p) => PlPath::Cloud(p.into_owned()),
241 }
242 }
243
244 pub fn strip_scheme(&self) -> &str {
245 match self {
246 Self::Local(p) => p.to_str().unwrap(),
247 Self::Cloud(p) => p.strip_scheme(),
248 }
249 }
250
251 pub fn parent(&self) -> Option<Self> {
252 Some(match self {
253 Self::Local(p) => Self::Local(p.parent()?),
254 Self::Cloud(p) => {
255 let uri = p.uri;
256 let offset_start = p.scheme.as_str().len() + 3;
257 let last_slash = uri[offset_start..]
258 .char_indices()
259 .rev()
260 .find(|(_, c)| *c == '/')?
261 .0;
262 let uri = &uri[..offset_start + last_slash];
263
264 Self::Cloud(PlCloudPathRef {
265 scheme: p.scheme,
266 uri,
267 })
268 },
269 })
270 }
271
272 pub fn extension(&self) -> Option<&str> {
273 match self {
274 Self::Local(path) => path.extension().and_then(|e| e.to_str()),
275 Self::Cloud(_) => {
276 let offset_path = self.strip_scheme();
277 let separator = '/';
278
279 let mut ext_start = None;
280 for (i, c) in offset_path.char_indices() {
281 if c == separator {
282 ext_start = None;
283 }
284
285 if c == '.' && ext_start.is_none() {
286 ext_start = Some(i);
287 }
288 }
289
290 ext_start.map(|i| &offset_path[i + 1..])
291 },
292 }
293 }
294
295 pub fn to_str(&self) -> &'a str {
296 match self {
297 Self::Local(p) => p.to_str().unwrap(),
298 Self::Cloud(p) => p.uri,
299 }
300 }
301
302 pub fn offset_bytes(&'a self, n: usize) -> Self {
304 match self {
305 Self::Local(path) => {
306 let s = path.to_str().expect("Path is not valid UTF-8");
307 PlPathRef::Local(Path::new(&s[n..]))
308 },
309 Self::Cloud(cloudpath) => {
310 let s = self.to_str();
311 PlPathRef::Cloud(PlCloudPathRef {
312 scheme: cloudpath.scheme,
313 uri: &s[n..],
314 })
315 },
316 }
317 }
318
319 pub fn get_normal_components(&self) -> Box<dyn Iterator<Item = &str> + '_> {
328 match self {
329 Self::Local(path) => Box::new(path.components().filter_map(|c| match c {
330 Component::Normal(seg) => Some(seg.to_str().unwrap()),
331 _ => None,
332 })),
333 Self::Cloud(cloudpath) => {
334 let separator = '/';
335 let query_delimiter = '?';
336 let path = cloudpath.strip_scheme();
337 let path = path
338 .split_once(query_delimiter)
339 .map_or(path, |(before, _)| before);
340 Box::new(path.split(separator))
341 },
342 }
343 }
344}
345
346impl PlPath {
347 pub fn new(uri: &str) -> Self {
348 PlPathRef::new(uri).into_owned()
349 }
350
351 pub fn display(&self) -> AddressDisplay<'_> {
352 AddressDisplay {
353 addr: match self {
354 Self::Local(p) => PlPathRef::Local(p.as_ref()),
355 Self::Cloud(p) => PlPathRef::Cloud(p.as_ref()),
356 },
357 }
358 }
359
360 pub fn is_local(&self) -> bool {
361 self.as_ref().is_local()
362 }
363
364 pub fn is_cloud_url(&self) -> bool {
365 self.as_ref().is_cloud_url()
366 }
367
368 #[expect(clippy::should_implement_trait)]
370 pub fn from_str(uri: &str) -> Self {
371 Self::new(uri)
372 }
373
374 pub fn from_string(uri: String) -> Self {
375 Self::new(&uri)
376 }
377
378 pub fn as_ref(&self) -> PlPathRef<'_> {
379 match self {
380 Self::Local(p) => PlPathRef::Local(p.as_ref()),
381 Self::Cloud(p) => PlPathRef::Cloud(p.as_ref()),
382 }
383 }
384
385 pub fn cloud_scheme(&self) -> Option<CloudScheme> {
386 match self {
387 Self::Local(_) => None,
388 Self::Cloud(p) => Some(p.scheme),
389 }
390 }
391
392 pub fn to_str(&self) -> &str {
393 match self {
394 Self::Local(p) => p.to_str().unwrap(),
395 Self::Cloud(p) => p.uri.as_ref(),
396 }
397 }
398
399 pub fn into_local_path(self) -> Option<Arc<Path>> {
400 match self {
401 PlPath::Local(path) => Some(path),
402 PlPath::Cloud(_) => None,
403 }
404 }
405}
406
#[cfg(test)]
mod tests {
    use super::*;

    /// Rewrites every `/` to the platform's main separator so the local-path
    /// expectations hold on every OS.
    fn to_native(text: &str) -> String {
        text.chars()
            .map(|c| if c == '/' { std::path::MAIN_SEPARATOR } else { c })
            .collect()
    }

    /// Checks `join` for both a local base and the same base as a `file://` URI.
    #[test]
    fn plpath_join() {
        // `$result` is the expected local join. `$uri_result`, when given, is
        // the expected URI join; otherwise the URI join must equal `$result`.
        macro_rules! assert_plpath_join {
            ($base:literal + $added:literal => $result:literal$(, $uri_result:literal)?) => {
                // Local path, using native separators.
                let native_base = to_native($base);
                let native_added = to_native($added);
                let native_expected = to_native($result);
                let joined_local = PlPath::new(&native_base).as_ref().join(native_added);
                assert_eq!(joined_local.to_str(), native_expected);

                // `file://` URI, which always uses `/`.
                #[allow(unused_variables)]
                let expected_tail = {
                    let x = $result;
                    $(let x = $uri_result;)?
                    x
                };
                let uri_base = format!("file://{}", $base);
                let uri_expected = format!("file://{expected_tail}");
                let joined_uri = PlPath::new(uri_base.as_str()).as_ref().join($added);
                assert_eq!(joined_uri.to_str(), uri_expected.as_str());
            };
        }

        assert_plpath_join!("a/b/c/" + "d/e" => "a/b/c/d/e");
        assert_plpath_join!("a/b/c" + "d/e" => "a/b/c/d/e");
        assert_plpath_join!("a/b/c" + "d/e/" => "a/b/c/d/e/");
        assert_plpath_join!("a/b/c" + "" => "a/b/c");
        assert_plpath_join!("a/b/c" + "/d" => "/d", "a/b/c/d");
        assert_plpath_join!("a/b/c" + "/d/" => "/d/", "a/b/c/d/");
        assert_plpath_join!("" + "/d/" => "/d/");
        assert_plpath_join!("/" + "/d/" => "/d/", "//d/");
        assert_plpath_join!("/x/y" + "/d/" => "/d/", "/x/y/d/");
        assert_plpath_join!("/x/y" + "/d" => "/d", "/x/y/d");
        assert_plpath_join!("/x/y" + "d" => "/x/y/d");

        assert_plpath_join!("/a/longer" + "path" => "/a/longer/path");
        assert_plpath_join!("/a/longer" + "/path" => "/path", "/a/longer/path");
        assert_plpath_join!("/a/longer" + "path/wow" => "/a/longer/path/wow");
        assert_plpath_join!("/a/longer" + "/path/wow" => "/path/wow", "/a/longer/path/wow");
        assert_plpath_join!("/an/even/longer" + "path" => "/an/even/longer/path");
        assert_plpath_join!("/an/even/longer" + "/path" => "/path", "/an/even/longer/path");
        assert_plpath_join!("/an/even/longer" + "path/wow" => "/an/even/longer/path/wow");
        assert_plpath_join!("/an/even/longer" + "/path/wow" => "/path/wow", "/an/even/longer/path/wow");
    }
}
470}