-
Notifications
You must be signed in to change notification settings - Fork 4.8k
/
Copy pathUriHelper.cs
563 lines (503 loc) · 24.3 KB
/
UriHelper.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System.Text;
using System.Diagnostics;
using System.Runtime.InteropServices;
using System.Buffers;
namespace System
{
internal static class UriHelper
{
// http://host/Path/Path/File?Query is the base of
// - http://host/Path/Path/File/ ... (the two "File" segments may mean different things, but that does not matter here)
// - http://host/Path/Path/#Fragment
// - http://host/Path/Path/?Query
// - http://host/Path/Path/MoreDir/ ...
// - http://host/Path/Path/OtherFile?Query
// - http://host/Path/Path/Fl
// - http://host/Path/Path/
//
// It is not a base for
// - http://host/Path/Path (that last "Path" is not considered as a directory)
// - http://host/Path/Path?Query
// - http://host/Path/Path#Fragment
// - http://host/Path/Path2/
// - http://host/Path/Path2/MoreDir
// - http://host/Path/File
//
// ASSUMES that strings like http://host/Path/Path/MoreDir/../../ have been canonicalized before going to this method.
// ASSUMES that back slashes already have been converted if applicable.
//
internal static unsafe bool TestForSubPath(char* selfPtr, int selfLength, char* otherPtr, int otherLength,
    bool ignoreCase)
{
    // Position shared by both strings while they are walked in lock-step.
    int index = 0;

    // False as soon as the current path segment differs between the two strings.
    bool segmentMatches = true;

    while (index < selfLength && index < otherLength)
    {
        char selfChar = selfPtr[index];
        char otherChar = otherPtr[index];

        // self has reached its query/fragment: every directory segment of self was matched.
        if (selfChar is '?' or '#')
        {
            return true;
        }

        if (selfChar == '/')
        {
            // self closes a segment here, so other must close one too,
            // and the segment just finished must have compared equal.
            if (otherChar != '/' || !segmentMatches)
            {
                return false;
            }

            index++;
            continue;
        }

        // other has reached its query/fragment; what is left of self is checked below.
        if (otherChar is '?' or '#')
        {
            break;
        }

        bool sameChar = ignoreCase
            ? char.ToLowerInvariant(selfChar) == char.ToLowerInvariant(otherChar)
            : selfChar == otherChar;
        if (!sameChar)
        {
            segmentMatches = false;
        }

        index++;
    }

    // Whatever remains of self must not contain another path segment.
    for (; index < selfLength; index++)
    {
        switch (selfPtr[index])
        {
            case '?':
            case '#':
                return true;

            case '/':
                return false;
        }
    }

    // Reached the end of self without finding a further segment.
    return true;
}
/// <summary>
/// Returns <paramref name="stringToEscape"/> with every character outside <paramref name="noEscape"/>
/// percent-encoded. When nothing needs escaping the original string instance is returned.
/// </summary>
/// <param name="stringToEscape">The text to escape; must not be null.</param>
/// <param name="checkExistingEscaped">Forwarded to the escaping helper, which leaves valid "%XX" sequences untouched when this is true.</param>
/// <param name="noEscape">The set of characters that are copied as-is; must not contain '%'.</param>
internal static string EscapeString(string stringToEscape, bool checkExistingEscaped, IndexOfAnyValues<char> noEscape)
{
    ArgumentNullException.ThrowIfNull(stringToEscape);
    Debug.Assert(!noEscape.Contains('%'), "Need to treat % specially; it should be part of any escaped set");

    ReadOnlySpan<char> source = stringToEscape;
    int safePrefixLength = source.IndexOfAnyExcept(noEscape);
    if (safePrefixLength >= 0)
    {
        // Copy the leading run that needs no escaping, then escape everything from the first offending char on.
        var builder = new ValueStringBuilder(stackalloc char[Uri.StackallocThreshold]);
        builder.Append(source.Slice(0, safePrefixLength));
        EscapeStringToBuilder(source.Slice(safePrefixLength), ref builder, noEscape, checkExistingEscaped);
        return builder.ToString();
    }

    // Every character is already in the allowed set.
    return stringToEscape;
}
/// <summary>
/// Appends <paramref name="stringToEscape"/> to <paramref name="dest"/>, percent-encoding every
/// character that is not contained in <paramref name="noEscape"/>.
/// </summary>
internal static unsafe void EscapeString(ReadOnlySpan<char> stringToEscape, ref ValueStringBuilder dest,
    bool checkExistingEscaped, IndexOfAnyValues<char> noEscape)
{
    Debug.Assert(!noEscape.Contains('%'), "Need to treat % specially; it should be part of any escaped set");

    int safePrefixLength = stringToEscape.IndexOfAnyExcept(noEscape);
    if (safePrefixLength < 0)
    {
        // The whole input is made of allowed characters: copy it verbatim.
        dest.Append(stringToEscape);
        return;
    }

    // Copy the allowed prefix, then hand the remainder (which starts with a char to escape) to the helper.
    dest.Append(stringToEscape[..safePrefixLength]);
    EscapeStringToBuilder(stringToEscape[safePrefixLength..], ref dest, noEscape, checkExistingEscaped);
}
/// <summary>
/// Escapes <paramref name="stringToEscape"/> into <paramref name="vsb"/>. The input must be non-empty
/// and must begin with a character that is not in <paramref name="noEscape"/>.
/// </summary>
private static void EscapeStringToBuilder(
    ReadOnlySpan<char> stringToEscape, ref ValueStringBuilder vsb,
    IndexOfAnyValues<char> noEscape, bool checkExistingEscaped)
{
    Debug.Assert(!stringToEscape.IsEmpty && !noEscape.Contains(stringToEscape[0]));

    // Scratch buffer large enough for the UTF-8 encoding of any single Rune.
    Span<byte> utf8Bytes = stackalloc byte[4];

    while (stringToEscape.Length > 0)
    {
        char current = stringToEscape[0];

        if (!char.IsAscii(current))
        {
            // Decode one scalar value; anything that does not decode cleanly becomes U+FFFD.
            if (Rune.DecodeFromUtf16(stringToEscape, out Rune r, out int charsConsumed) != OperationStatus.Done)
            {
                r = Rune.ReplacementChar;
            }

            Debug.Assert(stringToEscape.EnumerateRunes() is { } e && e.MoveNext() && e.Current == r);
            Debug.Assert(charsConsumed is 1 or 2);

            stringToEscape = stringToEscape.Slice(charsConsumed);

            // Non-ASCII: percent-encode each byte of the rune's UTF-8 form.
            r.TryEncodeToUtf8(utf8Bytes, out int bytesWritten);
            for (int i = 0; i < bytesWritten; i++)
            {
                PercentEncodeByte(utf8Bytes[i], ref vsb);
            }
        }
        else if (noEscape.Contains(current))
        {
            // An allowed character; more probably follow, so find the whole run with one search.
            int runLength = stringToEscape.IndexOfAnyExcept(noEscape);
            if (runLength < 0)
            {
                runLength = stringToEscape.Length;
            }

            Debug.Assert(runLength > 0);

            vsb.Append(stringToEscape.Slice(0, runLength));
            stringToEscape = stringToEscape.Slice(runLength);
        }
        else if (checkExistingEscaped
            && current == '%'
            && stringToEscape.Length > 2
            && char.IsAsciiHexDigit(stringToEscape[1])
            && char.IsAsciiHexDigit(stringToEscape[2]))
        {
            // Already a well-formed "%XX" sequence: emit the three characters unchanged.
            vsb.Append(stringToEscape.Slice(0, 3));
            stringToEscape = stringToEscape.Slice(3);
        }
        else
        {
            // ASCII character outside the allowed set (including a stray '%').
            PercentEncodeByte((byte)current, ref vsb);
            stringToEscape = stringToEscape.Slice(1);
        }
    }
}
/// <summary>
/// Pins <paramref name="input"/> and forwards to the pointer-based overload that writes into a
/// <c>char[]</c>; returns whatever array that overload returns.
/// </summary>
internal static unsafe char[] UnescapeString(string input, int start, int end, char[] dest,
    ref int destPosition, char rsvd1, char rsvd2, char rsvd3, UnescapeMode unescapeMode, UriParser? syntax,
    bool isQuery)
{
    fixed (char* inputPtr = input)
    {
        char[] result = UnescapeString(
            inputPtr, start, end, dest, ref destPosition,
            rsvd1, rsvd2, rsvd3, unescapeMode, syntax, isQuery);
        return result;
    }
}
/// <summary>
/// Unescapes the characters <c>pStr[start..end)</c> and places the result in <paramref name="dest"/>
/// starting at <paramref name="destPosition"/>.
/// </summary>
/// <param name="dest">Destination array; its first <paramref name="destPosition"/> characters are preserved.</param>
/// <param name="destPosition">On entry, the index in <paramref name="dest"/> at which output starts; on return, the total number of characters in the result.</param>
/// <returns>
/// <paramref name="dest"/> itself when the result fits in it, otherwise a newly allocated array that
/// holds the whole result (including the preserved prefix).
/// </returns>
/// <exception cref="UriFormatException">Propagated from the builder-based overload when it rejects an invalid escape sequence.</exception>
internal static unsafe char[] UnescapeString(char* pStr, int start, int end, char[] dest, ref int destPosition,
    char rsvd1, char rsvd2, char rsvd3, UnescapeMode unescapeMode, UriParser? syntax, bool isQuery)
{
    ValueStringBuilder vsb = new ValueStringBuilder(dest.Length);
    try
    {
        // Seed the builder with what the caller already wrote so the indices line up with dest.
        vsb.Append(dest.AsSpan(0, destPosition));

        UnescapeString(pStr, start, end, ref vsb, rsvd1, rsvd2, rsvd3, unescapeMode,
            syntax, isQuery);

        if (vsb.Length > dest.Length)
        {
            // Result outgrew the caller's array: hand back a fresh one.
            dest = vsb.AsSpan().ToArray();
        }
        else
        {
            // Only the newly produced tail needs copying; the prefix is already in dest.
            vsb.AsSpan(destPosition).TryCopyTo(dest.AsSpan(destPosition));
        }

        destPosition = vsb.Length;
        return dest;
    }
    finally
    {
        // The inner UnescapeString can throw UriFormatException; release the builder on that path too.
        vsb.Dispose();
    }
}
//
// This method will assume that any good Escaped Sequence will be unescaped in the output
// - Assumes Dest.Length - destPosition >= end-start
// - UnescapeLevel controls various modes of operation
// - Any "bad" escape sequence will remain as is or '%' will be escaped.
// - destPosition tells the starting index in dest for placing the result.
// On return destPosition tells the last character + 1 position in the "dest" array.
// - The control chars and chars passed in rsvdX parameters may be re-escaped depending on UnescapeLevel
// - It is a RARE case when Unescape actually needs escaping some characters mentioned above.
// For this reason it returns a char[] that is usually the same ref as the input "dest" value.
//
// Pins the string and forwards the [start, end) range to the pointer-based overload,
// which appends the result to dest.
internal static unsafe void UnescapeString(string input, int start, int end, ref ValueStringBuilder dest,
    char rsvd1, char rsvd2, char rsvd3, UnescapeMode unescapeMode, UriParser? syntax, bool isQuery)
{
    fixed (char* inputPtr = input)
    {
        UnescapeString(
            inputPtr, start, end, ref dest,
            rsvd1, rsvd2, rsvd3,
            unescapeMode, syntax, isQuery);
    }
}
// Pins the span and forwards its whole length (start 0, end input.Length) to the
// pointer-based overload, which appends the result to dest.
internal static unsafe void UnescapeString(ReadOnlySpan<char> input, ref ValueStringBuilder dest,
    char rsvd1, char rsvd2, char rsvd3, UnescapeMode unescapeMode, UriParser? syntax, bool isQuery)
{
    int length = input.Length;
    fixed (char* inputPtr = &MemoryMarshal.GetReference(input))
    {
        UnescapeString(
            inputPtr, 0, length, ref dest,
            rsvd1, rsvd2, rsvd3,
            unescapeMode, syntax, isQuery);
    }
}
// Core unescape routine: processes pStr[start..end) and appends the result to dest.
//
// The outer loop repeatedly (1) scans forward with the inner loop until it reaches a character
// that needs to be transformed (or the end of the range), (2) copies everything scanned so far
// verbatim, and (3) transforms the character/sequence at "next" in one of three ways:
//   - percent-encodes the single char at "next" (escapeReserved == true),
//   - appends the decoded char and skips the three-char "%XX" sequence (decoded value <= 127),
//   - hands a non-ASCII percent-encoded sequence to PercentEncodingHelper.
//
// Throws UriFormatException for an invalid or truncated escape sequence when
// unescapeMode >= UnescapeMode.UnescapeAllOrThrow.
internal static unsafe void UnescapeString(char* pStr, int start, int end, ref ValueStringBuilder dest,
char rsvd1, char rsvd2, char rsvd3, UnescapeMode unescapeMode, UriParser? syntax, bool isQuery)
{
// Neither escaping nor unescaping was requested: copy the range as-is.
if ((unescapeMode & UnescapeMode.EscapeUnescape) == UnescapeMode.CopyOnly)
{
dest.Append(pStr + start, end - start);
return;
}
// Set by the scan when the char at "next" has to be percent-encoded instead of decoded.
bool escapeReserved = false;
// IRI range checks apply only when all EscapeUnescape bits are set and the parser has IRI parsing on.
bool iriParsing = Uri.IriParsingStatic(syntax)
&& ((unescapeMode & UnescapeMode.EscapeUnescape) == UnescapeMode.EscapeUnescape);
for (int next = start; next < end; )
{
char ch = (char)0;
// Scan: "continue" leaves the current char(s) to be copied verbatim later,
// "break" stops at "next" so the code after the loop can transform it.
for (; next < end; ++next)
{
if ((ch = pStr[next]) == '%')
{
if ((unescapeMode & UnescapeMode.Unescape) == 0)
{
// re-escape, don't check anything else
escapeReserved = true;
}
else if (next + 2 < end)
{
// From here on ch holds the decoded value of "%XX", or Uri.c_DummyChar if XX is not hex.
ch = DecodeHexChars(pStr[next + 1], pStr[next + 2]);
// Unescape a good sequence if full unescape is requested
if (unescapeMode >= UnescapeMode.UnescapeAll)
{
if (ch == Uri.c_DummyChar)
{
if (unescapeMode >= UnescapeMode.UnescapeAllOrThrow)
{
// Should be a rare case where the app tries to feed an invalid escaped sequence
throw new UriFormatException(SR.net_uri_BadString);
}
continue;
}
}
// re-escape % from an invalid sequence
else if (ch == Uri.c_DummyChar)
{
if ((unescapeMode & UnescapeMode.Escape) != 0)
escapeReserved = true;
else
continue; // we should throw instead but since v1.0 would just print '%'
}
// Do not unescape '%' itself unless full unescape is requested
else if (ch == '%')
{
// Together with the loop's ++next this skips the whole three-char "%XX" sequence.
next += 2;
continue;
}
// Do not unescape a reserved char unless full unescape is requested
else if (ch == rsvd1 || ch == rsvd2 || ch == rsvd3)
{
next += 2;
continue;
}
// Do not unescape a dangerous char unless it's V1ToStringFlags mode
else if ((unescapeMode & UnescapeMode.V1ToStringFlag) == 0 && IsNotSafeForUnescape(ch))
{
next += 2;
continue;
}
else if (iriParsing && ((ch <= '\x9F' && IsNotSafeForUnescape(ch)) ||
(ch > '\x9F' && !IriHelper.CheckIriUnicodeRange(ch, isQuery))))
{
// check if unescaping gives a char outside iri range
// if it does then keep it escaped
next += 2;
continue;
}
// unescape escaped char or escape %
break;
}
// Fewer than two characters follow the '%', so it cannot be a complete "%XX" sequence.
else if (unescapeMode >= UnescapeMode.UnescapeAll)
{
if (unescapeMode >= UnescapeMode.UnescapeAllOrThrow)
{
// Should be a rare case where the app tries to feed an invalid escaped sequence
throw new UriFormatException(SR.net_uri_BadString);
}
// keep a '%' as part of a bogus sequence
continue;
}
else
{
escapeReserved = true;
}
// escape (escapeReserved==true) or otherwise unescape the sequence
break;
}
// Not a '%': when both the Unescape and UnescapeAll bits are set, plain chars are never escaped.
else if ((unescapeMode & (UnescapeMode.Unescape | UnescapeMode.UnescapeAll))
== (UnescapeMode.Unescape | UnescapeMode.UnescapeAll))
{
continue;
}
else if ((unescapeMode & UnescapeMode.Escape) != 0)
{
// Could actually escape some of the characters
if (ch == rsvd1 || ch == rsvd2 || ch == rsvd3)
{
// found an unescaped reserved character -> escape it
escapeReserved = true;
break;
}
else if ((unescapeMode & UnescapeMode.V1ToStringFlag) == 0
&& (ch <= '\x1F' || (ch >= '\x7F' && ch <= '\x9F')))
{
// found an unescaped control character (<= 0x1F or 0x7F-0x9F) -> escape it
escapeReserved = true;
break;
}
}
}
//copy off previous characters from input
while (start < next)
dest.Append(pStr[start++]);
// next != end means the scan stopped early on a character that needs transforming.
if (next != end)
{
if (escapeReserved)
{
// Percent-encode the single character at "next".
PercentEncodeByte((byte)pStr[next], ref dest);
escapeReserved = false;
next++;
}
else if (ch <= 127)
{
// ch is the decoded ASCII value of "%XX": emit it and skip the three input chars.
dest.Append(ch);
next += 3;
}
else
{
// Unicode
int charactersRead = PercentEncodingHelper.UnescapePercentEncodedUTF8Sequence(
pStr + next,
end - next,
ref dest,
isQuery,
iriParsing);
Debug.Assert(charactersRead > 0);
next += charactersRead;
}
// Everything before "next" has now been written to dest.
start = next;
}
}
}
/// <summary>
/// Appends '%' followed by the two-digit, upper-case hexadecimal form of <paramref name="b"/>
/// to <paramref name="to"/>.
/// </summary>
internal static void PercentEncodeByte(byte b, ref ValueStringBuilder to)
{
    to.Append('%');

    // Reserve room for the two hex digits and let HexConverter fill them in place.
    var hexDigits = to.AppendSpan(2);
    HexConverter.ToCharsBuffer(b, hexDigits, 0, HexConverter.Casing.Upper);
}
/// <summary>
/// Combines two hex digit characters into the byte they spell (returned as a char),
/// e.g. '0' and 'a' produce (char)0x0A.
/// <para>Returns <see cref="Uri.c_DummyChar"/> when either character is not a hex digit.</para>
/// </summary>
internal static char DecodeHexChars(int first, int second)
{
    int high = HexConverter.FromChar(first);
    int low = HexConverter.FromChar(second);

    // The OR of the two nibbles is 0xFF when either lookup reported an invalid digit (0xFF).
    return (high | low) == 0xFF
        ? Uri.c_DummyChar
        : (char)((high << 4) | low);
}
// The RFC 3986 reserved characters (gen-delims plus sub-delims).
internal const string RFC3986ReservedMarks = @";/?:@&=+$,#[]!'()*";
private const string AdditionalUnsafeToUnescape = @"%\#"; // While not specified as reserved, these are still unsafe to unescape.
// When unescaping in safe mode, do not unescape the RFC 3986 reserved set:
// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
// / "*" / "+" / "," / ";" / "="
//
// In addition, do not unescape the following unsafe characters:
// excluded = "%" / "\"
//
// This implementation used to use the following variant of the RFC 2396 reserved set.
// That behavior is now disabled by default, and is controlled by a UriSyntax property.
// reserved = ";" | "/" | "?" | "@" | "&" | "=" | "+" | "$" | ","
// excluded = control | "#" | "%" | "\"
internal static bool IsNotSafeForUnescape(char ch)
{
    const string NotSafeForUnescape = RFC3986ReservedMarks + AdditionalUnsafeToUnescape;

    // Characters in U+0000-U+001F and U+007F-U+009F are always kept escaped.
    bool isControl = ch is <= '\x1F' or (>= '\x7F' and <= '\x9F');

    return isControl || NotSafeForUnescape.Contains(ch);
}
// true for all ASCII letters and digits, as well as the RFC3986 unreserved marks '-', '_', '.', and '~'
public static readonly IndexOfAnyValues<char> Unreserved =
IndexOfAnyValues.Create("-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~");
// true for all ASCII letters and digits, as well as the RFC3986 reserved characters, unreserved characters, and hash
public static readonly IndexOfAnyValues<char> UnreservedReserved =
IndexOfAnyValues.Create("!#$&'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~");
// same set as UnreservedReserved, but without '#'
public static readonly IndexOfAnyValues<char> UnreservedReservedExceptHash =
IndexOfAnyValues.Create("!$&'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~");
// same set as UnreservedReserved, but without '?' and '#'
public static readonly IndexOfAnyValues<char> UnreservedReservedExceptQuestionMarkHash =
IndexOfAnyValues.Create("!$&'()*+,-./0123456789:;=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~");
//
// Returns true when ch is one of the RFC 3986 gen-delims:
//   ":" / "/" / "?" / "#" / "[" / "]" / "@"
//
internal static bool IsGenDelim(char ch) =>
    ch is ':' or '/' or '?' or '#' or '[' or ']' or '@';
// Space, line feed, carriage return and tab: the same four characters that IsLWS accepts.
internal static readonly char[] s_WSchars = new char[] { ' ', '\n', '\r', '\t' };
// Returns true when ch is a space, line feed, carriage return or tab.
internal static bool IsLWS(char ch) =>
    ch is ' ' or '\n' or '\r' or '\t';
// Bidirectional control characters (these get stripped): anything in [U+200E, U+202E]
// except the sub-range [U+2010, U+2029].
internal static bool IsBidiControlCharacter(char ch) =>
    ch is >= '\u200E' and <= '\u202E' and not (>= '\u2010' and <= '\u2029');
// Returns strToClean with every bidirectional control character removed.
// When nothing has to be removed, backingString is returned if it was supplied,
// otherwise a new string is created from the span.
internal static unsafe string StripBidiControlCharacters(ReadOnlySpan<char> strToClean, string? backingString = null)
{
    Debug.Assert(backingString is null || strToClean.Length == backingString.Length);

    int removeCount = 0;

    int firstCandidate = strToClean.IndexOfAnyInRange('\u200E', '\u202E');
    if (firstCandidate >= 0)
    {
        // Slow path: at least one char falls in [u200E, u202E], so count the ones that really are Bidi controls.
        ReadOnlySpan<char> tail = strToClean.Slice(firstCandidate);
        for (int i = 0; i < tail.Length; i++)
        {
            if (IsBidiControlCharacter(tail[i]))
            {
                removeCount++;
            }
        }
    }

    if (removeCount == 0)
    {
        // Hot path: nothing to strip.
        return backingString ?? new string(strToClean);
    }

#pragma warning disable CS8500 // takes address of managed type
    // Take the address of a copy so the original span local is not address-exposed.
    ReadOnlySpan<char> spanCopy = strToClean;
    return string.Create(spanCopy.Length - removeCount, (IntPtr)(&spanCopy), static (buffer, spanPtr) =>
    {
        ReadOnlySpan<char> source = *(ReadOnlySpan<char>*)spanPtr;
        int written = 0;
        for (int i = 0; i < source.Length; i++)
        {
            char c = source[i];
            if (!IsBidiControlCharacter(c))
            {
                buffer[written++] = c;
            }
        }
        Debug.Assert(buffer.Length == written);
    });
#pragma warning restore CS8500
}
}
}