Skip to content

Commit

Permalink
Make sure Humanize(LetterCasing.Sentence) does not remove commas (#1185)
Browse files: browse the repository at this point in the history
- Adapt code so that mid-sentence punctuation is kept when humanizing to a sentence
- Split a regex into simpler constituent parts that are easier to reason about
- Add tests
  • Loading branch information
louis-z authored Feb 15, 2024
1 parent c5bb097 commit 11bd16c
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 1 deletion.
2 changes: 2 additions & 0 deletions src/Humanizer.Tests.Shared/StringHumanizeTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ public void CanHumanizeIntoLowerCase(string input, string expectedResult)
[InlineData("CanReturnSentenceCase", "Can return sentence case")]
[InlineData("", "")]
[InlineData("égoïste", "Égoïste")]
[InlineData("Normal; Normal and PascalCase", "Normal; normal and pascal case")]
[InlineData("I,and No One else", "I, and no one else")]
public void CanHumanizeIntoSentenceCase(string input, string expectedResult)
{
Assert.Equal(expectedResult, input.Humanize(LetterCasing.Sentence));
Expand Down
9 changes: 8 additions & 1 deletion src/Humanizer/StringHumanizeExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,16 @@ public static class StringHumanizeExtensions
// Regex that matches one word part of the input; assigned in the static constructor,
// which composes it from the pattern fragments declared below.
private static readonly Regex PascalCaseWordPartsRegex;
// Regex for a '-' or '_' that has a whitespace character directly before or after it;
// assigned in the static constructor.
private static readonly Regex FreestandingSpacingCharRegex;

// An optional single uppercase letter followed by one or more lowercase letters.
private const string OptionallyCapitalizedWord = @"\p{Lu}?\p{Ll}+";
// One or more ASCII digits followed by zero or more lowercase letters.
private const string IntegerAndOptionalLowercaseLetters = @"[0-9]+\p{Ll}*";
// A run of uppercase letters; the lookahead requires that the run is followed by
// another uppercase letter, an ASCII digit, or a word boundary (none of which is consumed).
private const string Acronym = @"\p{Lu}+(?=\p{Lu}|[0-9]|\b)";
// One or more letters in the Unicode "Lo" (Letter, other) category.
private const string SequenceOfOtherLetters = @"\p{Lo}+";
// At most one comma or semicolon; the trailing '?' makes it optional.
private const string MidSentencePunctuation = "[,;]?";

// Initializes the two static regexes used by this class.
static StringHumanizeExtensions()
{
    // A word part is any one of the four alternatives, optionally followed by a
    // single ',' or ';' so that the punctuation is included in the match.
    // ExplicitCapture keeps the unnamed grouping parentheses from creating a capture group.
    PascalCaseWordPartsRegex = new Regex(
        $"({OptionallyCapitalizedWord}|{IntegerAndOptionalLowercaseLetters}|{Acronym}|{SequenceOfOtherLetters}){MidSentencePunctuation}",
        RegexOptions.IgnorePatternWhitespace | RegexOptions.ExplicitCapture | RegexOptionsUtil.Compiled);

    // A '-' or '_' with a whitespace character directly before or after it.
    FreestandingSpacingCharRegex = new Regex(@"\s[-_]|[-_]\s", RegexOptionsUtil.Compiled);
}
Expand Down

0 comments on commit 11bd16c

Please sign in to comment.