mirror of
https://github.com/NixOS/nixpkgs.git
synced 2025-06-09 09:36:20 +09:00
lib.strings: init splitStringBy (#385643)
This commit is contained in:
commit
a9320986b3
3 changed files with 187 additions and 0 deletions
|
@ -347,6 +347,7 @@ let
|
|||
toSentenceCase
|
||||
addContextFrom
|
||||
splitString
|
||||
splitStringBy
|
||||
removePrefix
|
||||
removeSuffix
|
||||
versionOlder
|
||||
|
|
|
@ -1592,6 +1592,97 @@ rec {
|
|||
in
|
||||
map (addContextFrom s) splits;
|
||||
|
||||
/**
|
||||
Splits a string into substrings based on a predicate that examines adjacent characters.
|
||||
|
||||
This function provides a flexible way to split strings by checking pairs of characters
|
||||
against a custom predicate function. Unlike simpler splitting functions, this allows
|
||||
for context-aware splitting based on character transitions and patterns.
|
||||
|
||||
# Inputs
|
||||
|
||||
`predicate`
|
||||
: Function that takes two arguments (previous character and current character)
|
||||
and returns true when the string should be split at the current position.
|
||||
For the first character, previous will be "" (empty string).
|
||||
|
||||
`keepSplit`
|
||||
: Boolean that determines whether the splitting character should be kept as
|
||||
part of the result. If true, the character will be included at the beginning
|
||||
of the next substring; if false, it will be discarded.
|
||||
|
||||
`str`
|
||||
: The input string to split.
|
||||
|
||||
# Return
|
||||
|
||||
A list of substrings from the original string, split according to the predicate.
|
||||
|
||||
# Type
|
||||
|
||||
```
|
||||
splitStringBy :: (string -> string -> bool) -> bool -> string -> [string]
|
||||
```
|
||||
|
||||
# Examples
|
||||
:::{.example}
|
||||
## `lib.strings.splitStringBy` usage example
|
||||
|
||||
Split on periods and hyphens, discarding the separators:
|
||||
```nix
|
||||
splitStringBy (prev: curr: builtins.elem curr [ "." "-" ]) false "foo.bar-baz"
|
||||
=> [ "foo" "bar" "baz" ]
|
||||
```
|
||||
|
||||
Split on transitions from lowercase to uppercase, keeping the uppercase characters:
|
||||
```nix
|
||||
splitStringBy (prev: curr: builtins.match "[a-z]" prev != null && builtins.match "[A-Z]" curr != null) true "fooBarBaz"
|
||||
=> [ "foo" "Bar" "Baz" ]
|
||||
```
|
||||
|
||||
Handle leading separators correctly:
|
||||
```nix
|
||||
splitStringBy (prev: curr: builtins.elem curr [ "." ]) false ".foo.bar.baz"
|
||||
=> [ "" "foo" "bar" "baz" ]
|
||||
```
|
||||
|
||||
Handle trailing separators correctly:
|
||||
```nix
|
||||
splitStringBy (prev: curr: builtins.elem curr [ "." ]) false "foo.bar.baz."
|
||||
=> [ "foo" "bar" "baz" "" ]
|
||||
```
|
||||
:::
|
||||
*/
|
||||
splitStringBy =
  predicate: keepSplit: str:
  let
    len = stringLength str;

    # The single character at index `i` of `str`.
    charAt = i: substring i 1 str;

    # Whether `predicate` requests a split at index `i`. The first character
    # has no predecessor, so "" is passed as the previous character.
    isSplitAt = i: predicate (if i > 0 then charAt (i - 1) else "") (charAt i);

    # Ascending indices at which the predicate holds. These are computed with
    # filter/genList instead of a character-by-character recursion, so the
    # evaluation depth does not grow with the length of `str` and no
    # intermediate list or string is rebuilt for every character.
    splitPositions = builtins.filter isSplitAt (builtins.genList (i: i) len);
    numSplits = builtins.length splitPositions;

    # The n-th part of the result (there are always `numSplits + 1` parts).
    # It starts at the preceding split index (or one past it when the split
    # character is discarded) and ends right before the next split index, or
    # at the end of the string for the last part.
    partAt =
      n:
      let
        start =
          if n == 0 then
            0
          else
            builtins.elemAt splitPositions (n - 1) + (if keepSplit then 0 else 1);
        end = if n == numSplits then len else builtins.elemAt splitPositions n;
      in
      substring start (end - start) str;
  in
  # An empty `str` has no split positions and therefore yields [ "" ].
  # Every part carries the string context of the original input.
  map (addContextFrom str) (builtins.genList partAt (numSplits + 1));
|
||||
|
||||
/**
|
||||
Return a string without the specified prefix, if the prefix matches.
|
||||
|
||||
|
|
|
@ -631,6 +631,101 @@ runTests {
|
|||
];
|
||||
};
|
||||
|
||||
# Splitting on "." and "-" while discarding the delimiters.
testSplitStringBySimpleDelimiter = {
  expr =
    let
      isDelimiter = _prev: curr: curr == "." || curr == "-";
    in
    strings.splitStringBy isDelimiter false "foo.bar-baz";
  expected = [
    "foo"
    "bar"
    "baz"
  ];
};
|
||||
|
||||
# A delimiter at the very start produces a leading empty part.
testSplitStringByLeadingDelimiter = {
  expr =
    let
      isDot = _prev: curr: curr == ".";
    in
    strings.splitStringBy isDot false ".foo.bar.baz";
  expected = [
    ""
    "foo"
    "bar"
    "baz"
  ];
};
|
||||
|
||||
# A delimiter at the very end produces a trailing empty part.
testSplitStringByTrailingDelimiter = {
  expr =
    let
      isDot = _prev: curr: curr == ".";
    in
    strings.splitStringBy isDot false "foo.bar.baz.";
  expected = [
    "foo"
    "bar"
    "baz"
    ""
  ];
};
|
||||
|
||||
# Adjacent delimiters produce one empty part between each pair.
testSplitStringByMultipleConsecutiveDelimiters = {
  expr =
    let
      isDot = _prev: curr: curr == ".";
    in
    strings.splitStringBy isDot false "foo...bar";
  expected = [
    "foo"
    ""
    ""
    "bar"
  ];
};
|
||||
|
||||
# With keepSplit = true the delimiter starts the following part.
testSplitStringByKeepingSplitChar = {
  expr =
    let
      isDot = _prev: curr: curr == ".";
    in
    strings.splitStringBy isDot true "foo.bar.baz";
  expected = [
    "foo"
    ".bar"
    ".baz"
  ];
};
|
||||
|
||||
# Splitting on lowercase -> uppercase transitions, keeping the uppercase character.
testSplitStringByCaseTransition = {
  expr =
    let
      isLower = c: builtins.match "[a-z]" c != null;
      isUpper = c: builtins.match "[A-Z]" c != null;
      lowerToUpper = prev: curr: isLower prev && isUpper curr;
    in
    strings.splitStringBy lowerToUpper true "fooBarBaz";
  expected = [
    "foo"
    "Bar"
    "Baz"
  ];
};
|
||||
|
||||
# An empty input yields a single empty part.
testSplitStringByEmptyString = {
  expr =
    let
      isDot = _prev: curr: curr == ".";
    in
    strings.splitStringBy isDot false "";
  expected = [ "" ];
};
|
||||
|
||||
# Splitting only on digit -> lowercase-letter transitions, keeping the letter.
testSplitStringByComplexPredicate = {
  expr =
    let
      isDigit = c: builtins.match "[0-9]" c != null;
      isLowerLetter = c: builtins.match "[a-z]" c != null;
      digitToLetter =
        prev: curr:
        prev != "" && curr != "" && isDigit prev && isLowerLetter curr;
    in
    strings.splitStringBy digitToLetter true "123abc456def";
  expected = [
    "123"
    "abc456"
    "def"
  ];
};
|
||||
|
||||
# A kept split character at index 0 still produces a leading empty part.
testSplitStringByUpperCaseStart = {
  expr =
    let
      isUpper = _prev: curr: builtins.match "[A-Z]" curr != null;
    in
    strings.splitStringBy isUpper true "FooBarBaz";
  expected = [
    ""
    "Foo"
    "Bar"
    "Baz"
  ];
};
|
||||
|
||||
testEscapeShellArg = {
|
||||
expr = strings.escapeShellArg "esc'ape\nme";
|
||||
expected = "'esc'\\''ape\nme'";
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue