This module contains various string utility routines. See the module re for regular expression support. See the module pegs for PEG support.

TCharSet = set[char]
TFloatFormat = enum
  ffDefault,    ## use the shorter floating point notation
  ffDecimal,    ## use decimal floating point notation
  ffScientific  ## use scientific notation (using ``e`` character)

The different modes of floating point formatting.

Consts

Whitespace = {' ', '\x09', '\x0B', '\x0D', '\x0A', '\x0C'}

All the characters that count as whitespace.

Letters = {'A'..'Z', 'a'..'z'}

the set of letters

Digits = {'0'..'9'}

the set of digits

HexDigits = {'0'..'9', 'A'..'F', 'a'..'f'}

the set of hexadecimal digits

IdentChars = {'a'..'z', 'A'..'Z', '0'..'9', '_'}

the set of characters an identifier can consist of

IdentStartChars = {'a'..'z', 'A'..'Z', '_'}

the set of characters an identifier can start with

NewLines = {'\x0D', '\x0A'}

the set of characters a newline terminator can start with

AllChars = {'\0'..'\xFF'}

A set with all the possible characters. Not very useful on its own, but you can use it to create inverted sets to make the find() proc find invalid characters in strings. Example:

let invalid = AllChars - Digits
doAssert "01234".find(invalid) == -1
doAssert "01A34".find(invalid) == 2

Procs

proc toLower(c: char): char {.noSideEffect, procvar, rtl, extern: "nsuToLowerChar", raises: [], tags: [].}

Converts c into lower case. This works only for the letters A-Z. See unicode.toLower for a version that works for any Unicode character.

proc toLower(s: string): string {.noSideEffect, procvar, rtl, extern: "nsuToLowerStr", raises: [], tags: [].}

Converts s into lower case. This works only for the letters A-Z. See unicode.toLower for a version that works for any Unicode character.

proc toUpper(c: char): char {.noSideEffect, procvar, rtl, extern: "nsuToUpperChar", raises: [], tags: [].}

Converts c into upper case. This works only for the letters a-z. See unicode.toUpper for a version that works for any Unicode character.

proc toUpper(s: string): string {.noSideEffect, procvar, rtl, extern: "nsuToUpperStr", raises: [], tags: [].}

Converts s into upper case. This works only for the letters a-z. See unicode.toUpper for a version that works for any Unicode character.

proc capitalize(s: string): string {.noSideEffect, procvar, rtl, extern: "nsuCapitalize", raises: [], tags: [].}

Converts the first character of s into upper case. This works only for the letters a-z.

proc normalize(s: string): string {.noSideEffect, procvar, rtl, extern: "nsuNormalize", raises: [], tags: [].}

Normalizes the string s. That means to convert it to lower case and remove any '_'. This is needed for Nimrod identifiers for example.

proc cmpIgnoreCase(a, b: string): int {.noSideEffect, rtl, extern: "nsuCmpIgnoreCase", procvar, operator: 4, raises: [], tags: [].}

Compares two strings in a case insensitive manner. Returns:

0 iff a == b
< 0 iff a < b
> 0 iff a > b

proc cmpIgnoreStyle(a, b: string): int {.noSideEffect, rtl, extern: "nsuCmpIgnoreStyle", procvar, operator: 3, raises: [], tags: [].}

Compares two strings normalized (i.e. case and underscores do not matter). Returns:

0 iff a == b
< 0 iff a < b
> 0 iff a > b

proc strip(s: string; leading = true; trailing = true): string {.noSideEffect, rtl, extern: "nsuStrip", operator: 5, raises: [], tags: [].}

Strips whitespace from s and returns the resulting string. If leading is true, leading whitespace is stripped. If trailing is true, trailing whitespace is stripped.

proc toOctal(c: char): string {.noSideEffect, rtl, extern: "nsuToOctal", raises: [], tags: [].}

Converts a character c to its octal representation. The resulting string does not necessarily have a leading zero. Its length is always exactly 3.

proc splitLines(s: string): seq[string] {.noSideEffect, rtl, extern: "nsuSplitLines", raises: [], tags: [].}

The same as the splitLines iterator, but is a proc that returns a sequence of substrings.

proc countLines(s: string): int {.noSideEffect, rtl, extern: "nsuCountLines", raises: [], tags: [].}

same as len(splitLines(s)), but much more efficient.

proc split(s: string; seps: set[char] = Whitespace): seq[string] {.noSideEffect, rtl, extern: "nsuSplitCharSet", raises: [], tags: [].}

The same as the split iterator, but is a proc that returns a sequence of substrings.

proc split(s: string; sep: char): seq[string] {.noSideEffect, rtl, extern: "nsuSplitChar", raises: [], tags: [].}

The same as the split iterator, but is a proc that returns a sequence of substrings.

proc toHex(x: BiggestInt; len: int): string {.noSideEffect, rtl, extern: "nsuToHex", raises: [], tags: [].}

Converts x to its hexadecimal representation. The resulting string will be exactly len characters long. No prefix like 0x is generated. x is treated as an unsigned value.

proc intToStr(x: int; minchars: int = 1): string {.noSideEffect, rtl, extern: "nsuIntToStr", raises: [], tags: [].}

Converts x to its decimal representation. The resulting string will be minimally minchars characters long. This is achieved by adding leading zeros.

proc parseInt(s: string): int {.noSideEffect, procvar, rtl, extern: "nsuParseInt", raises: [EOverflow, EInvalidValue], tags: [].}

Parses a decimal integer value contained in s. If s is not a valid integer, EInvalidValue is raised.

proc parseBiggestInt(s: string): BiggestInt {.noSideEffect, procvar, rtl, extern: "nsuParseBiggestInt", raises: [EInvalidValue], tags: [].}

Parses a decimal integer value contained in s. If s is not a valid integer, EInvalidValue is raised.

proc parseFloat(s: string): float {.noSideEffect, procvar, rtl, extern: "nsuParseFloat", raises: [EInvalidValue], tags: [].}

Parses a decimal floating point value contained in s. If s is not a valid floating point number, EInvalidValue is raised. NAN, INF, -INF are also supported (case insensitive comparison).

proc parseHexInt(s: string): int {.noSideEffect, procvar, rtl, extern: "nsuParseHexInt", raises: [EInvalidValue], tags: [].}

Parses a hexadecimal integer value contained in s. If s is not a valid integer, EInvalidValue is raised. s can have one of the following optional prefixes: 0x, 0X, #. Underscores within s are ignored.

proc parseBool(s: string): bool {.raises: [EInvalidValue], tags: [].}

Parses a value into a bool. If s is one of the following values: y, yes, true, 1, on, then true is returned. If s is one of the following values: n, no, false, 0, off, then false is returned. If s is anything else, an EInvalidValue exception is raised.

proc parseEnum[T](s: string): T

parses an enum T. Raises EInvalidValue for an invalid value in s. The comparison is done in a style insensitive way.

proc parseEnum[T](s: string; default: T): T

parses an enum T. Uses default for an invalid value in s. The comparison is done in a style insensitive way.

proc repeatChar(count: int; c: char = ' '): string {.noSideEffect, rtl, extern: "nsuRepeatChar", raises: [], tags: [].}

Returns a string of length count consisting only of the character c. You can use this proc to left align strings. Example:

let
  width = 15
  text1 = "Hello user!"
  text2 = "This is a very long string"
echo text1 & repeatChar(max(0, width - text1.len)) & "|"
echo text2 & repeatChar(max(0, width - text2.len)) & "|"

proc repeatStr(count: int; s: string): string {.noSideEffect, rtl, extern: "nsuRepeatStr", raises: [], tags: [].}

Returns s concatenated count times.

proc align(s: string; count: int; padding = ' '): string {.noSideEffect, rtl, extern: "nsuAlignString", raises: [], tags: [].}

Aligns a string s with padding, so that it is of length count. padding characters (by default spaces) are added before s, resulting in right alignment. If s.len >= count, no padding is added and s is returned unchanged. If you need to left align a string use the repeatChar proc. Example:

assert align("abc", 4) == " abc"
assert align("a", 0) == "a"
assert align("1232", 6) == "  1232"
assert align("1232", 6, '#') == "##1232"

proc wordWrap(s: string; maxLineWidth = 80; splitLongWords = true; seps: set[char] = Whitespace; newLine = "\x0A"): string {. noSideEffect, rtl, extern: "nsuWordWrap", raises: [], tags: [].}

word wraps s.

proc unindent(s: string; eatAllIndent = false): string {.noSideEffect, rtl, extern: "nsuUnindent", raises: [], tags: [].}

unindents s.

proc startsWith(s, prefix: string): bool {.noSideEffect, rtl, extern: "nsuStartsWith", raises: [], tags: [].}

Returns true iff s starts with prefix. If prefix == "" true is returned.

proc endsWith(s, suffix: string): bool {.noSideEffect, rtl, extern: "nsuEndsWith", raises: [], tags: [].}

Returns true iff s ends with suffix. If suffix == "" true is returned.

proc continuesWith(s, substr: string; start: int): bool {.noSideEffect, rtl, extern: "nsuContinuesWith", raises: [], tags: [].}

Returns true iff s continues with substr at position start. If substr == "" true is returned.

proc addSep(dest: var string; sep = ", "; startLen = 0) {.noSideEffect, inline, raises: [], tags: [].}

A shorthand for:

if dest.len > startLen: add(dest, sep)

This is often useful for generating some code where the items need to be separated by sep. sep is only added if dest is longer than startLen. The following example creates a string describing an array of integers:

var arr = "["
for x in items([2, 3, 5, 7, 11]):
  addSep(arr, startLen=len("["))
  add(arr, $x)
add(arr, "]")

proc allCharsInSet(s: string; theSet: TCharSet): bool {.raises: [], tags: [].}

returns true iff each character of s is in the set theSet.

proc abbrev(s: string; possibilities: openArray[string]): int {.raises: [], tags: [].}

returns the index of the first item in possibilities if not ambiguous; -1 if no item has been found; -2 if multiple items match.

proc join(a: openArray[string]; sep: string): string {.noSideEffect, rtl, extern: "nsuJoinSep", raises: [], tags: [].}

concatenates all strings in a separating them with sep.

proc join(a: openArray[string]): string {.noSideEffect, rtl, extern: "nsuJoin", raises: [], tags: [].}

concatenates all strings in a.

proc find(s, sub: string; start: int = 0): int {.noSideEffect, rtl, extern: "nsuFindStr", operator: 6, raises: [], tags: [].}

Searches for sub in s starting at position start. Searching is case-sensitive. If sub is not in s, -1 is returned.

proc find(s: string; sub: char; start: int = 0): int {.noSideEffect, rtl, extern: "nsuFindChar", raises: [], tags: [].}

Searches for sub in s starting at position start. Searching is case-sensitive. If sub is not in s, -1 is returned.

proc find(s: string; chars: set[char]; start: int = 0): int {.noSideEffect, rtl, extern: "nsuFindCharSet", raises: [], tags: [].}

Searches for chars in s starting at position start. If s contains none of the characters in chars, -1 is returned.

proc rfind(s, sub: string; start: int = - 1): int {.noSideEffect, raises: [], tags: [].}

Searches for sub in s in reverse, starting at start and going backwards to 0. Searching is case-sensitive. If sub is not in s, -1 is returned.

proc quoteIfContainsWhite(s: string): string {.deprecated, raises: [], tags: [].}

Returns '"' & s & '"' if s contains a space and does not start with a quote, else returns s. DEPRECATED, as it was confused with a shell quoting function. For this application use osproc.quoteShell.

proc contains(s: string; c: char): bool {.noSideEffect, raises: [], tags: [].}

Same as find(s, c) >= 0.

proc contains(s, sub: string): bool {.noSideEffect, raises: [], tags: [].}

Same as find(s, sub) >= 0.

proc contains(s: string; chars: set[char]): bool {.noSideEffect, raises: [], tags: [].}

Same as find(s, chars) >= 0.

proc replace(s, sub: string; by = ""): string {.noSideEffect, rtl, extern: "nsuReplaceStr", operator: 1, raises: [], tags: [].}

Replaces sub in s by the string by.

proc replace(s: string; sub, by: char): string {.noSideEffect, rtl, extern: "nsuReplaceChar", raises: [], tags: [].}

optimized version for characters.

proc replaceWord(s, sub: string; by = ""): string {.noSideEffect, rtl, extern: "nsuReplaceWord", raises: [], tags: [].}

Replaces sub in s by the string by. Each occurrence of sub has to be surrounded by word boundaries (comparable to \\w in regular expressions), otherwise it is not replaced.

proc delete(s: var string; first, last: int) {.noSideEffect, rtl, extern: "nsuDelete", raises: [], tags: [].}

Deletes in s the characters at position first .. last. This modifies s itself, it does not return a copy.

proc parseOctInt(s: string): int {.noSideEffect, rtl, extern: "nsuParseOctInt", raises: [EInvalidValue], tags: [].}

Parses an octal integer value contained in s. If s is not a valid integer, EInvalidValue is raised. s can have one of the following optional prefixes: 0o, 0O. Underscores within s are ignored.

proc toOct(x: BiggestInt; len: int): string {.noSideEffect, rtl, extern: "nsuToOct", raises: [], tags: [].}

converts x into its octal representation. The resulting string is always len characters long. No leading 0o prefix is generated.

proc toBin(x: BiggestInt; len: int): string {.noSideEffect, rtl, extern: "nsuToBin", raises: [], tags: [].}

converts x into its binary representation. The resulting string is always len characters long. No leading 0b prefix is generated.

proc insertSep(s: string; sep = '_'; digits = 3): string {.noSideEffect, rtl, extern: "nsuInsertSep", raises: [], tags: [].}

inserts the separator sep after digits digits from right to left. Even though the algorithm works with any string s, it is only useful if s contains a number. Example: insertSep("1000000") == "1_000_000"

proc escape(s: string; prefix = "\""; suffix = "\""): string {.noSideEffect, rtl, extern: "nsuEscape", raises: [], tags: [].}

Escapes a string s. This does these operations (at the same time):

replaces any \ by \\
replaces any ' by \'
replaces any " by \"
replaces any other character in the set {'\0'..'\31', '\128'..'\255'} by \xHH where HH is its hexadecimal value.

The procedure has been designed so that its output is usable for many different common syntaxes. The resulting string is prefixed with prefix and suffixed with suffix. Both may be empty strings.

proc unescape(s: string; prefix = "\""; suffix = "\""): string {.noSideEffect, rtl, extern: "nsuUnescape", raises: [EInvalidValue, EInvalidValue], tags: [].}

Unescapes a string s. This complements escape as it performs the opposite operations.

If s does not begin with prefix and end with suffix, an EInvalidValue exception will be raised.

proc validIdentifier(s: string): bool {.noSideEffect, rtl, extern: "nsuValidIdentifier", raises: [], tags: [].}

returns true if s is a valid identifier. A valid identifier starts with a character of the set IdentStartChars and is followed by any number of characters of the set IdentChars.

proc editDistance(a, b: string): int {.noSideEffect, rtl, extern: "nsuEditDistance", raises: [], tags: [].}

returns the edit distance between a and b. This uses the Levenshtein distance algorithm with only a linear memory overhead. This implementation is highly optimized!

proc formatBiggestFloat(f: BiggestFloat; format: TFloatFormat = ffDefault; precision: range[0 .. 32] = 16): string {.noSideEffect, operator: 2, rtl, extern: "nsu$1", raises: [], tags: [].}

converts a floating point value f to a string.

If format == ffDecimal then precision is the number of digits to be printed after the decimal point. If format == ffScientific then precision is the maximum number of significant digits to be printed. precision's default value is the maximum number of meaningful digits after the decimal point for Nimrod's biggestFloat type.

If precision == 0, it tries to format it nicely.

proc formatFloat(f: float; format: TFloatFormat = ffDefault; precision: range[0 .. 32] = 16): string {.noSideEffect, operator: 2, rtl, extern: "nsu$1", raises: [], tags: [].}

converts a floating point value f to a string.

proc formatSize(bytes: BiggestInt; decimalSep = '.'): string {.raises: [], tags: [].}

Rounds and formats bytes. Examples:

formatSize(1'i64 shl 31 + 300'i64) == "2.204GB"
formatSize(4096) == "4KB"

proc addf(s: var string; formatstr: string; a: varargs[string, `$`]) {. noSideEffect, rtl, extern: "nsuAddf", raises: [EInvalidValue], tags: [].}

The same as add(s, formatstr % a), but more efficient.

proc `%`(formatstr: string; a: openArray[string]): string {.noSideEffect, rtl, extern: "nsuFormatOpenArray", raises: [EInvalidValue], tags: [].}

The substitution operator performs string substitutions in formatstr and returns a modified formatstr. This is often called string interpolation.

This is best explained by an example:

"$1 eats $2." % ["The cat", "fish"]

Results in:

"The cat eats fish."

The substitution variables (the thing after the $) are enumerated from 1 to a.len. To produce a verbatim $, use $$. The notation $# can be used to refer to the next substitution variable:

"$# eats $#." % ["The cat", "fish"]

Substitution variables can also be words (that is [A-Za-z_]+[A-Za-z0-9_]*) in which case the arguments in a with even indices are keys and with odd indices are the corresponding values. An example:

"$animal eats $food." % ["animal", "The cat", "food", "fish"]

Results in:

"The cat eats fish."

The variables are compared with cmpIgnoreStyle. EInvalidValue is raised if an ill-formed format string has been passed to the % operator.

proc `%`(formatstr, a: string): string {.noSideEffect, rtl, extern: "nsuFormatSingleElem", raises: [EInvalidValue], tags: [].}

This is the same as formatstr % [a].

proc format(formatstr: string; a: varargs[string, `$`]): string {.noSideEffect, rtl, extern: "nsuFormatVarargs", raises: [EInvalidValue], tags: [].}

This is the same as formatstr % a except that it supports auto stringification.

Iterators

iterator split(s: string; seps: set[char] = Whitespace): string {.raises: [], tags: [].}

Splits the string s into substrings using a group of separators.

Substrings are separated by a substring containing only seps. Note that whole sequences of characters found in seps will be counted as a single split point and leading/trailing separators will be ignored. The following example:

for word in split("  this is an  example  "):
  writeln(stdout, word)

...generates this output:

"this"
"is"
"an"
"example"

And the following code:

for word in split(";;this;is;an;;example;;;", {';'}):
  writeln(stdout, word)

...produces the same output as the first example. The code:

let date = "2012-11-20T22:08:08.398990"
let separators = {' ', '-', ':', 'T'}
for number in split(date, separators):
  writeln(stdout, number)

...results in:

"2012"
"11"
"20"
"22"
"08"
"08.398990"

iterator split(s: string; sep: char): string {.raises: [], tags: [].}

Splits the string s into substrings using a single separator.

Substrings are separated by the character sep. Unlike the version of the iterator which accepts a set of separator characters, this iterator will not coalesce groups of the separator; it yields a (possibly empty) string for each separator found. The code:

for word in split(";;this;is;an;;example;;;", ';'):
  writeln(stdout, word)

Results in:

""
""
"this"
"is"
"an"
""
"example"
""
""
""

iterator splitLines(s: string): string {.raises: [], tags: [].}

Splits the string s into the lines it contains. Every newline combination (CR, LF, CR-LF) is supported. The result strings contain no trailing \n.

Example:

for line in splitLines("\nthis\nis\nan\n\nexample\n"):
  writeln(stdout, line)

Results in:

""
"this"
"is"
"an"
""
"example"
""

iterator tokenize(s: string; seps: set[char] = Whitespace): tuple[token: string, isSep: bool] {.raises: [], tags: [].}

Tokenizes the string s into substrings.

Substrings are separated by a substring containing only seps. Examples:

for word in tokenize("  this is an  example  "):
  writeln(stdout, word)

Results in:

("  ", true)
("this", false)
(" ", true)
("is", false)
(" ", true)
("an", false)
("  ", true)
("example", false)
("  ", true)

Module strutils

Imports

Types

Consts

Procs

Iterators