Node:StringUtils, Next:, Previous:Strings, Up:GPC Units



Higher level string handling

The following listing contains the interface of the StringUtils unit.

This unit provides some routines for string handling on a higher level than those provided by the RTS.

{ Some routines for string handling on a higher level than those
  provided by the RTS.

  Copyright (C) 1999-2003 Free Software Foundation, Inc.

  Author: Frank Heckenbach <frank@pascal.gnu.de>

  This file is part of GNU Pascal.

  GNU Pascal is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published
  by the Free Software Foundation; either version 2, or (at your
  option) any later version.

  GNU Pascal is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with GNU Pascal; see the file COPYING. If not, write to the
  Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  02111-1307, USA.

  As a special exception, if you link this file with files compiled
  with a GNU compiler to produce an executable, this does not cause
  the resulting executable to be covered by the GNU General Public
  License. This exception does not however invalidate any other
  reasons why the executable file might be covered by the GNU
  General Public License. }

{$gnu-pascal,I-}
{$if __GPC_RELEASE__ < 20030303}
{$error This unit requires GPC release 20030303 or newer.}
{$endif}

unit StringUtils;

interface

uses GPC;

{ Various routines }

{ Appends Source to s, truncating the result if necessary. }
procedure AppendStr (var s: String; const Source: String);

{ Cuts s to MaxLength characters. If s is already MaxLength
  characters or shorter, it doesn't change anything. }
procedure StrCut (var s: String; MaxLength: Integer);

{ Returns the number of disjoint occurrences of SubStr in s. Returns
  0 if SubStr is empty. }
function  StrCount (const SubStr: String; s: String): Integer;

{ Returns s, with all disjoint occurrences of Source replaced by
  Dest. }
function  StrReplace (const s, Source, Dest: String): TString;

{ Sets of characters accepted for True and False by
  Char2Boolean and StrReadBoolean. }
var
  CharactersTrue : CharSet = ['Y', 'y'];
  CharactersFalse: CharSet = ['N', 'n'];

{ If ch is an element of CharactersTrue, Dest is set to True,
  otherwise if it is an element of CharactersFalse, Dest is set to
  False. In both cases True is returned. If ch is not an element of
  either set, Dest is set to False and False is returned. }
function  Char2Boolean (ch: Char; var Dest: Boolean): Boolean;

{ Converts a digit character to its numeric value. Handles every
  base up to 36 (0 .. 9, a .. z, upper and lower case recognized).
  Returns -1 if the character is not a digit at all. If you want to
  use it for a base < 36, you have to check if the result is smaller
  than the base and not equal to -1. }
function  Char2Digit (ch: Char): Integer;

{ Encode a string in a printable format (quoted printable and
  surrounded with "). All occurrences of " within the string are
  encoded, so the result string contains exactly two " characters
  (at the beginning and ending). This is useful to store arbitrary
  strings in text files while keeping them as readable as possible
  (which is the goal of the quoted printable encoding in general,
  see RFC 1521, section 5.1) and being able to read them back
  losslessly (with UnQuoteString). }
function  QuoteString (const s: String): TString;

{ Encode a string in a printable format suitable for StrReadEnum.
  All occurrences of a comma (,) within the string are encoded. }
function  QuoteEnum (const s: String): TString;

{ Decode a string encoded by QuoteString (removing the " and
  expanding quoted printable encoded characters). Returns True if
  successful and False if the string has an invalid form. A string
  returned by QuoteString is always valid. }
function  UnQuoteString (var s: String): Boolean;

{ Decode a quoted-printable string (not enclosed in ", unlike for
  UnQuoteString). Returns True if successful and False if the string
  has an invalid form. In the latter case, it still decodes as much
  as is valid, even after the error position. }
function  UnQPString (var s: String): Boolean;

{ Quotes a string as done in shells, i.e. all special characters are
  enclosed in either " or ', where ", $ and ` are always
  enclosed in ' and ' is always enclosed in ". }
function  ShellQuoteString (const s: String): TString;

{ Replaces all tab characters in s with the appropriate amount of
  spaces, assuming tab stops at every TabSize columns. Returns True
  if successful and False if the expanded string would exceed the
  capacity of s. In the latter case, some, but not all of the tabs
  in s may have been expanded. }
function  ExpandTabs (var s: String; TabSize: Integer): Boolean;

{ Returns s, with all occurrences of C style escape sequences (e.g.
  \n) replaced by the characters they mean. If AllowOctal is True,
  also octal character specifications (e.g. \007) are replaced. If
  RemoveQuoteChars is True, any other backslashes are removed (e.g.
  \* -> * and \\ -> \), otherwise they are kept, and also
  \\ is left as two backslashes then. }
function  ExpandCEscapeSequences (const s: String; RemoveQuoteChars,
  AllowOctal: Boolean): TString;

{ Routines for TPStrings }

{ Initialise a TPStrings variable, allocate Size characters for each
  element. This procedure does not dispose of any previously
  allocated storage, so if you use it on a previously used variable
  without freeing the storage yourself, this might cause memory
  leaks. }
procedure AllocateTPStrings (var Strings: TPStrings; Size: Integer);

{ Clears all elements (sets them to empty strings); does not free
  any storage. }
procedure ClearTPStrings (var Strings: TPStrings);

{ Divide a string into substrings, using Separators as separator. A
  single trailing separator is ignored. Further trailing separators
  as well as any leading separators and multiple separators in a row
  produce empty substrings. }
function TokenizeString (const Source: String; Separators: CharSet):
  PPStrings;

{ Divide a string into substrings, using SpaceCharacters as
  separators. The splitting is done according to the usual rules of
  shells, using (and removing) single and double quotes and
  QuotingCharacter. Multiple, leading and trailing separators are
  ignored. If there is an error, a message will be stored in ErrMsg,
  and nil will be returned. Nil will also be returned (without an
  error message) if s is empty. }
function ShellTokenizeString (const s: String; var ErrMsg: String):
  PPStrings;

{ String parsing routines }

{ All the following StrReadFoo functions behave similarly. They read
  items from a string s, starting at index i, to a variable Dest.
  They skip any space characters (spaces and tabs) by incrementing i
  first. They return True if successful, False otherwise. i is
  incremented accordingly if successful, otherwise i is left
  unchanged, apart from the skipping of space characters, and Dest
  is undefined. This behaviour makes it easy to use the functions in
  a row like this:

    i := 1;
    if StrReadInt    (s, i, Size)  and StrReadComma (s, i) and
       StrReadQuoted (s, i, Name)  and StrReadComma (s, i) and
       ...
       StrReadReal   (s, i, Angle) and (i > Length (s)) then ...

  (The check i > Length (s) is in case you don't want to accept
  trailing "garbage".) }

{ Just skip any space characters as described above. }
procedure StrSkipSpaces (const s: String; var i: Integer);

{ Read a quoted string (as produced by QuoteString) from a string
  and unquote the result using UnQuoteString. It is considered
  failure if the result (unquoted) would be longer than the capacity
  of Dest. }
function  StrReadQuoted (const s: String; var i: Integer; var Dest:
  String): Boolean;

{ Read a string delimited with Delimiter from a string and return
  the result with the delimiters removed. It is considered failure
  if the result (without delimiters) would be longer than the
  capacity of Dest. }
function  StrReadDelimited (const s: String; var i: Integer; var
  Dest: String; Delimiter: Char): Boolean;

{ Read a word (consisting of anything but space characters and
  commas) from a string. It is considered failure if the result
  would be longer than the capacity of Dest. }
function  StrReadWord (const s: String; var i: Integer; var Dest:
  String): Boolean;

{ Check that a certain string is contained in s (after possible
  space characters). }
function  StrReadConst (const s: String; var i: Integer; const
  Expected: String): Boolean;

{ A simpler-to-use version of StrReadConst that expects a comma (,). }
function  StrReadComma (const s: String; var i: Integer): Boolean;

{ Read an integer number from a string. }
function  StrReadInt (const s: String; var i: Integer; var Dest:
  Integer): Boolean;

{ Read a real number from a string. }
function  StrReadReal (const s: String; var i: Integer; var Dest:
  Real): Boolean;

{ Read a Boolean value, represented by a single character
  from CharactersTrue or CharactersFalse (cf. Char2Boolean), from a
  string. }
function  StrReadBoolean (const s: String; var i: Integer; var Dest:
  Boolean): Boolean;

{ Read an enumerated value, i.e., one of the entries of IDs, from a
  string, and store the ordinal value, i.e., the index in IDs
  (always zero-based), in Dest. }
function  StrReadEnum (const s: String; var i: Integer; var Dest:
  Integer; const IDs: array of PString): Boolean;

{ String hash table }

const
  { Named default table size. NOTE(review): presumably meant to be
    passed as the Size argument of NewStrHashTable -- TODO confirm. }
  DefaultHashSize = 1403;

type
  { Result type of HashString. }
  THash = Cardinal;

  { One entry of a singly linked list, chained through Next. Each
    entry carries a string pointer s together with an Integer i and
    an untyped Pointer p. }
  PStrHashList = ^TStrHashList;
  TStrHashList = record
    Next: PStrHashList;  { link to the following entry of the list }
    s: PString;
    i: Integer;
    p: Pointer
  end;

  { Schema type with discriminant Size: Table has exactly Size slots
    (indices 0 .. Size - 1), each slot being a PStrHashList, i.e. a
    pointer to a chain of TStrHashList entries. }
  PStrHashTable = ^TStrHashTable;
  TStrHashTable (Size: Cardinal) = record
    { NOTE(review): presumably records the CaseSensitive argument
      given to NewStrHashTable -- TODO confirm. }
    CaseSensitive: Boolean;
    Table: array [0 .. Size - 1] of PStrHashList
  end;

{ NOTE(review): the comments on the following hash table routines
  are derived from the declarations only; the implementation is not
  visible in this listing, so all behavioural statements need to be
  confirmed against it. }

{ Returns a value of type THash for the string s. }
function  HashString          (const s: String): THash;

{ Presumably creates a hash table with Size slots and the given
  case sensitivity and returns a pointer to it -- TODO confirm. }
function  NewStrHashTable     (Size: Cardinal; CaseSensitive:
  Boolean): PStrHashTable;

{ Presumably adds the string s, together with the associated values
  i and p, to HashTable -- TODO confirm (in particular what happens
  if s is already present). }
procedure AddStrHashTable     (HashTable: PStrHashTable; s: String;
  i: Integer; p: Pointer);

{ Presumably removes the entry for the string s from HashTable
  -- TODO confirm. }
procedure DeleteStrHashTable  (HashTable: PStrHashTable; s: String);

{ Presumably looks up s in HashTable, returning the Integer stored
  with it and storing the associated pointer in p -- TODO confirm,
  including the result for a string that is not found. }
function  SearchStrHashTable  (HashTable: PStrHashTable; const s:
  String; var p: Pointer): Integer;  { p may be Null }

{ Reports usage figures of HashTable in the var parameters Entries
  and Slots. NOTE(review): presumably the number of stored entries
  and the number of non-empty slots -- verify. }
procedure StrHashTableUsage   (HashTable: PStrHashTable; var
  Entries, Slots: Integer);

{ Presumably frees HashTable and its entries -- TODO confirm, and
  check whether the memory referenced by the p fields is left
  untouched. }
procedure DisposeStrHashTable (HashTable: PStrHashTable);