7.3 Library String Functions

So much for a few homemade attempts to implement some string operations. The reader will be asked to implement a few more of these in the exercises. This section will elaborate on what others consider to be a reasonable collection of string functions.

What follows is the definition of a module called Strings that is available in the ISO compliant versions of Modula-2. There are also notes on some variants for other versions. The explanation of the various procedures follows.

NOTES: 1. ISO standard versions of Modula - 2, implement within the language proper a standard procedure LENGTH with the syntax exactly as indicated above for length. The function Length is still included in the Strings module for the sake of compatibility with older versions.

2. Because some other type of string may also be required for operating system use, or for interface to other languages, many versions of Modula-2 have two separate string handling modules, and a third for conversion functions to move data between the two types. See the exercises for more information.

3. In non-ISO versions, the procedures made available are not always the same as these, and when they are, the order of the parameters varies from one implementation to another.

DEFINITION MODULE Strings;

TYPE
  String1 = ARRAY [0..0] OF CHAR;
    (* for constructing a value of a single-character string type to pass CHAR values to ARRAY OF CHAR parameters. *)

PROCEDURE Length (stringVal: ARRAY OF CHAR): CARDINAL;
  (* Returns the same value as would be returned by LENGTH *)

PROCEDURE Assign (source: ARRAY OF CHAR; VAR destination: ARRAY OF CHAR);
  (* Copies source to destination *)

PROCEDURE Extract (source: ARRAY OF CHAR; startPos,
                    numberToExtract: CARDINAL;
                    VAR destination: ARRAY OF CHAR);
  (* Copies at most numberToExtract characters from source to destination, starting at position startPos in source. *)

PROCEDURE Delete (VAR stringVar: ARRAY OF CHAR; startPos,
                    numberToDelete: CARDINAL);
  (* Deletes at most numberToDelete characters from stringVar, starting at position startPos. *)

PROCEDURE Insert (source: ARRAY OF CHAR; startPos: CARDINAL;
                    VAR destination: ARRAY OF CHAR);
  (* Inserts source into destination at position startPos *)

PROCEDURE Replace (source: ARRAY OF CHAR;
                   startPos: CARDINAL;
                   VAR destination: ARRAY OF CHAR);
  (* Copies source into destination, starting at position startPos. Copying stops when all of source has been copied, or when the last character of the string value in destination has been replaced. *)

PROCEDURE Append (source: ARRAY OF CHAR; VAR destination: ARRAY OF CHAR);
  (* Appends source to destination. *)

PROCEDURE Concat (source1, source2: ARRAY OF CHAR; VAR destination: ARRAY OF CHAR);
  (* Concatenates source2 onto source1 and copies the result into destination. *)

PROCEDURE CanAssignAll (sourceLength: CARDINAL; VAR destination: ARRAY OF CHAR): BOOLEAN;

PROCEDURE CanExtractAll (sourceLength, startPos,
                      numberToExtract: CARDINAL;
                      VAR destination: ARRAY OF CHAR): BOOLEAN;

PROCEDURE CanDeleteAll (stringLength, startPos, numberToDelete: CARDINAL): BOOLEAN;

PROCEDURE CanInsertAll (sourceLength, startPos: CARDINAL;
                     VAR destination: ARRAY OF CHAR): BOOLEAN;

PROCEDURE CanReplaceAll (sourceLength, startPos: CARDINAL;
                     VAR destination: ARRAY OF CHAR): BOOLEAN;

PROCEDURE CanAppendAll (sourceLength: CARDINAL;
                  VAR destination: ARRAY OF CHAR): BOOLEAN;

PROCEDURE CanConcatAll (source1Length, source2Length: CARDINAL;
                    VAR destination: ARRAY OF CHAR): BOOLEAN;

(* The following type and procedures provide for the comparison of string values, and for the location of substrings within strings. *)

TYPE
  CompareResults = (less, equal, greater);

PROCEDURE Compare (stringVal1, stringVal2: ARRAY OF CHAR): CompareResults;
  (* Returns less, equal, or greater, according as stringVal1 is lexically less than, equal to, or greater than stringVal2. *)

PROCEDURE Equal (stringVal1, stringVal2: ARRAY OF CHAR): BOOLEAN;
  (* Returns Strings.Compare(stringVal1, stringVal2) = Strings.equal *)

PROCEDURE FindNext (pattern, stringToSearch: ARRAY OF CHAR;
                    startPos: CARDINAL;
                    VAR patternFound: BOOLEAN;
                    VAR posOfPattern: CARDINAL);
  (* Looks forward for next occurrence of pattern in stringToSearch, starting the search at  position startPos. If startPos < LENGTH(stringToSearch) and pattern is found, patternFound is returned as TRUE, and posOfPattern contains the start position in stringToSearch of pattern. Otherwise patternFound is returned as FALSE, and posOfPattern is unchanged. *)

PROCEDURE FindPrev (pattern, stringToSearch:
                  ARRAY OF CHAR; startPos: CARDINAL;
                    VAR patternFound: BOOLEAN;
                    VAR posOfPattern: CARDINAL);
  (* Looks backward for the previous occurrence of pattern in stringToSearch and returns the position of the first character of the pattern if found. The search for the pattern begins at startPos. If pattern is found, patternFound is returned as TRUE, and posOfPattern contains the start position in stringToSearch of pattern in the range [0..startPos]. Otherwise patternFound is returned as FALSE, and posOfPattern is unchanged. *)

PROCEDURE FindDiff (stringVal1, stringVal2: ARRAY OF CHAR;
                    VAR differenceFound: BOOLEAN;
                    VAR posOfDifference: CARDINAL);
  (* Compares the string values in stringVal1 and stringVal2 for differences. If they are equal, differenceFound is returned as FALSE, and TRUE otherwise. If differenceFound is TRUE, posOfDifference is set to the position of the first difference; otherwise posOfDifference is unchanged. *)

PROCEDURE Capitalize (VAR stringVar: ARRAY OF CHAR);
  (* Applies the function CAP to each character of the string value in stringVar. *)

END Strings.

The Procedures Assign, Extract, Delete, Insert, Replace, Append, and Concat have the property that if the string they are constructing has a length exceeding the capacity of the variable parameter, the result is silently truncated. That is, the extra characters are discarded and no error condition is created. Of course, if the string created is shorter than the capacity of the variable parameter, the string terminator is appended.

Because this behaviour might create a problem for the client of this module, the procedures CanAssignAll, CanExtractAll, CanDeleteAll, CanInsertAll, CanReplaceAll, CanAppendAll, and CanConcatAll, are provided to test whether the corresponding command would work correctly (whether there is enough room) if attempted. The writer of the client program must decide whether to use these or not, and what the program should do if the test produces FALSE, indicating that the intended destination lacks the room for the result.

The procedure Assign takes the content of one string variable and assigns it to a second (which could have a different underlying type). If str1 and str2 are of the same formal type of string, one could also write

  str1 := str2;

instead of

  Assign (str2, str1);

so this procedure is not always necessary. However, if one had an ARRAY [2 .. n] OF CHAR (i.e. any array not zero based) then Assign could be used to give its contents to a string identifier. (i.e. one defined as of a type [0 .. m]) Also, if one had:

  TYPE
    ShortStrings = ARRAY [0 .. 10] OF CHAR;
    LongStrings = ARRAY [0 .. 79] OF CHAR;

  VAR
    name : ShortStringType;
    fullName : LongStringType;

then, an assignment like:

  fullName := name; 

would be illegal in standard Modula-2, because the two underlying types are different. In this case also, one must, import Assign from Strings and use

  Assign (name, fullName);

Note, however, that Assign is not needed in a few non-standard versions, which do allow direct assignments of the kind above as a language extension.

In general, it is best to use Assign and the other utility library procedures, treating string types as ADTs even though their structure is known to the program. An ADT String could be implemented that way. If it were, the parameters for these procedures would all be of type String rather than ARRAY [0 .. N - 1] of CHAR. Assign would then always be necessary to convert an ARRAY OF CHAR to a variable of type String, for one would not officially know how the type String is defined. In some ways this would be more in accord with sound Modula-2 programming practice than what is in fact done. One should therefore regard the implied type String as an anomaly--an exception to the general technique, rather than as a model to follow in devising user libraries implementing ADTs. (The details of how to create ADTs whose structure truly is invisible to client programs will be discussed in a later chapter.)

The procedure here called Extract is also known as Copy in some non-ISO versions. Its purpose is to assign a substring in the source to the destination.

Insert and Delete are closely related. One puts characters in to a string; the other takes them out. In both cases, the text following the insertion point is moved appropriately. Replace, on the other hand, copies new material into a destination string over the top of whatever was there. Suppose one had:

  str1 := " the old";
  str2 := "Nellie Hacker";

Then,

  Extract (str2, 3, 5, str3);

would result in str3 holding "lie H", while

  Insert (str1, 6, str2);

would result in str2 holding "Nellie the old Hacker". At this point,

  Delete (str2, 1, 14);

would result in str2 holding "Nacker". Now,

  Replace ("Si", 0, str2);

would result in str2 holding "Sicker".

The procedure Append attaches the first string supplied to the end of the second, returning the second string in an altered condition, whereas Concat does this, but places the result in a third parameter, leaving the original strings untouched. Concat and Length are shown here just as they were defined in the last section. Of course, the programmer who imports from this library neither knows nor cares whether the writer of the implementation part of this particular module actually wrote them in the same way as shown here.

The function Compare makes an alphabetical comparison of the strings str1 and str2. The result is less if str1 < str2; it is equal if str1 = str2, and it is greater if str1 > str2. If a test for equality is all that is needed, the procedure Equal will suffice. The comparison distinguishes upper and lower case according to their different positions in the ASCII sequence of characters.

Examples:

The following all return less.

Compare ("Cat", "Hat");
Compare ("at", "atrocious);"
Compare ("105", "108");  (* character, not numerical compare *)
Compare ("Albert", "albert"); (* upper case before lower case *)

The procedure FindNext (sometimes called Pos in non-standard versions) returns as the value of posOfPattern the cardinal of the starting position of the first string (pattern) if it is found in the second string (stringToSearch) after the given startPos. In this case, the BOOLEAN parameter patternFound is TRUE. It returns FALSE in patternFound if it is instructed to start at a point past the end of the target string, or if it does not find the target embedded in the source. In this case, posOfPattern is unchanged. Thus;

  str1 := "Johanna Meier the next hacker";
  FindNext ("Me", str1, 0, found, where);

would set found to TRUE and where to 8, whereas, if the third parameter were given as 10, found would be FALSE. The procedure FindPrev acts in the same way as FindNext but searches backward from the given starting point instead of forward. The procedure FindDiff compares two strings for equality character by character. If they are equal the BOOLEAN parameter differenceFound is set to FALSE. If there is a difference, that parameter is set to TRUE and the place at which they first differ is the value of the parameter posOfDifference.

The procedure Capitalize capitalizes all the letters in a string. The (non-standard and occasionally supplied) procedures FetchChar and AssignChar allow the client program to manipulate individual characters in items of type String abstractly (i.e. without knowing the structure, or if knowing it, without using that knowledge).

It is a useful exercise in understanding the meaning of string data to implement all these procedures yourself.

NOTES: 1. Some non-standard implementations export two or more convenience types such as String80 = ARRAY[0..80] OF CHAR.

2. Strings are implemented in a variety of ways in different languages and implementations. Modula-2 itself uses, as seen here, a terminated ARRAY [0 .. len - 1] OF CHAR. However, the associated operating system may not use Modula-2 style strings for such things as, say, file names. Thus if the programmer decides to manipulate the file system on a level lower than what is provided by the utility module Files, (see the next chapter for details) it will probably be necessary to convert Modula-2 strings that name files to the appropriate form for the actual file system being used. Facilities to make these changes may be provided in an appropriate conversions module, or they may have to be programmed.

To illustrate some of the ideas presented in these two sections, here is a simple program module that will accept an input line from the standard input, and then print it out back to front in two ways--letter by letter, and word by word. Note that it is written in a way that depends on knowing what is the structure of string variables.

MODULE Backwards;

(* Written by R.J. Sutcliffe *)
(* to illustrate string manipulation *)
(* using ISO Modula-2  *)
(* last revision 1996 12 04 *)

FROM STextIO IMPORT
  WriteLn, WriteChar, WriteString, ReadString, SkipLine;

TYPE
  String = ARRAY [0 .. 80] OF CHAR;
   
CONST
  blank = ' ';

VAR
  inStr : String;
  count, startWord, endWord : CARDINAL;

BEGIN
  WriteString ("Please type in the line to process ");
  WriteLn;
  ReadString (inStr);
  SkipLine;
  WriteLn;
  (* Write out original *)
  WriteString (inStr);
  WriteLn;

  (* Write backwards letter by letter *)
  FOR count := (LENGTH (inStr) - 1) TO 0 BY -1
    DO
      WriteChar (inStr [count]);
    END;
  WriteLn;

  (* Now get ready to print the word-reversed string *)
  endWord := LENGTH (inStr) - 1;
  REPEAT
    WHILE (endWord > 0) AND (inStr [endWord] = blank)
      DO
        DEC (endWord);    (* find last non-blank *)
      END;    (* it will be the end of a word *)
    startWord := endWord;    (* set count; find start of word *)
    WHILE (startWord > 0)  AND (inStr [startWord - 1] # blank)
      DO
        DEC (startWord)   (* move back to letter *)
      END;                (* following a blank *)
    (* startword and endword now delimit a word *)
  
  (* now print one word *)
    FOR count := startWord TO endWord
      DO
        WriteChar (inStr [count]);
      END;
    IF startWord # 0   (* except for the first word *)
      THEN      (* WriteChar a blank *)
        WriteChar (blank);   (* after the word *)
        endWord := startWord - 1   (* reset wordend, carry on *)
      ELSE
        endWord := startWord
      END;
  UNTIL endWord = 0
    
END Backwards.

Sample run:

Now is the time for all good men to come to the aid of the party.
.ytrap eht fo dia eht ot emoc ot nem doog lla rof emit eht si woN
party. the of aid the to come to men good all for time the is Now

Contents