(*	GETPATTERN - uses a recursive descent recogniser to process the pattern
	argument given on the command line and produce the internal data
	structure. The pattern recognised is given by the following extended
	BNF description :-

	pattern     = [ startmatch ] { item } [ endmatch ].
	item        = ( singlechar | matchany | group) [ repetition ].
	group       = groupstart [ negategroup ] { singlechar } groupend.

	startmatch  = "^".
	endmatch    = "$".
	matchany    = ".".
	repetition  = "*".
	groupstart  = "[".
	negategroup = "^".
	groupend    = "]".
	singlechar  = "\" anychar | anynonspecialchar.

	Pattern, item and group are recognised by procedures getpattern,
	getitem, and getgroup, respectively. The structure of these procedures
	follows their respective extended BNF description very closely.
	The remaining productions in the above syntax are all recognised by
	the lexical analyser getnext.

	The following predicate is used to track the recognition process
	where X may be empty:
		recog(X) = matcharg(1,argpos,X)
		matcharg(i,i,"x") = (argchars[i] = 'x')
		matcharg(i,j,X) and matcharg(j+1,k,Y) = matcharg(i,k,X Y)
		recog(X E) and (p = E.) = recog(X p)
		recog(X) or recog(X {Y} Y) ==> recog(X {Y})
		recog(X Y) or recog(X Z) ==> recog(X (Y | Z))
		recog(X Y) or recog(X) ==> recog(X [Y])
*)

procedure getpattern(var pattern : patterntype);
const
	(* Maximum length of the argument string. Should be larger *)
	maxarg = 70;	(* but smaller values make testing easier *)
type
	(* Lexical tokens returned by getnext *)
	pattype = (endofpat,
		   startmatch,
		   endmatch,
		   repetition,
		   matchany,
		   groupstart,
		   groupend,
		   singlechar);
	(* Argument Structure *)
	argindextype = 0..maxarg;
	argtype = record
			(* Contains argument string from command line *)
			argchars : array[1..maxarg] of char;
			(* Number of characters in argument with trailing
			  blanks ignored *)
			arglen : argindextype
		      end;
var
	(* Position of last character of the argument processed by getnext *)
	argpos : argindextype;
	arg : argtype;		(* Argument from the command line *)
	(* Lexical token returned by getnext *)
	token : pattype;
	ch : char;

#include "getarg.i"

    (* GETNEXT - is the lexical analyser for the pattern string.
     It is responsible for :-
	1) returning the next token in the argument,
	2) recognising special (magical) characters,
	3) processing of the escape character which makes special characters
	    no longer special, and
	4) detecting the end of the pattern
      The following relationship is satisfied by getnext:
		{ recog(X) and 0 <= argpos <= arglen }
		getnext
		{ recog(X token) and 0 <= argpos <= arglen
		  and token = singlechar ==> recog(X '\' ch) or recog(X ch) }
    *)
    procedure getnext;
    begin	{ 0 <= argpos <= arglen and recog(X) }
	with arg do
	    begin
		if argpos = arglen then
			(* have reached the end of the pattern argument *)
			token := endofpat
			{ recog(X) and argpos = arglen}
			{ recog(X endofpat) }
		else	{ 0 <= argpos < arglen }
		    begin
			argpos := argpos + 1;
			{ recog(X argchars[argpos]) }
			{ 1 <= argpos <= arglen }
			if argchars[argpos] in['^','$','*','.','[',']','\']then
			    (* next character in pattern is special *)
			    case argchars[argpos] of
			    '^': token := startmatch;
				 { recog(X token) and token = startmatch }
			    '$': token := endmatch;
				 { reco(X token) and token = endmatch }
			    '*': token := repetition;
				 { recog(X token) and token = repetition }
			    '.': token := matchany;
				 { recog(X token) and token = matchany }
			    '[': token := groupstart;
				 { recog(X token) and token = groupstart }
			    ']': token := groupend;
				 { recog(X token) and token = groupend }
			    (* Process the escape character *)
			    '\': if argpos = arglen then
					error('"\" at end of pattern')
					{ false }
				 else	{ 1 <= argpos < arglen }
				     begin
					{ recog(X '\') and 1 <= argpos < arglen }
					argpos := argpos + 1;
					{ 2 <= argpos <= arglen and
					  recog(X '\' argchars[argpos]) }
					ch := argchars[argpos];
					{ recog(X '\' ch) }
					token := singlechar
					{ recog(X '\' ch) and token=singlechar }
				     end
			    end (* case *)
			    { recog(X token) and
				token = singlechar ==> recog(X '\' ch) }
			else { non-special character }
			    begin
				{ recog(X argchars[argpos]) }
				ch := argchars[argpos];
				{ recog(X ch) }
				token := singlechar
				{ recog(X ch) and recog(X token) and token = singlechar }
			    end
		    end (* if *)
	    end (* with *)
	{ recog(X token) and 0 <= argpos <= arglen and
	  token = singlechar ==> recog(X '\' ch) or recog(X ch) }
    end (* getnext *);

    (* GETITEM - recognise an item in the pattern.
	The following relationship is satisfied by getitem:
		{ recog(X token) and not(token in [endofpat,endmatch]) }
		getitem
		{ recog(X item token) }
    *)
    procedure getitem(var it : itemtype);

	(* GETGROUP - recognise a group of character in an item.
	    The following relationship is satisified by getgroup:
		{ recog(X groupstart) }
		getgroup
		{ recog(X group token) }
	*)
	procedure getgroup(var group : charset);
	const
	    negategroup = startmatch;
	var
	    negate : Boolean;
	begin (* getgroup *)
	    { recog(X groupstart) }
	    getnext;
	    { recog(X groupstart token) }
	    group := [];	negate := false;
	    if token = negategroup then
		begin	(* Will match inverse of characters following *)
		    negate := true;
		    getnext
		    { recog(X groupstart negategroup token) }
		end;
	    { recog(X groupstart [ negategroup ] token) }
	    (* Accumulate the set of characters *)
	    (* recog(X groupstart [ negategroup ] { singlechar } token *)
	    while token = singlechar do
		begin
		    group := group + [ch];
		    getnext
		end;
	    (* recog(X groupstart [ negategroup ] { singlechar } token) *)
	    if token = groupend then
		getnext
	    else
		error('closing group bracket "]" expected');
	    (*recog(X groupstart [negategroup] {singlechar} groupend token)*)
	    if negate then
		(* Form the inverse of the set of characters specified *)
		group := [minchar .. maxchar] - group
	    { recog(X group token) }
	end (* getgroup *);

    begin (* getitem *)
	with it do
	{ not (token in [endofpat, endmatch]) and recog(X token) }
	    begin
		if token in [matchany, groupstart, singlechar] then
			case token of
			matchany :  begin
					possiblechars := [minchar .. maxchar];
					getnext
				    end;
				    { recog(X matchany token) }
			groupstart : getgroup(possiblechars);
				     { recog(X group token) }
			singlechar : begin
					possiblechars := [ch];
					getnext
				     end
				     { recog(X singlechar token) }
			end
			{ recog(X (matchany | group | singlechar) token) }
		else
			error('special character in wrong place');
		repeated := (token = repetition);
		if repeated then
		    getnext
		{ recog(X (matchany | group | singlechar) [repetition] token) }
	    end (* with *)
	{ recog(X item token) }
    end (* getitem *);

begin (* getpattern *)
    getarg(1,arg);
    argpos := 0;
    { recog() }
    getnext;
    { recog(token) }
    (* From this point on token always contains the pattern element type
	of the next token in the argument to be recog.
	This condition is maintained by all procedures (getpattern, getitem,
	and getgroup) used in the recognition process *)
    with pattern do
	begin
	    patlen := 0;
	    matchatstart := false;    matchatend := false;
	    if token = startmatch then
		begin	(* pattern to match from beginning of line *)
		    matchatstart := true; getnext
		end;
	    { recog([ matchatstart ] token) }
	   (* match contiguous area of the line *)
	   (* recog([matchatstart] { item } token) *)
	    while not(token in [endofpat, endmatch]) and (patlen < maxpat) do
		begin
		    patlen := patlen + 1;
		    getitem(patitems[patlen])
		end;
	    (* recog([matchatstart] { item } token) *)
	    if token = endmatch then
		begin	(* pattern must match to end of line *)
		    matchatend := true; getnext
		end;
	    (* recog([ matchatstart ] { item } [ matchatend ] token) *)
	    { recog(pattern token) }
	    if token <> endofpat then
		error('"$" in mid pattern or pattern too long')
	end (* with *)
    { recog(pattern endofpat) }
end (* getpattern *);
