/*
** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
**
** This program and library is free software; you can redistribute it and/or
** modify it under the terms of the GNU (Library) General Public License
** as published by the Free Software Foundation; either version 2
** of the License, or any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU (Library) General Public License for more details.
**
** You should have received a copy of the GNU (Library) General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
**-------------------------------------------------------------
** Changed getdefaults to allow metaNames in the user
** configuration file
** G.Hill 4/16/97 ghill@library.berkeley.edu
**
** change sprintf to snprintf to avoid corruption, and use MAXSTRLEN from swish.h
** added safestrcpy() macro to avoid corruption from strcpy overflow
** SRE 11/17/99
**
** added buffer size arg to grabStringValue - core dumping from overrun
** fixed logical OR and other problems pointed out by "gcc -Wall"
** SRE 2/22/00
** 
** counter modulo 128 had parens typo
** SRE 2/23/00
**
** read stopwords from file
** Rainer Scherg (rasc)  2000-06-15
** 
*/

#include "swish.h"
#include "file.h"
#include "mem.h"
#include "string.h"
#include "error.h"
#include "list.h"
#include "hash.h"
#include "index.h"

/* Report whether path names a directory.
** The check uses stat(), so a symbolic link is judged by what it points to.
** Returns 1 for a directory and 0 for anything else, including the case
** where stat() itself fails (missing path, no permission, ...).
*/

int isdirectory(char *path)
{
	struct stat info;

	if (stat(path, &info) != 0) {
		return 0;
	}

	/* the comparison already yields exactly 0 or 1 */
	return (info.st_mode & S_IFMT) == S_IFDIR;
}

/* Report whether path names a regular file.
** The check uses stat(), so a symbolic link is judged by what it points to.
** Returns 1 for a regular file and 0 for anything else, including the case
** where stat() itself fails.
*/

int isfile(char *path)
{
	struct stat info;

	if (stat(path, &info) != 0) {
		return 0;
	}

	/* the comparison already yields exactly 0 or 1 */
	return (info.st_mode & S_IFMT) == S_IFREG;
}

/* Report whether path itself is a symbolic link.
** lstat() is used so that the link is examined rather than its target.
** Returns 1 for a symbolic link and 0 for anything else, including the
** case where lstat() fails.  When NO_SYMBOLIC_FILE_LINKS is defined the
** platform is treated as having no links and the answer is always 0.
*/

int islink(char *path)
{
#ifndef NO_SYMBOLIC_FILE_LINKS
	struct stat stbuf;

	if (lstat(path, &stbuf))
		return 0;

	/* S_IFLNK is one value of the file-type field, not a flag bit, so the
	** field has to be isolated with S_IFMT before comparing (exactly as
	** isdirectory() and isfile() do).  Testing "mode & S_IFLNK" instead
	** would also accept any other type whose bits include those of
	** S_IFLNK.
	*/
	return ((stbuf.st_mode & S_IFMT) == S_IFLNK) ? 1 : 0;
#else
	(void) path;	/* unused on platforms without symbolic links */
	return 0;
#endif
}

/* Get the size, in bytes, of a file.
** Return -1 if there's a problem: either stat() failed, or the file is so
** large that its size cannot be represented in the int this function
** returns.
*/

int getsize(char *path)
{
	struct stat stbuf;
	int size;

	if (stat(path, &stbuf))
		return -1;

	/* st_size is usually wider than int.  Narrow it, then make sure the
	** value survived the conversion; a wrapped or negative result would
	** otherwise be handed to the caller as if it were a real size.
	*/
	size = (int) stbuf.st_size;
	if (size < 0 || size != stbuf.st_size)
		return -1;

	return size;
}

/* Add an entry to the metaEntryList with the given value and the
** appropriate index.
**
**   metaList   address of the list head; updated when the list was empty
**   metaWord   name to register; lower-cased IN PLACE before use.
**              NULL or "" is ignored.
**   isDocProp  non-zero when called for the PropertyNames config tag,
**              zero when called for MetaNames
**
** The special name "automatic" does not create an entry; it only switches
** on applyautomaticmetanames.
**
** Index numbers come from a counter private to this function.  The values
** 0, 1 and every multiple of 128 are never handed out.
*/

void addMetaEntry(struct metaEntry **metaList, char *metaWord, int isDocProp)
{
	static int counter;
	int i;
	struct metaEntry *newEntry;
	struct metaEntry *tmpEntry;
	struct metaEntry *lastEntry = NULL;

	if (metaWord == NULL || metaWord[0] == '\0')
		return;

	/* tolower() is only defined for unsigned char values (and EOF), so
	** bytes >= 0x80 must not be passed as a possibly negative plain char.
	*/
	for (i = 0; metaWord[i]; i++)
		metaWord[i] = (char) tolower((unsigned char) metaWord[i]);

	/* 06/00 Jose Ruiz - Check for automatic metanames
	*/
	if (strcmp(metaWord, "automatic") == 0) {
		applyautomaticmetanames = 1;
		return;
	}

	/* Step the counter past the reserved values.  This is done before the
	** duplicate check, so it happens even when no new entry is created.
	*/
	if (counter == 0)
		counter = 2;
	else if (counter == 1 || !(counter % 128))
		counter++;

	/* One pass over the list: look for a duplicate and remember the tail
	** so a new entry can be appended without walking the list again.
	*/
	for (tmpEntry = *metaList; tmpEntry != NULL; tmpEntry = tmpEntry->next)
	{
		if (strcmp(tmpEntry->metaName, metaWord) == 0)
		{
			/*
			 * found a duplicate entry already in the list.
			 * Since there are two different config tags that can
			 * be used to get here (MetaNames and PropertyNames)
			 * and that might be using the same Meta tag name,
			 * we cannot assume that either one of these was
			 * called first.
			 * The semantics we want for the metaEntry are:
			 *	isDocProperty = 1 if in PropertyNames, else 0
			 *	isOnlyDocProperty = 1 if not in MetaNames, else 0
			 */
			if (isDocProp)
			{
				/* this is a DocumentProperty tag */
				if (!tmpEntry->isDocProperty)
					tmpEntry->isDocProperty = 1;
			}
			else
			{
				/* this is a MetaName tag */
				if (tmpEntry->isDocProperty)
					tmpEntry->isOnlyDocProperty = 0;
			}
			return;
		}
		lastEntry = tmpEntry;
	}

	newEntry = (struct metaEntry *) emalloc(sizeof(struct metaEntry));

	/* isDocProp is true when we see the PropertyNames config tag */
	newEntry->isDocProperty = isDocProp;
	newEntry->isOnlyDocProperty = isDocProp;

	newEntry->metaName = (char *) estrdup(metaWord);
	newEntry->index = counter++;
	newEntry->next = NULL;

	if (lastEntry != NULL)
		lastEntry->next = newEntry;	/* append after the current tail */
	else
		*metaList = newEntry;		/* list was empty */
}

/*
 * Some handy routines for parsing the Configuration File
 */

/* Parse a yes/no directive.
** getconfvalue() is asked for the value of commandTag in line.  If it
** returns nothing the line is not this directive: *yesNoValue is left
** untouched and 0 is returned.  Otherwise *yesNoValue becomes 1 when
** lstrstr() finds "yes" in the value and 0 when it does not, and 1 is
** returned to say the directive matched.
*/
int grabYesNoField(char *line, char *commandTag, int *yesNoValue)
{
	char *answer = getconfvalue(line, commandTag);

	if (!answer) {
		return 0;
	}

	if (lstrstr(answer, "yes")) {
		*yesNoValue = 1;
	} else {
		*yesNoValue = 0;
	}
	return 1;	/* matched commandTag */
}

/* 05/00 Jose Ruiz
** Function rewritten
**
** Fetch the string value of a directive.  This is a thin wrapper that
** hands both arguments to getconfvalue() and returns its result unchanged.
** Callers in this file treat a NULL result as "commandTag did not match".
**
**   line        line of input to be parsed
**   commandTag  constant string to look for
*/
char *grabStringValueField(char *line, char *commandTag)
{
	char *found = getconfvalue(line, commandTag);

	return found;
}

/* Parse an integer directive.
** Returns 0 when grabStringValueField() finds no value for commandTag in
** line, and 1 when the directive matched.  On a match *singleValue is set
** to atoi() of the value, unless dontToIt is non-zero or the value is
** empty / starts with a newline, in which case *singleValue is left as it
** was.
*/
int grabIntValueField(char *line, char *commandTag, int *singleValue, int dontToIt)
{
	char *text = grabStringValueField(line, commandTag);

	if (text == NULL)
		return 0;

	if (!dontToIt && text[0] != '\0' && text[0] != '\n')
		*singleValue = atoi(text);

	return 1;	/* matched commandTag */
}


/* Parse a directive that is followed by a list of words.
**
** The line is searched for commandTag with lstrstr(); the tag is not
** required to be the first token, so the line may be
** "<commandTag> <word> .." or "<other text> <commandTag> <word> ..".
** Returns 0 when the tag is absent and 1 when it is present.
**
** When the tag is present and dontToIt is zero, every word after the tag
** (as delivered by getword()) is added to *listOfWords with addswline().
** If gotAny is not NULL it is set to 1 as soon as one word was added.
** When dontToIt is non-zero nothing is collected, but the function still
** returns 1 for a matching line.
**
** The buffer holding the current word is a function-static allocation
** that is reused for every word and every call.
** NOTE(review): this presumes addswline() keeps its own copy of the
** word - confirm.
*/
int grabCmdOptionsMega(char *line, char *commandTag, struct swline **listOfWords, int *gotAny, int dontToIt)
{
	static int wordcap = 0;
	static char *word = NULL;
	int consumed;
	char *rest;

	/* first call: set up the reusable word buffer */
	if (wordcap == 0) {
		wordcap = MAXSTRLEN;
		word = (char *) emalloc(wordcap + 1);
	}

	rest = lstrstr(line, commandTag);	/* includes main command tag? */
	if (rest == NULL)
		return 0;
	rest += strlen(commandTag);

	if (dontToIt)
		return 1;

	/* grab all words after the command tag */
	for (;;) {
		word = SafeStrCopy(word, getword(rest, &consumed), &wordcap);

		/* stop when getword() consumed nothing or produced an empty /
		** newline-only word
		*/
		if (consumed == 0 || word[0] == '\0' || word[0] == '\n')
			break;

		rest += consumed;
		*listOfWords = (struct swline *) addswline(*listOfWords, word);
		if (gotAny != NULL)
			*gotAny = 1;
	}
	return 1;
}

/* Convenience form of grabCmdOptionsMega(): always collects the words
** (dontToIt = 0) and does not report whether any were found (gotAny = NULL).
** Returns what grabCmdOptionsMega() returns: 1 if commandTag occurs in
** line, 0 otherwise.
*/
int grabCmdOptions(char *line, char *commandTag, struct swline **listOfWords)
{
	int matched;

	matched = grabCmdOptionsMega(line, commandTag, listOfWords, (int *) NULL, 0);
	return matched;
}

/* Helper for getdefaults(): look for tag in line with lstrstr() and, when
** it is there, split the text following the tag with parse_line().
** Returns 1 and stores the parse_line() result in *args when the tag
** occurs in the line; returns 0 and leaves *args alone when it does not.
*/
static int conf_tag_args(char *line, char *tag, StringList **args)
{
	char *after = (char *) lstrstr(line, tag);

	if (!after)
		return 0;
	after += strlen(tag);
	*args = parse_line(after);
	return 1;
}

/* Reads the configuration file and puts all the right options
** in the right variables and structures.
**
**   conffile    path of the configuration file
**   hasdir      in: non-zero means IndexDir words are not collected;
**               out: set to 1 if IndexDir words were collected here
**   hasindex    same contract as hasdir, for IndexFile
**   plimit      receives the first IgnoreLimit value (via atoi)
**   flimit      receives the second IgnoreLimit value (via atoi)
**   hasverbose  non-zero means IndexReport does not overwrite verbose
**
** Lines whose first character is '#' or a newline are skipped.  Every
** other line is tried against the directives below IN ORDER and the first
** one that matches wins; tags are searched for anywhere in the line, so
** the order of the tests matters and must not be shuffled.  A line that
** matches nothing and is also rejected by parseconfline() is printed as a
** "Bad directive"; after the whole file has been read the program then
** exits with status 1.
**
** Unlike IndexReport, the legacy IndexVerbose directive assigns verbose
** regardless of hasverbose.
** Problems opening the file or a wrong number of directive arguments are
** reported through progerr().
*/

void getdefaults(char *conffile, int *hasdir, int *hasindex, long *plimit, long *flimit, int hasverbose)
{
	char line[MAXSTRLEN];
	char *val;
	FILE *conf;
	StringList *args;
	int n;
	int lineno = 0;
	int sawbad = 0;
	int sawdir = 0;
	int sawindex = 0;

	conf = fopen(conffile, "r");
	if (conf == NULL || !isfile(conffile)) {
		snprintf(errorstr, MAXSTRLEN, "Couldn't open the configuration file \"%s\".", conffile);
		progerr(errorstr);
	}

	while (fgets(line, MAXSTRLEN, conf) != NULL) {
		lineno++;
		if (line[0] == '#' || line[0] == '\n')
			continue;

		/* ---- word lists and simple integers ---- */
		if (grabCmdOptionsMega(line, "IndexDir", &dirlist, &sawdir, *hasdir))
			continue;
		if (grabCmdOptions(line, "NoContents", &nocontentslist))
			continue;
		if (grabCmdOptionsMega(line, "IndexFile", &indexlist, &sawindex, *hasindex))
			continue;
		if (grabIntValueField(line, "IndexReport", &verbose, hasverbose))
			continue;
		if (grabIntValueField(line, "MinWordLimit", &minwordlimit, 0))
			continue;
		if (grabIntValueField(line, "IndexComments", &indexComments, 0))
			continue;
		if (grabIntValueField(line, "MaxWordLimit", &maxwordlimit, 0))
			continue;

		/* ---- character classes: copy, sort, rebuild lookup table ---- */
		val = grabStringValueField(line, "WordCharacters");
		if (val) {
			wordchars = SafeStrCopy(wordchars, val, &lenwordchars);
			sortstring(wordchars);
			makelookuptable(wordchars, wordcharslookuptable);
			continue;
		}
		val = grabStringValueField(line, "BeginCharacters");
		if (val) {
			beginchars = SafeStrCopy(beginchars, val, &lenbeginchars);
			sortstring(beginchars);
			makelookuptable(beginchars, begincharslookuptable);
			continue;
		}
		val = grabStringValueField(line, "EndCharacters");
		if (val) {
			endchars = SafeStrCopy(endchars, val, &lenendchars);
			sortstring(endchars);
			makelookuptable(endchars, endcharslookuptable);
			continue;
		}
		val = grabStringValueField(line, "IgnoreLastChar");
		if (val) {
			ignorelastchar = SafeStrCopy(ignorelastchar, val, &lenignorelastchar);
			sortstring(ignorelastchar);
			makelookuptable(ignorelastchar, ignorelastcharlookuptable);
			continue;
		}
		val = grabStringValueField(line, "IgnoreFirstChar");
		if (val) {
			ignorefirstchar = SafeStrCopy(ignorefirstchar, val, &lenignorefirstchar);
			sortstring(ignorefirstchar);
			makelookuptable(ignorefirstchar, ignorefirstcharlookuptable);
			continue;
		}

		if (grabCmdOptions(line, "ReplaceRules", &replacelist)) {
			checkReplaceList();
			continue;
		}
		if (grabYesNoField(line, "FollowSymLinks", &followsymlinks))
			continue;

		/* ---- index header strings ---- */
		val = grabStringValueField(line, "IndexName");
		if (val) {
			indexn = SafeStrCopy(indexn, val, &lenindexn);
			continue;
		}
		val = grabStringValueField(line, "IndexDescription");
		if (val) {
			indexd = SafeStrCopy(indexd, val, &lenindexd);
			continue;
		}
		val = grabStringValueField(line, "IndexPointer");
		if (val) {
			indexp = SafeStrCopy(indexp, val, &lenindexp);
			continue;
		}
		val = grabStringValueField(line, "IndexAdmin");
		if (val) {
			indexa = SafeStrCopy(indexa, val, &lenindexa);
			continue;
		}

		if (grabYesNoField(line, "UseStemming", &applyStemmingRules))	/* 11/24/98 MG */
			continue;
		if (grabYesNoField(line, "IgnoreTotalWordCountWhenRanking", &ignoreTotalWordCountWhenRanking))	/* 11/24/98 MG */
			continue;
		if (grabYesNoField(line, "UseSoundex", &applySoundexRules))	/* 09/01/99 DN */
			continue;

		/* ---- filters (1999-05-05 rasc) ---- */
		val = grabStringValueField(line, "FilterDir");
		if (val) {
			filterdir = SafeStrCopy(filterdir, val, &lenfilterdir);
			continue;
		}
		/* FileFilter fileextension  filerprog */
		if (conf_tag_args(line, "FileFilter", &args)) {
			if (args && args->n == 2) {
				filterlist = (struct filter *) addfilter(filterlist, args->word[0], args->word[1], filterdir);
				freeStringList(args);
			} else {
				progerr("FileFilter requires two values");
			}
			continue;
		}

		if (conf_tag_args(line, "MetaNames", &args)) {
			if (args && args->n) {
				for (n = 0; n < args->n; n++)
					addMetaEntry(&metaEntryList, args->word[n], 0);
				freeStringList(args);
			} else {
				progerr("MetaNames requires at least one value");
			}
			continue;
		}

		if (conf_tag_args(line, "TranslateCharacters", &args)) {
			if (args && args->n == 2) {
				translatechars1 = SafeStrCopy(translatechars1, args->word[0], &lentranslatechars1);
				translatechars2 = SafeStrCopy(translatechars2, args->word[1], &lentranslatechars2);
				freeStringList(args);
				/* both strings were copied above, so the list is no longer needed here */
				if (strlen(translatechars1) != strlen(translatechars2))
					progerr("TranslateCharacters option requires two values of the same length");
			} else {
				progerr("TranslateCharacters requires two values");
			}
			continue;
		}

		if (conf_tag_args(line, "PropertyNames", &args)) {	/* 11/24/98 MG */
			if (args && args->n) {
				for (n = 0; n < args->n; n++)
					addMetaEntry(&metaEntryList, args->word[n], 1);
				freeStringList(args);
			} else {
				progerr("PropertyNames requires at least one value");
			}
			continue;
		}

		if (conf_tag_args(line, "IgnoreWords", &args)) {
			if (args && args->n) {
				if (lstrstr(args->word[0], "SwishDefault")) {
					readdefaultstopwords();
				} else if (lstrstr(args->word[0], "File:")) {	/* 2000-06-15 rasc */
					if (args->n == 2)
						readstopwordsfile(args->word[1]);
					else
						progerr ("IgnoreWords File: requires path");
				} else {
					for (n = 0; n < args->n; n++)
						addstophash(args->word[n]);
				}
				freeStringList(args);
			} else {
				progerr("IgnoreWords requires at least one value");
			}
			continue;
		}

		if (conf_tag_args(line, "IgnoreLimit", &args)) {
			if (args && args->n == 2) {
				*plimit = atoi(args->word[0]);
				*flimit = atoi(args->word[1]);
				freeStringList(args);
			} else {
				progerr("IgnoreLimit requires two values");
			}
			continue;
		}

		/* IndexVerbose is supported for backwards compatibility */
		if (conf_tag_args(line, "IndexVerbose", &args)) {
			if (args && args->n == 1) {
				verbose = (lstrstr(args->word[0], "yes")) ? 3 : 0;
				freeStringList(args);
			} else {
				progerr("IndexVerbose require one value");
			}
			continue;
		}

		if (grabCmdOptions(line, "IndexOnly", &suffixlist))
			continue;

		/* last resort: anything parseconfline() rejects is a bad line */
		if (!parseconfline(line)) {
			printf("Bad directive on line #%d: %s", lineno, line );
			sawbad = 1;
		}
	}
	fclose(conf);

	if (sawbad)
		exit(1);

	if (sawdir && !(*hasdir))
		*hasdir = 1;
	if (sawindex && !(*hasindex))
		*hasindex = 1;
}

/* Checks that all the regex in the replace list are correct.
**
** replacelist is walked as a flat sequence of words:
**   - a word containing "append" or "prepend" is followed by one argument,
**     which is skipped;
**   - a word containing "replace" is followed by a pattern and a
**     replacement; the pattern is compiled with regcomp(REG_EXTENDED) and
**     the replacement is skipped;
**   - any other word is skipped.
** A rule that is cut short at the end of the list simply ends the check.
** If a pattern does not compile, a message is printed and the program
** exits with status 1.
*/
void checkReplaceList(void)
{
	struct swline *tmpReplace;
	static int lenrule = 0;
	static char *rule = NULL;
	static int lenpatt = 0;
	static char *patt = NULL;
	regex_t re;
	int status;

	/* scratch buffers are allocated once and reused on later calls */
	if (!lenrule) rule = (char *) emalloc((lenrule = MAXSTRLEN) + 1);
	if (!lenpatt) patt = (char *) emalloc((lenpatt = MAXSTRLEN) + 1);

	tmpReplace = replacelist;
	while (tmpReplace) {
		rule = SafeStrCopy(rule, tmpReplace->line, &lenrule);

		/*  If it is not replace, just do nothing */
		if (lstrstr(rule, "append") || lstrstr(rule, "prepend")) {
			/* step onto the argument; the common advance at the
			** bottom of the loop then moves past it
			*/
			if (tmpReplace->next)
				tmpReplace = tmpReplace->next;
			else
				return;
		}

		if (lstrstr(rule, "replace")) {
			/* The pattern is the next word.  The list may end right
			** after "replace", so check before dereferencing.
			*/
			tmpReplace = tmpReplace->next;
			if (tmpReplace == NULL)
				return;

			patt = SafeStrCopy(patt, tmpReplace->line, &lenpatt);
			if (patt == NULL)
				return;

			status = regcomp(&re, patt, REG_EXTENDED);
			if (status != 0) {
				/* nothing to regfree(): the contents of re are
				** undefined after a failed regcomp()
				*/
				printf ("Illegal regular expression %s\n", patt);
				/* non-zero status, as getdefaults() uses for a
				** bad directive
				*/
				exit(1);
			}
			regfree(&re); /** Marc Perrin ## 18Jan99 **/

			/* step onto the replacement text, if there is one */
			if (tmpReplace->next)
				tmpReplace = tmpReplace->next;
			else
				return;
		}

		tmpReplace = tmpReplace->next;
	}
}


/*
  read stop words from file
  lines beginning with # are comments
  2000-06-15 rasc

*/

void readstopwordsfile (char *stopw_file) {

  char line[MAXSTRLEN];
  FILE *fp;
  StringList *sl;
  int   i;


#ifdef DEBUG
	   printf ("Open StopWordfile:  %s\n",stopw_file);
#endif

  if ((fp=fopen(stopw_file, "r")) == NULL || !isfile(stopw_file) ) {
      snprintf(line, MAXSTRLEN, "Couldn't open the stopword file \"%s\".",
	       stopw_file);
      progerr(line);
  }


  /* read all lines and store each word as stopword */

  while (fgets(line, MAXSTRLEN, fp) != NULL) {
      if (line[0] == '#' || line[0] == '\n') continue; 

      sl=parse_line(line);
      if(sl && sl->n) {
	for(i=0;i<sl->n;i++) {
	   addstophash(sl->word[i]);
#ifdef DEBUG
	   printf ("  %s\n",sl->word[i]);
#endif
        }
	freeStringList(sl);
      }
  }

  fclose (fp);
  return;
}
