#define MICROSOFT 1
/*
Creates a word list of arbitrary length, balanced for valence, and
within valence, balanced for word length and word frequency.
Uses the data file ANEW.txt to obtain its normative data.
*/

// ANEW ratings run from 1-9. 
// negative = 1-3
// neutral = 4-6
// positive = 7-9
// by default we only take words with low arousal (<5)

#include<stdio.h>
#include<conio.h>
#include<iostream.h>
#include<dos.h>                                                                    
#include<stdlib.h>
#include<string.h>
#include<time.h>
#include<math.h>
//#include<alloc.h>

#define MAXWDLEN 15

// One entry of the word norms as read by getwords(), plus the bookkeeping
// fields used while building a list.
class valword {
  public:
  char  text[30];    // the word itself
  int   WdNum;       // second column of the data file
  int   Frequency;   // last column of the data file
  int   len;         // strlen(text)
  int   chosen;      // non-zero once the word has been selected or excluded
  int   valence;     // category code: 0 = unusable, 1..6 index VALENCES
  float ValMn;       // valence mean
  float ValSd;       // valence standard deviation
  float AroMn;       // arousal mean
  float AroSD;       // arousal standard deviation
  float DomMN;       // presumably dominance mean -- not used by the selection code
  float DomSD;       // presumably dominance standard deviation -- not used either
  };

// ms compatibility
#ifdef MICROSOFT
// Stand-in for a random(num) routine on toolchains that lack one (see the
// MICROSOFT guard): returns rand() reduced modulo num, i.e. a value in
// 0..num-1 for positive num. A num of 0 yields 0 rather than dividing by zero.
int random(int num) {
   if (num == 0) return 0;   // rand() % 0 is undefined behaviour
   return(rand() % num);
}
// Seeds rand() from the current clock time so each run draws a different
// sequence.
void randomize(){
   time_t now = time( NULL );
   srand( (unsigned) now );
}
#endif

// Rounds num to the nearest integer, halves away from zero.
// Adding 0.5 and truncating is only correct for non-negative values
// (it turned -1.6 into -1), so negative values subtract 0.5 instead.
int round(float num){
  if (num < 0) return((int) (num-0.5));
  return((int) (num+0.5));
}
// Returns the larger of a and b (b when the two are equal).
int max(int a, int b){
  if (a > b) return a;
  return b;
}


// Reads an existing word file and marks every word of wordlist that appears
// in it as already chosen, so that choosewords() will not select it.
//   negname  - path of the file of words to exclude. Its first line holds the
//              number of word lines that follow; the first whitespace-delimited
//              token of each following line is taken as the word.
//   totwords - number of valid entries in wordlist
//   wordlist - entries to search; each matching entry gets chosen=1
// Prints one '.' per line read as a progress indicator.
// Exits the program if the file cannot be opened.
void markbadwords(char negname[12],int totwords,valword *(wordlist[])){
   FILE *badwordfp;
   int i,j;
   char ret[255];
   char word[30];
   int numwds=0;

   if(!(badwordfp=fopen(negname,"r"))) {
     printf("Couldn't open bad word file %s\n",negname);
     exit(EXIT_FAILURE);
     }

   // Header line: the word count. Without a readable count there is nothing
   // we can safely loop over.
   if (fscanf(badwordfp,"%d",&numwds)!=1) {
     printf("Bad word file %s does not start with a word count\n",negname);
     numwds=0;
     }
   fgets(ret,sizeof ret,badwordfp); // discard the rest of the header line

   for (i=0;i<numwds;i++){
     // stop if the file is shorter than its declared count
     if (!fgets(ret,sizeof ret,badwordfp)) break;
     printf(".");
     // extract the word; the width keeps it inside word[30]
     if (sscanf(ret,"%29s",word)!=1) continue; // blank line
     for (j=0;j<totwords;j++){
       if (!strcmp(word,wordlist[j]->text)) {
         wordlist[j]->chosen=1;
         break; // only the first matching entry is marked
         }
       }
   }
   fclose(badwordfp);
}


// reads words from the ANEW.txt file
// does ok up to word 175 (176- concentrate through 184 are bad) by word read #186
int getwords(char *fname, valword *(wordlist[])) {
  FILE *fp=fopen(fname,"r");
  int index=0,lnum;
  char str[260];
  float ValMn,AroMn;

  // why do I need the following not to get a float not found error?
/*
	int b;
	float a,d;
	char c[30];
	FILE *fp2=fopen("test.txt","r");
	//printf("Type somethings --> ");
	fscanf(fp2,"%s",&c);
	fscanf(fp2,"%d",&b);
	fscanf(fp2,"%f",&a);
	fscanf(fp2,"%f",&d);
	//printf("%s,%d,%f,%f\n",c,b,a,d);
	fclose(fp2);
*/
   

  ValMn= (float) 3.2;
  for(lnum=1;lnum<42;lnum++) fgets(str,256,fp); // bypass the first 41 lines

  while (lnum++<1072){
    fgets(str,256,fp);
    wordlist[index]=(new valword);
    wordlist[index]->chosen=0;
    sscanf(str,"%s\t%d\t%f\t%f\t%f\t%f\t%f\t%f\t%d",
	   &wordlist[index]->text,
	   &wordlist[index]->WdNum,
	   &wordlist[index]->ValMn,
	   &wordlist[index]->ValSd,
	   &wordlist[index]->AroMn,
	   &wordlist[index]->AroSD,
	   &wordlist[index]->DomMN,
	   &wordlist[index]->DomSD,
	   &wordlist[index]->Frequency);
    //if(index%50==0)
    //  printf("%d:%s\n",index,wordlist[index]->text);
    //if((index> 176) && (index < 200))
    //  printf("%s,%d,%f,%f\n",wordlist[176]->text,wordlist[176]->WdNum,
    //       wordlist[176]->ValMn,wordlist[176]->ValSd);

    ValMn=wordlist[index]->ValMn;
    AroMn=wordlist[index]->AroMn;
    wordlist[index]->valence=0; // start it as unuseable
    // select valence based on lying within desired valence and arousal
    // ranges.
    // valence: -: 1-3, N:4-6, +:7-9
    // arousal is based on the median for the valence +/- 25 words on
    //   each side to get rid of noise
    if(wordlist[index]->ValSd<=2) // if the valence is fairly reliable
      if (ValMn<3) { // negative
	 if      (AroMn<=5.34) wordlist[index]->valence=2; // -
	 else if (AroMn>=6.05) wordlist[index]->valence=5; // -a
	 }
      else if (ValMn>7) {
	 if      (AroMn<=5.47) wordlist[index]->valence=1; // +
	 else if (AroMn>=5.90) wordlist[index]->valence=4; // +a
	 }
      else if ((ValMn>4) && (ValMn<6)) {
	 if      (AroMn<=3.98) wordlist[index]->valence=3; // N
	 else if (AroMn>=4.23) wordlist[index]->valence=6; // Na
	 }

    wordlist[index]->len=strlen(wordlist[index]->text);
	//cout << index << " " << wordlist[index]->text << "\n";
	//getch();
	index++;
  }
  return(index);
}

// Marker stored in choosewords()' partner table for "no word picked".
#define UNUSED -1

// Printable label for each valence code; code c is stored at index c-1.
static char VALENCES[6][3]= {
  "+ ",   // 1
  "- ",   // 2
  "N ",   // 3
  "+a",   // 4
  "-a",   // 5
  "Na"    // 6
};

// Prints a tab-separated table of every entry whose chosen flag is set:
// running count, word, valence label, valence mean, arousal mean, length and
// frequency.
//   wordlist - entries to scan
//   totwords - number of valid entries in wordlist
// NOTE(review): markbadwords() also sets chosen on the words it excludes, so
// those words are printed here too -- confirm that this is intended.
void printchosen(valword *wordlist[],int totwords) {
  int count=1;
  const int numcodes=(int)(sizeof VALENCES/sizeof VALENCES[0]);

  printf("\ncount\tWord\t\tVal\tValMn\tAroMn\tLen\tFrequency\n");
  for (int i=0;i<totwords;i++) {
    if(!wordlist[i]->chosen) continue;

    // A chosen word can carry valence 0 (unclassified), which would index
    // VALENCES[-1]; print a placeholder instead of reading out of bounds.
    int v=wordlist[i]->valence;
    const char *label=(v>=1 && v<=numcodes) ? VALENCES[v-1] : "??";

    printf("%d\t%-15s\t%s\t%.3f\t%.3f\t%d\t%d\t\n", count++,
           wordlist[i]->text,
           label,
           wordlist[i]->ValMn,
           wordlist[i]->AroMn,wordlist[i]->len, wordlist[i]->Frequency);
  }
}


// Marks a random, balanced selection of words as chosen (chosen=1).
// We only want to pick words we can balance: for each randomly drawn
// "primary" word of valence j, one unchosen partner word is drawn for every
// other valence 1..numvalences, each within +/-2 letters in length and +/-10
// in frequency of the primary. If no partner can be found for some valence,
// the primary and the partners already picked for it are un-chosen again and
// a new primary is drawn.
//   wordlist    - candidate entries (valence, len, Frequency, chosen are used)
//   totwords    - number of valid entries in wordlist
//   numwords    - total number of words wanted
//   numvalences - highest valence code to use (main passes 3 or 6)
// NOTE(review): the random draws that look for a primary word loop until one
// is found; nothing here ends them if no unchosen word of valence j is left.
void choosewords(valword *wordlist[], int totwords, int numwords, int numvalences){
  // currother[k] holds the wordlist index of the partner picked for valence k,
  // or UNUSED; slot 0 is not used
  int current,j,k,l,currother[7];
  // counter: number of words currently marked chosen by this call
  // nomatch: rejected draws for the partner currently being searched
  int counter=0, nomatch=0;
  
  //printf("Choosing words\n");
  while (counter<(numwords-(numvalences-1))) {
    // NOTE(review): j stops at numvalences-1, so the highest valence is never
    // used as the primary word -- confirm that this is intended.
    for (j=1; (j<numvalences && (counter<numwords-(numvalences-1))) ; j++) {
    // pick a word
    PICKWORD:
      ;
      // draw until we hit an unchosen word of valence j
      current=random(totwords);
      while ((wordlist[current]->chosen) || wordlist[current]->valence !=j)
        current=random(totwords);
      wordlist[current]->chosen=1; counter++;
      //printf("\nWord %d=%s ",counter,wordlist[current]->text);

      // try to match it with each other valence, 100 times at most

      for (k=1;k<=numvalences;k++) currother[k]=UNUSED;
      for (k=1; k<=numvalences; k++) {
        if(k==j) k++; // bypass the one we're on
        // only look for a partner while fewer than numwords words are chosen
        if (counter <= numwords-1) {
          currother[k]=random(totwords);
          nomatch=0;
          // redraw while the candidate is unsuitable; nomatch is incremented
          // only when the candidate was unsuitable, and ends above 100 only
          // when the search gave up
          while (((wordlist[currother[k]]->chosen==1) ||
                  (wordlist[currother[k]]->valence !=k) ||
                  (wordlist[currother[k]]->len > (wordlist[current]->len+2)) ||
                  (wordlist[currother[k]]->len < (wordlist[current]->len-2)) ||
                  (wordlist[currother[k]]->Frequency > (wordlist[current]->Frequency+10)) ||
		  (wordlist[currother[k]]->Frequency < (wordlist[current]->Frequency-10))
		  //badword(wordlist[currother[k]]->text,badwords)
		  )
                 && (nomatch++<100)
                 )
            {
	      //printf(".");
	      currother[k]=random(totwords);
	    }
		  // forget the last draw if it still fails the criteria, so the
		  // rollback below does not un-choose an unrelated word
		  if ((wordlist[currother[k]]->chosen==1) ||
                  (wordlist[currother[k]]->valence !=k) ||
                  (wordlist[currother[k]]->len > (wordlist[current]->len+2)) ||
                  (wordlist[currother[k]]->len < (wordlist[current]->len-2)) ||
                  (wordlist[currother[k]]->Frequency > (wordlist[current]->Frequency+10)) ||
		  (wordlist[currother[k]]->Frequency < (wordlist[current]->Frequency-10)) ||
                  (nomatch>100))
			currother[k]=UNUSED;


          if (nomatch>100) {
	    //pick another original word
            wordlist[current]->chosen=0; counter--;// kill current word
            for(l=1;l<=k;l++)
	      if(currother[l]!=UNUSED) {wordlist[currother[l]]->chosen=0; counter--;} // kill matches
            //printf(" rejected ");
            goto PICKWORD;
          }
          // a suitable partner was found: keep it
          wordlist[currother[k]]->chosen=1; counter++;
          //printf("%s ",wordlist[currother[k]]->text);
        }
      }
	  //printchosen(wordlist,totwords);
    }
  }
}



// Entry point.
//   argv[1] - total number of words wanted (defaults to 12 when omitted)
//   argv[2] - optional; a value starting with 'N' leaves out arousal words
//   argv[3] - optional; file of words to exclude (see markbadwords)
// Reads ANEW.txt, marks excluded words, chooses a balanced list and prints it.
// With no arguments the usage text is printed and the program still runs
// with the default word count.
// Returns 0 on success, 1 if no words could be read.
int main(int argc, char *argv[]) {
  valword *wordlist[1200];
  char fname[12];
  int arousew=1,i=0;
  int numwords=12,totwords=0;

  sprintf(fname,"ANEW.txt");

  if (argc<2) {
    printf("Usage: genanew #words [YES/NO arousewords] [exclude filename]\n");
    printf("To run the wordlist program you must type\n");
    printf("the total number of words you want.\n");
    printf("  You may elect no arousal words by typing\n");
    printf("the word 'NO' after the word file.\n");
    printf("   You may specify a file of words to exclude\n");
    printf("as a third argument.\n"); // newline was missing, gluing the next line on
    printf("  So, a sample command might be\n");
    printf("      wordlist 16 NO\n");
    printf("which would generate a list of 16 non-arousing\n");
    printf("positive, negative, and neutral words.\n");
  }
  else numwords=atoi(argv[1]);
  if (argc>2) {if(argv[2][0]=='N') arousew=0;}

  randomize();
  totwords=getwords(fname,wordlist);
  if (totwords<=0) {
    // choosewords() would draw from an empty list
    printf("No words could be read from %s\n",fname);
    return 1;
  }
  if (argc>3 && strlen(argv[3])>1) {
    markbadwords(argv[3],totwords,wordlist);
    }

  // valence codes 1..6 include the arousing categories, 1..3 do not
  choosewords(wordlist,totwords,numwords,arousew ? 6 : 3);
  printchosen(wordlist,totwords);

  for(i=0;i<totwords;i++){delete wordlist[i];}
  printf("Done.\n");
  return 0;
}
