/*
 * Main.java
 *
 * Created on 21 februari 2006, 21:10
 *
 */

package DOPParser;

/**
 *
 * @author gideon
 */
import java.io.*;

/**
 * Command-line entry point of the parser.
 *
 * <p>The class holds the global configuration as public static fields (read by
 * other classes through {@code Main.NAME}) and, in its constructor, configures
 * them from the command line, builds or loads the grammar, and runs the
 * selected testing/evaluation steps.
 */
public class Main {

    /** Grammar built or loaded by the constructor; stays null when grammar loading is skipped. */
    public Grammar myGrammar;

    // ---------------------------------------------------------------------
    // Output / grammar export settings
    // ---------------------------------------------------------------------

    /** If true, terminals, non-terminals and rules of the grammar are printed to the screen. */
    public final static boolean PRINT_OUTPUT = false;

    /**
     * If true, the grammar read off from the treebank is written to file so that it can be
     * imported again later and Goodman does not have to be trained again
     * (choose GRAMMAR_PRINTING_FORMAT = 1 for that). Set by the option export_grammar_file.
     */
    public static boolean EXPORT_CNF_GRAMMAR_TO_FILE = false;
    public final static boolean EXPORT_CFG_GRAMMAR_TO_FILE = false;

    /**
     * Export format:
     * 1 = terminals, non-terminals and rules + probabilities in one file,
     * 2 = only rules, 3 = rules plus probabilities, 4 = rules plus counts.
     * For exporting a target grammar to the BMM algorithm you need format 4.
     */
    public static int GRAMMAR_PRINTING_FORMAT = 1;
    public final static boolean GRAMMAR_PRINTING_PRETERMINALS = false;
    public final static boolean CALCULATE_LIKELIHOOD = false;
    public final static boolean CALCULATE_PERPLEXITY = false;

    // ---------------------------------------------------------------------
    // Grammar source
    // ---------------------------------------------------------------------

    /**
     * If true, the constructor creates the grammar with {@code new Grammar(true)}
     * (grammar read off from the treebank); if false, it loads the grammar from GRAMMAR_FILE.
     * Depending on this flag the user must supply either TREEBANK_FILE or GRAMMAR_FILE.
     */
    public static boolean READOFF_GRAMMAR_FROM_TREEBANK = true;

    /** Treebank file; read only if READ_WSJ_TREEBANK_FROM_DIRECTORY is false. */
    public static String TREEBANK_FILE = "Input/WSJ_labeled_lexical_and_postags.txt";
    // alternative treebank: "Input/OVIS_compleet_labeled.txt"

    /** Only used for lookup in case of FIND_PARSES_CORRESPONDING_WITH_UNLABELED_FROM_TREEBANK. */
    public static String TREEBANK_FILE_UNLABELLED = "Input/WSJ_unlabeled_postags.txt";

    /** If true, the original WSJ files are read from the WSJ directory. */
    public static boolean READ_WSJ_TREEBANK_FROM_DIRECTORY = false;

    /** Grammar file used when READOFF_GRAMMAR_FROM_TREEBANK is false. */
    public static String GRAMMAR_FILE = "Grammars/TreebankTrainSetCFG_Grammar.txt";

    // ---------------------------------------------------------------------
    // Parser settings
    // ---------------------------------------------------------------------

    /** True = WSJ-style brackets; the option ovis_style_brackets sets this to false. */
    public static boolean BRACKET_FORMAT_WSJ_STYLE = true;
    /** Set by the option branching=N. */
    public static int BRANCHING_FACTOR = 2;
    /** Set by the option beam=N. */
    public static int BEAM_WIDTH = 1000;

    /** If false a regular CYK parser is used; set to true by the option dop_parser. */
    public static boolean DOGOODMANDOP = false;
    /** If true, the constructor runs Testing on the test set. */
    public static boolean DOTESTING = true;

    public static boolean PRINT_STANDARD_OUTPUT_TO_FILE = true;
    /** Set to true by the option print_parses. */
    public static boolean PRINT_COMPUTED_PARSES = false;
    /** For printing output parses of CHILDES. */
    public final static boolean DO_PARSING_NO_EVALUATION = false;

    /**
     * EXTRACT_POSTAGS: reads only postags from the treebank, and puts these in the parsetree;
     * if you don't use POSTAGS then it replaces POSTAGS by lexical items.
     * Set to true by the option postag.
     */
    public static boolean EXTRACT_POSTAGS = false;

    /** Unlabelled test set; overwritten with the third command-line argument. */
    public static String TESTSET_UNLABELLED = "Input/WSJ_unlabeled_postags_uppercase.txt";
    /** Gold-standard test set; overwritten with the fourth command-line argument. */
    public static String TESTSET_GOLD_STANDARD = "Input/WSJ_labeled_lexical_and_postags.txt";

    // ---------------------------------------------------------------------
    // Auxiliary output files
    // ---------------------------------------------------------------------

    public final static String OUTPUT_DIR = "./Output";
    /** Output unlabeled text to WSJ_unlabeled.txt after reading labeled text. */
    public final static boolean PRINT_UNLABELED_FILE = false;
    public final static boolean PRINT_LATEX_FILE = false;
    public final static boolean PRINT_CLEANEDUP_LABELED_FILE = false;
    public final static boolean PRINT_CNF_CONVERTED_LABELED_FILE = false;

    /** Compute file of spans (0-3 1-3 etc) from WSJ-style labeled file. */
    public final static boolean PRINT_SPANS = false;
    public final static boolean PRINT_BINARYSPANS = false;
    public final static boolean DO_RIGHT_BRANCHING_TEST = false;
    public final static boolean SWITCH_LEFT_BRANCHING = false;

    // ---------------------------------------------------------------------
    // Evaluation settings
    // ---------------------------------------------------------------------

    /** If true, disregards suffixes of the labels in gold standard, such as NP_SBJ, etc, and identifies all NPs, etc. */
    public final static boolean TAKE_OFF_SBJ = false;
    /** If false, matches induced labels to gold standard labels; if true, other way round. */
    public final static boolean OTHERWAYROUND = false;
    /** For comparing two labeled treebanks, without computing the parses. */
    public final static boolean DIRECT_EVALUATION_NO_PARSING = false;
    public final static boolean FIND_PARSES_CORRESPONDING_WITH_UNLABELED_FROM_TREEBANK = false;
    /** Finds optimal associations between labels of induced parses and gold standard parses. */
    public final static boolean DO_LABEL_ASSOCIATION = false;
    /** If true, evaluates LP and LR, otherwise UP and UR; set to false by the option unlabeled_p_r. */
    public static boolean DO_LABELED_PRECISION_AND_RECALL = true;
    public final static boolean PRINT_LABEL_FREQUENCIES = false;
    public final static boolean PRINT_PARSEVAL_PER_CATEGORY = false;
    public final static boolean REWRITE_OVIS_PARSES_IN_WSJ_FORMAT = false;
    public final static boolean REPLACE_LABELS_IN_GRAMMAR = false;
    public final static boolean DO_BRANCH_COUNTING = false;
    // NOTE(review): MU / START_MU are not used in this file; their meaning must be confirmed at the use sites.
    public static double MU = 1.;
    public final static double START_MU = 0.5;
    // NOTE(review): presumably symbol-type codes; not used in this file.
    public final static int TERMINAL = 1;
    public final static int NONTERMINAL = 2;

    /** Set to true when the command line is unusable; the constructor then returns without doing any work. */
    public static boolean EXIT_APPLICATION = false;

    /** Mode keyword, its file, the unlabelled test set and the gold standard. */
    private static final int REQUIRED_ARGUMENT_COUNT = 4;

    private static final String USAGE =
            "Usage:\n"
            + "  grammarFromTreebank treebank_file testset_unlabeled testset_goldstandard [options]\n"
            + "  grammarFromFile grammar_file testset_unlabeled testset_goldstandard [options]\n"
            + "Options: postag ovis_style_brackets dop_parser export_grammar_file"
            + " unlabeled_p_r print_parses branching=N beam=N";

    /**
     * Configures the static settings from the command line, builds or loads the
     * grammar, and runs the selected testing/evaluation steps.
     *
     * <p>Expected arguments (keywords and options are matched case-insensitively):
     * <pre>
     * grammarFromTreebank treebank_file testset_unlabeled testset_goldstandard [options]
     * grammarFromFile     grammar_file  testset_unlabeled testset_goldstandard [options]
     * </pre>
     * Example:
     * {@code grammarFromTreebank ./Input/WSJ22Test10_labeled.txt ./Input/WSJ22Test10_unlabeled.txt
     * ./Input/WSJ22Test10_labeled.txt postag dop_parser export_grammar_file}
     *
     * <p>If too few arguments are given, or the first argument is not one of the two
     * mode keywords, a message is printed, EXIT_APPLICATION is set to true and the
     * constructor returns without loading a grammar or running any test.
     *
     * @param args the command line arguments
     * @throws NumberFormatException if a branching= or beam= option has no integer value
     * @throws Exception whatever the grammar construction or the testing steps throw
     */
    public Main(String[] args) throws Exception {

        if (args == null || args.length < REQUIRED_ARGUMENT_COUNT) {
            System.out.println(USAGE);
            EXIT_APPLICATION = true;
            return;
        }

        // equalsIgnoreCase is locale-independent, unlike toLowerCase() with the default locale.
        if (args[0].equalsIgnoreCase("grammarFromTreebank")) {
            READOFF_GRAMMAR_FROM_TREEBANK = true;
            TREEBANK_FILE = args[1];
        } else if (args[0].equalsIgnoreCase("grammarFromFile")) {
            READOFF_GRAMMAR_FROM_TREEBANK = false;
            GRAMMAR_FILE = args[1];
        } else {
            System.out.println("Please choose one of the options grammarFromTreebank or grammarFromFile.");
            EXIT_APPLICATION = true;
        }

        // Without a valid mode there is no grammar; stop here instead of testing with a null grammar.
        if (EXIT_APPLICATION) {
            return;
        }

        TESTSET_UNLABELLED = args[2];
        TESTSET_GOLD_STANDARD = args[3];

        applyOptions(args);

        if (!DO_RIGHT_BRANCHING_TEST
                && !FIND_PARSES_CORRESPONDING_WITH_UNLABELED_FROM_TREEBANK
                && !DIRECT_EVALUATION_NO_PARSING) {
            myGrammar = READOFF_GRAMMAR_FROM_TREEBANK
                    ? readOffGrammarFromTreebank()
                    : loadGrammarFromFile();
        }

        if ((DOTESTING || DO_RIGHT_BRANCHING_TEST || DO_PARSING_NO_EVALUATION)
                && !FIND_PARSES_CORRESPONDING_WITH_UNLABELED_FROM_TREEBANK) {
            System.out.println("Starting doTesting... ");
            new Testing(TESTSET_UNLABELLED, TESTSET_GOLD_STANDARD, myGrammar);
        }

        if (DIRECT_EVALUATION_NO_PARSING) {
            // relabeled_parses can be either relabeled induced parses or relabeled treebank parses,
            // depending on OTHERWAYROUND
            String parseFile1 = "Input_for_evaluation/relabeled_parses.txt";
            String parseFile2 = OTHERWAYROUND
                    ? "Input_for_evaluation/Viterbi_parses.txt"            // induced parses
                    : "Input_for_evaluation/Viterbi_Treebank_parses.txt";

            // this constructor does not do any parsing, only evaluation
            new Testing(parseFile1, parseFile2);
        }

        if (FIND_PARSES_CORRESPONDING_WITH_UNLABELED_FROM_TREEBANK) {
            String unlabeledViterbiFile = "Input_for_evaluation/Viterbi_unlabeled.txt";
            Utils.doFindTreebankParsesForUnlabeledViterbi(unlabeledViterbiFile, TREEBANK_FILE_UNLABELLED, TREEBANK_FILE);
        }
    }

    /** Scans all arguments for the optional switches and updates the matching static settings. */
    private static void applyOptions(String[] args) {
        for (String option : args) {
            if (option.equalsIgnoreCase("postag")) {
                EXTRACT_POSTAGS = true;
            }
            if (option.equalsIgnoreCase("ovis_style_brackets")) {
                BRACKET_FORMAT_WSJ_STYLE = false;
            }
            if (option.equalsIgnoreCase("dop_parser")) {
                DOGOODMANDOP = true;
            }
            if (option.equalsIgnoreCase("export_grammar_file")) {
                EXPORT_CNF_GRAMMAR_TO_FILE = true;
            }
            if (option.equalsIgnoreCase("unlabeled_p_r")) {
                DO_LABELED_PRECISION_AND_RECALL = false;
            }
            if (option.equalsIgnoreCase("print_parses")) {
                PRINT_COMPUTED_PARSES = true;
            }
            if (startsWithIgnoreCase(option, "branching=")) {
                BRANCHING_FACTOR = parseIntegerValue(option);
            }
            if (startsWithIgnoreCase(option, "beam=")) {
                BEAM_WIDTH = parseIntegerValue(option);
            }
        }
    }

    /** Locale-independent, case-insensitive variant of {@code text.startsWith(prefix)}. */
    private static boolean startsWithIgnoreCase(String text, String prefix) {
        return text.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /** Returns the integer that follows the first '=' of a {@code name=value} option. */
    private static int parseIntegerValue(String option) {
        String[] parts = option.split("=");
        if (parts.length < 2) {
            throw new NumberFormatException("Option '" + option + "' requires an integer value, e.g. beam=1000");
        }
        try {
            return Integer.parseInt(parts[1]);
        } catch (NumberFormatException e) {
            NumberFormatException detailed =
                    new NumberFormatException("Option '" + option + "' requires an integer value, e.g. beam=1000");
            detailed.initCause(e);
            throw detailed;
        }
    }

    /** Creates the grammar via {@code new Grammar(true)}, optionally prints/exports it, and indexes its rules by RHS. */
    private static Grammar readOffGrammarFromTreebank() throws Exception {
        Grammar grammar = new Grammar(true);

        if (PRINT_OUTPUT) {
            Utils.printoutTerminalsNonTerminalsAndRules(grammar);
        }

        if (EXPORT_CNF_GRAMMAR_TO_FILE) {
            // print grammar file of the rules + probabilities
            if (GRAMMAR_PRINTING_FORMAT == 1) {
                Printer.printToFileTerminalsNonTerminalsAndRules(grammar);
            } else {
                Printer.printRulesToFile(grammar);   // prints file without terminals and non-terminals
            }
        }

        if (!EXPORT_CFG_GRAMMAR_TO_FILE && !CALCULATE_LIKELIHOOD) {
            System.out.println("Start indexing rules according to RHS...");
            grammar.indexRulesAccordingToRHS();
        }
        return grammar;
    }

    /** Loads the grammar from GRAMMAR_FILE: a list of non-terminals, terminals, and rules + probabilities. */
    private static Grammar loadGrammarFromFile() throws Exception {
        Grammar grammar = new Grammar(GRAMMAR_FILE);

        if (PRINT_OUTPUT) {
            Utils.printoutTerminalsNonTerminalsAndRules(grammar);
        }
        return grammar;
    }

    /**
     * Program entry point; all work happens in the {@link #Main(String[])} constructor.
     *
     * @param args the command line arguments
     * @throws Exception whatever the constructor throws
     */
    public static void main(String[] args) throws Exception {
        new Main(args);
    }

}