package DOPParser;
/*
 * Grammar.java
 *
 * Created on 24 november 2005, 16:50
 *
 */

/**
 *
 * @author gideon
 */

import java.util.*;
import java.io.*;
import java.text.*;


/**
 * a grammar has an HashMap of Terminals
 * and a HashMap of nonTerminals and their names as hashcode (which contain the production rules inside them
 * it has methods for parsing, generate_all, generate random string
 */
public class Grammar {
    
    protected  HashMap<String, Terminal> Terminals = new HashMap<String, Terminal>();
    protected  HashMap<String, NonTerminal> nonTerminals = new HashMap<String, NonTerminal>();
    protected  HashMap<String, HashSet<Rule>> rulesIndexedByUnaryRHS = new HashMap<String, HashSet<Rule>>();
    protected  HashMap<String, HashSet<Rule>> rulesIndexedByBodyLeftNonT = new HashMap<String, HashSet<Rule>>();
    //protected  HashMap<String, HashSet<Rule>> rulesIndexedByRHSTwo = new HashMap<String, HashSet<Rule>>();
    protected  HashMap<String, GenericPair<String, parseTree>> myTreeBank = new HashMap<String, GenericPair<String, parseTree>>();
    public static int nrBinaryBranches = 0, nrTertiaryBranches = 0, nrQuarteryBranches = 0, nrQuinteryBranches = 0, nrSexteryBranches = 0;
    
    
    public static ArrayList<String> quinteryBranchSentences = new ArrayList<String> ();
    protected static Random rnd = new Random(2096695);
    
    /** Creates a new instance of Grammar 
     * from a file of annotated training samples
     *first it  reads the annotated corpus sentence by sentence and creates parseTree objects from it
     *then it adds the parseTree to the grammar
     */
    public Grammar(boolean blnDummy) throws Exception {
        
            //create TOP, and add to list
            NonTerminal myNonTerminal = new NonTerminal("TOP");
            this.nonTerminals.put("TOP", myNonTerminal);

            BufferedReader buff = null;

            String mySentence;
            int treeIndex = 0;
            parseTree myParseTree = null;
            
            ArrayList<String> completeSentences = new ArrayList<String> ();
            ArrayList<String> unLabeledSentences = new ArrayList<String> ();
            ArrayList<String> labeledSentences = new ArrayList<String> ();
            HashMap<String, String> labeledAndUnlabeledSentences = new HashMap<String, String> ();
            ArrayList<String> latexSentences = new ArrayList<String> ();
            ArrayList<String> spanSentences = new ArrayList<String> ();
            //String=rule, Integer=rule count
            HashMap<String, HashMap<String, Integer>> rulesForPrinting = new HashMap<String, HashMap<String, Integer>>();
            HashMap<String, Integer> preterminalRulesForPrinting = new HashMap<String, Integer>();
            HashSet<String> terminalsForPrinting = new HashSet<String>();
            //HashMap<String, Integer> rulesForPrinting = new HashMap<String, Integer>();
            StringBuffer myCompleteSentence = new StringBuffer();
            String previousCharacter = "";
           
            //iterate over files of WSJ
            //"d:/wsj_combined/02
            if (Main.BRACKET_FORMAT_WSJ_STYLE) {
                if (Main.READ_WSJ_TREEBANK_FROM_DIRECTORY) {
                    for (String myFile : Utils.getListOfWSJFiles()) {

                        buff = new BufferedReader(new FileReader(myFile));
                        System.out.println("Processing file " + myFile + "..." + "; completeSentences.size()=" + completeSentences.size());
                        while ((mySentence = buff.readLine()) !=null){


                            //collect sentence over multiple lines and remove spurious spaces
                            if (mySentence.startsWith("(")) {
                                //store previous sentence
                                if (myCompleteSentence.toString() != null && !myCompleteSentence.toString().equals("")) {
                                    completeSentences.add(myCompleteSentence.toString().substring(1));
                                }

                                //beginning of sentence
                                myCompleteSentence = new StringBuffer();
                                myCompleteSentence.append("+");
                                previousCharacter = "";
                            }

                            //go through sentence and remove spurious spaces    
                            for (int characterPosition =0; characterPosition < mySentence.length(); characterPosition++) {    

                                if (mySentence.substring(characterPosition, characterPosition+1).equals(" ") ) {
                                   //only append if not previous is also space 
                                    if (!previousCharacter.equals(" ")) myCompleteSentence.append(mySentence.substring(characterPosition, characterPosition+1));
                                }
                                else myCompleteSentence.append(mySentence.substring(characterPosition, characterPosition+1));

                                previousCharacter = mySentence.substring(characterPosition, characterPosition+1);
                            }

                        }   //next line of buffer

                       //write last sentence of buffer
                       if (myCompleteSentence.toString() != null) {
                                    completeSentences.add(myCompleteSentence.toString().substring(1));  //without the +
                                }
                        myCompleteSentence = new StringBuffer();
                        previousCharacter = ""; 

                        buff.close();
                      } //list of files
                } //if READ_WSJ_DIRECTORY
            else {  //read preprocessed single WSJ-file
                
                buff = new BufferedReader(new FileReader(Main.TREEBANK_FILE));
                while ((mySentence = buff.readLine()) !=null){
                    //NOTE: this file has TOP already in it!! so cut first "(TOP " and last bracket off
                    //completeSentences.add(mySentence.substring(5, mySentence.length()-1));
                    completeSentences.add(mySentence);
                }               
            }
           int myCounter = 0,  nrTerminals = 0;;
           System.out.println("At completion completeSentences.size()=" + completeSentences.size());

           
           
           //extract parses from sentence
           
           for (String simpleSentence : completeSentences) {
               myCounter++; 
               if (myCounter % 500 ==0) System.out.println("##############################" + myCounter + ": " +simpleSentence + "##############################");
               //System.out.println(simpleSentence);                     
                //remove first and last spurious bracket from sentence
               //xxx check!!! helaas begint niet altijd met ( (, maar soms ook met ((!!!
               if (Main.READ_WSJ_TREEBANK_FROM_DIRECTORY) simpleSentence = simpleSentence.substring(1, simpleSentence.length()-1);
               //if (simpleSentence.startsWith(" ")) simpleSentence = simpleSentence.substring(1);
               simpleSentence = simpleSentence.trim();
               myParseTree = ExtractParseFromWSJText(simpleSentence, false);

               //myParseTree.printToLatex(false, false, "");
               //you still need to remove nonTerminal nodes that have no children
               myParseTree.removeEmptyNonTerminalNodes();

               //you must do this recursively, because empty nodes may stack
               myParseTree.removeEmptyNonTerminalNodes();

               myParseTree.removeEmptyNonTerminalNodes();

               //filter out WSJ10 
               nrTerminals =0;
                for (Node myNode : myParseTree.getNodes()) {

                   if (myNode.getType()==Main.TERMINAL) {  //TERMINALS
                       nrTerminals++;
                   }
                }


               if (nrTerminals<=10) {
            	    
                    boolean hasQuinteryBranches = false;
                    if (Main.DO_BRANCH_COUNTING) {
                        hasQuinteryBranches = myParseTree.doBranchCounting()  ;
                    }
                    
                    if(!hasQuinteryBranches) {
                    	
                       ///////////////////       PREPARE FOR PRINTING    /////////////////
                        if (Main.PRINT_UNLABELED_FILE || Main.PRINT_LATEX_FILE || Main.PRINT_CLEANEDUP_LABELED_FILE || Main.PRINT_CNF_CONVERTED_LABELED_FILE || Main.PRINT_SPANS || Main.CALCULATE_LIKELIHOOD) {
                            prepareParseTreeForPrinting(myParseTree, unLabeledSentences, labeledSentences, latexSentences, spanSentences);
                        }
                        treeIndex++;
                        //System.out.println("line " + treeIndex + ": " + mySentence);

                        if (Main.EXPORT_CFG_GRAMMAR_TO_FILE)
                            prepareCFGGrammarRulesForPrinting(myParseTree, rulesForPrinting, preterminalRulesForPrinting, terminalsForPrinting);
                        
                        //preprocessParseTreeAndAddToGrammar method includes conversion to CNF
                        else preprocessParseTreeAndAddToGrammar(myParseTree, treeIndex);

                        //in CNF-format here
                        if (Main.PRINT_CNF_CONVERTED_LABELED_FILE) {
                            labeledSentences.add(myParseTree.printWSJFormat());
                        }

                        if (Main.PRINT_BINARYSPANS) {
                            System.out.println("Sentence #" + myCounter + ": " + simpleSentence);
                            //spanSentences.add(myParseTree.printToLatex(false, true, ""));
                            spanSentences.add(myParseTree.printSpans());
                        }  
                    }   //if(!hasQuinteryBranches)
               }

                //printToLatex(boolean blnPrintNrSubtrees, boolean blnPrintNodeSpans
           }    //for (String simpleSentence : completeSentences)

            }   //if (Main.BRACKET_FORMAT_WSJ_STYLE)         
                
            else {	//OVIS
                
                //buff = new BufferedReader(new FileReader("WSJ_eenzin_labeled.txt"));
                buff = new BufferedReader(new FileReader(Main.TREEBANK_FILE));
                treeIndex =0;
                while ((mySentence = buff.readLine()) !=null){
                    treeIndex++;
                    //System.out.println("line " + treeIndex + ": " + mySentence);

                    myParseTree = ExtractParseFromOVISText(mySentence);
                    
                    boolean hasQuinteryBranches = false;
                    if (Main.DO_BRANCH_COUNTING) {
                        hasQuinteryBranches = myParseTree.doBranchCounting()  ;
                    }
                    if(!hasQuinteryBranches) {
                        ///////////////////       PREPARE FOR PRINTING    /////////////////
                        if (Main.PRINT_UNLABELED_FILE || Main.PRINT_LATEX_FILE || Main.PRINT_CLEANEDUP_LABELED_FILE || Main.PRINT_CNF_CONVERTED_LABELED_FILE || Main.PRINT_SPANS || Main.CALCULATE_LIKELIHOOD) {
                            prepareParseTreeForPrinting(myParseTree, unLabeledSentences, labeledSentences, latexSentences, spanSentences);
                        }
                        if (!Main.EXPORT_CFG_GRAMMAR_TO_FILE)
                                preprocessParseTreeAndAddToGrammar(myParseTree, treeIndex);

                        else prepareCFGGrammarRulesForPrinting(myParseTree, rulesForPrinting, preterminalRulesForPrinting, terminalsForPrinting);

                        //in CNF-format here
                        if (Main.PRINT_CNF_CONVERTED_LABELED_FILE) {

                            labeledSentences.add(myParseTree.printOVISFormat(true));
                        }

                        if (Main.PRINT_BINARYSPANS) {
                            System.out.println("Sentence #" + treeIndex + ": " + mySentence);
                            myParseTree.printToLatex(false, true, "");
                            spanSentences.add(myParseTree.printSpans());
                        } 
                    }   //if(!hasQuinteryBranches)
                }
                buff.close();
            }
            
                       
            //System.out.println("Before calculateRuleProbabilities");
            if (!Main.EXPORT_CFG_GRAMMAR_TO_FILE) calculateRuleProbabilities();
       
            if (Main.CALCULATE_LIKELIHOOD) {
                
            	calculateLikelihood(myTreeBank, this);
                  
            }   //if (Main.CALCULATE_LIKELIHOOD)
            
            ///////////////        PRINTING        ////////////////
       
            if (Main.EXPORT_CFG_GRAMMAR_TO_FILE) Printer.printCFGGrammar(rulesForPrinting, preterminalRulesForPrinting, terminalsForPrinting);
            
                  
            if (Main.PRINT_UNLABELED_FILE) Printer.printSentences(unLabeledSentences, "unlabeled.txt");
            
            //write latex text to file
            if (Main.PRINT_LATEX_FILE) Printer.printSentences(latexSentences, "WSJ_latex.txt");
                
            //write labeled text to file
            if (Main.PRINT_CLEANEDUP_LABELED_FILE || Main.PRINT_CNF_CONVERTED_LABELED_FILE) 
            	Printer.printSentences(labeledSentences, "WSJ_labeled.txt");
                      
            if (Main.PRINT_SPANS) Printer.printSentences(spanSentences, "Spanfile.txt");
               
            if (Main.DO_BRANCH_COUNTING) {
                System.out.println("nrBinaryBranches=" + nrBinaryBranches);
                System.out.println("nrTertiaryBranches=" + nrTertiaryBranches);
                System.out.println("nrQuarteryBranches=" + nrQuarteryBranches);
                System.out.println("nrQuinteryBranches=" + nrQuinteryBranches);
                System.out.println("nrSexteryBranches=" + nrSexteryBranches);
                
                //print parses with quintery branches
                Printer.printSentences(quinteryBranchSentences, "QuinteryBranchSentences.txt");
           
            }
    }

	public static void calculateLikelihood(HashMap<String, GenericPair<String, parseTree>> bankOfParses, Grammar myGrammar) throws IOException {
		//System.out.println("In CALCULATE_LIKELIHOOD");
		
		double totalLogLikelihood = 0d, logLikelihood=0d, sentenceProbability=0d;
		boolean ruleFound = false;
		TreeMap<Double, ArrayList<String>> sentencesSortedByLikelihood = new TreeMap<Double, ArrayList<String>>();
		HashMap<Integer, Double> totalProbabilityForCertainLength = new HashMap<Integer, Double>();
		HashMap<Integer, Integer> totalSentencesForCertainLength = new HashMap<Integer, Integer>();
		NonTerminal myNodeNonTerminal=null, ChildNonTerminal1=null, ChildNonTerminal2=null;
		Constituent Child1;
		parseTree cnfParseTree = null;
		int sentenceLength;
		
		
		//for (String myLabeledSentence : labeledSentences) {
		//for (parseTree cnfParseTree : bankOfParses) {
		int testCounter = 0;
		//bankOfParses is HM with key=unlabeled sentence and value =parseTree
		for (String myUnlabeledSentence : bankOfParses.keySet()) {
		    testCounter++;
		    //if (testCounter%200==0) System.out.println("testCounter=" + testCounter + "; sentence=" + myUnlabeledSentence.split("#")[0]);
		    System.out.println ("in calculate likelihood" + myUnlabeledSentence.split("#")[0]);
		    
		    //XXX it is CFG now!!! no! CNF
		    //GenericPair<String, parseTree>
		    cnfParseTree = bankOfParses.get(myUnlabeledSentence).getSecond();
		    System.out.println("parseTree=" + cnfParseTree.printWSJFormat());
		    logLikelihood = 0d;
		    sentenceProbability = 1d;
		    //loop over nodes, and multiply the probabilities; also remember total
		    for (Node myNode : cnfParseTree.getNodes()) {
		        //if (testCounter%200==0) System.out.println("Node=" + myNode.getName());
		         Rule tempRule; 
		       
		         myNodeNonTerminal = myGrammar.nonTerminals.get(myNode.getName());
		         
		         //you must reconstruct the rule (LHS and RHS) to find it in grammar so you can obtain rule probability
		        if (myNode.getChildNodes().size()==2) {
		            //find the children
		            
		            ChildNonTerminal1 =  myGrammar.nonTerminals.get(myNode.getChildNodes().get(0).getName());
		            ChildNonTerminal2 =  myGrammar.nonTerminals.get(myNode.getChildNodes().get(1).getName());
		            
		            //this rule may not be unique
		            //tempRule = new Rule(myNodeNonTerminal, rhsArrayofConstituents);
		            tempRule = new Rule(myNodeNonTerminal, ChildNonTerminal1, ChildNonTerminal2 );
		            if (tempRule==null) System.out.println("tempRule==null");
		            
		            //System.out.println("tempRule=" + myNode.getName() + "-> " + myNode.getChildNodes().get(0).getName() + " >< " + myNode.getChildNodes().get(1).getName() + "<<");
		            //System.out.println("tempRule=" + myNodeNonTerminal.getName() + "-> " + ChildNonTerminal1.getName() + " >< " + ChildNonTerminal2.getName() + "<<");
		            ruleFound = false;
		            //enumerate all rules with LHS equal to current node
		            for (Rule existingRule : myNodeNonTerminal.getRules()) {
		                if (tempRule.equals(existingRule)) {
		                    //update loglikelihood
		                    //if (testCounter%200==0) System.out.println("found rule, prob= " + existingRule.getRuleProbability());
		                    logLikelihood -= Math.log(existingRule.getRuleProbability())/Math.log(2.);  //((double) existingRule.getCount()) *
		                    sentenceProbability *= existingRule.getRuleProbability();
		                    System.out.println(myNodeNonTerminal.getName() + " --> " + ChildNonTerminal1.getName() + " "  + ChildNonTerminal2.getName() + " : " + existingRule.getRuleProbability());
		                    ruleFound = true;
		                    break;
		                }  
		            } 
		            if (!ruleFound) System.out.println("Rule doesnt exist");
		        }	//end if (myNode.getChildNodes().size()==2)
		        //rules with RHS size=1 or TOP rules
		        else {  //logLikelihood of preterminals =0??? HELEMAAL NIET!!!
		             //XXX volgens mij alleen TOP -->S met loglikelihood=0: of is S erafgehaald met CNF-conversie?
		            if (myNode.getChildNodes().size()==1 && myNode.getType()==Main.NONTERMINAL) {    //&& myNode.getName().equals("TOP")
		                //TOP node or preterminal: in latter case child can be terminal
		                if (myNode.getChildNodes().get(0).getType()==Main.NONTERMINAL)
		                    Child1 =  myGrammar.nonTerminals.get(myNode.getChildNodes().get(0).getName());
		                else     Child1 =  myGrammar.Terminals.get(myNode.getChildNodes().get(0).getName());
		                tempRule = new Rule(myNodeNonTerminal, Child1);
		                //System.out.println(myNodeNonTerminal.getName() + "-->" + Child1.getName());
		                ruleFound = false;
		                for (Rule existingRule : myNodeNonTerminal.getRules()) {
		                    if (tempRule.equals(existingRule)) {
		                        //update loglikelihood
		                        logLikelihood -= Math.log(existingRule.getRuleProbability())/Math.log(2.);  //((double) existingRule.getCount()) *
		                        sentenceProbability *= existingRule.getRuleProbability();
		                        System.out.println(myNodeNonTerminal.getName() + " --> " + myNode.getChildNodes().get(0).getName() + " : " + existingRule.getRuleProbability());
		                        //System.out.println("Parent = TOP; Child=" + myNode.getChildNodes().get(0).getName() + "; logLikeli=" + logLikelihood);
		                        ruleFound = true;
		                        break;
		                    }
		                }
		                if (!ruleFound) System.out.println("Rule doesnt exist");
		            }   //if (size=1 && TOP
		        }   //else
		         
		    }   //for (Node myNode : cnfParseTree.getNodes())
		    //store in array
		    cnfParseTree.setProbability(logLikelihood);
		    //cnfParseTree.setProbability(sentenceProbability);
		    //keep TreeSet of 50 parseTrees with highest likelihood
		    //if number of parseTrees in TreeSet >50, then remove lowest probability
		    //mostUnlikelyParses.put(new Double(logLikelihood), myUnlabeledSentence.split("#")[0]);
		    
		    //if (mostUnlikelyParses.size()>50) mostUnlikelyParses.remove(mostUnlikelyParses.firstKey());
		    //update totalLogLikelihood
		    totalLogLikelihood += logLikelihood;
		    
		    //store the total probability for trees of different lengths
		    sentenceLength = myUnlabeledSentence.split(" ").length -1; //because unlabeled sentence includes " ."
		    
		    if (totalProbabilityForCertainLength.get(sentenceLength) ==null) {
		        //xxx je moet generic class of pairs of objects maken!!!
		        totalProbabilityForCertainLength.put(sentenceLength, logLikelihood);
		        totalSentencesForCertainLength.put(sentenceLength, 1);
		    }
		    else {
		        totalProbabilityForCertainLength.put(sentenceLength, totalProbabilityForCertainLength.get(sentenceLength) + logLikelihood);
		        totalSentencesForCertainLength.put(sentenceLength, totalSentencesForCertainLength.get(sentenceLength)+1);
		    }
		}   //for (String myUnlabeledSentence : bankOfParses.keySet()) {
		
		System.out.println("!!!!!!!!!!!!!totalLogLikelihood= " + totalLogLikelihood + "; testCounter=" + testCounter);
		//now you know probabilities for all parses in the treebank
		//you must normalize likelihood by dividing by average likelihood of sentences of same length
		//sort it, and output a sorted file of unlabeled sentences with probabil after comma, and a sorted file of labeled sentences
		double normalizedLikelihood = 0d;
		double averageLikelihood = 0d;
		String labeledSentence = null;
		//System.out.println("# sentences=" + bankOfParses.size());
		
		for (String myUnlabeledSentence : bankOfParses.keySet()) {
		    sentenceLength = myUnlabeledSentence.split(" ").length-1;
		    averageLikelihood = totalProbabilityForCertainLength.get(sentenceLength)/((double) totalSentencesForCertainLength.get(sentenceLength));
		       
		    cnfParseTree = bankOfParses.get(myUnlabeledSentence).getSecond(); 
		    normalizedLikelihood = cnfParseTree.getProbability() -averageLikelihood;
		    
		    labeledSentence = bankOfParses.get(myUnlabeledSentence).getFirst();
		    //cnfParseTree.transformBackToCFG();
		    //if (Main.DO_WSJ_PARSE) labeledSentence = cnfParseTree.printWSJFormat();
		    //else labeledSentence = cnfParseTree.printOVISFormat(true);
		   
		    if (sentencesSortedByLikelihood.get(normalizedLikelihood)==null) {
		        //create new HashSet for sentences with this probability
		        ArrayList<String> sentencesWithSameLikelihood = new ArrayList<String>();
		        
		        //.split("#")[0], because random number added at end, so it takes duplicates
		        sentencesWithSameLikelihood.add(myUnlabeledSentence.split("#")[0] + "#" + labeledSentence);
		        sentencesSortedByLikelihood.put(normalizedLikelihood, sentencesWithSameLikelihood);
		    }
		    else {
		        //add pair to HashSet
		        sentencesSortedByLikelihood.get(normalizedLikelihood).add(myUnlabeledSentence.split("#")[0] + "#" + labeledSentence);
		    }        
		}
		
		//print a sorted list of unlabeled sentences, and labeled sentences
		BufferedWriter out_unlabeled = new BufferedWriter(new FileWriter(Main.OUTPUT_DIR + "/" + "sorted_by_likelihood_unlabeled.csv"));
		BufferedWriter out_labeled = new BufferedWriter(new FileWriter(Main.OUTPUT_DIR + "/" + "sorted_by_likelihood_labeled.txt"));
		
		 for (Double myLikelihood : sentencesSortedByLikelihood.keySet()) {
		     for (String pairOfSentences : sentencesSortedByLikelihood.get(myLikelihood)) {
		        sentenceLength = pairOfSentences.split("#")[0].split(" ").length-1;
		        averageLikelihood = totalProbabilityForCertainLength.get(sentenceLength)/((double) totalSentencesForCertainLength.get(sentenceLength));
		    
		        out_unlabeled.write(pairOfSentences.split("#")[0] + ", " + (myLikelihood + averageLikelihood) + ", " + myLikelihood);
		        out_unlabeled.newLine();
		        
		        out_labeled.write(pairOfSentences.split("#")[1] );
		        out_labeled.newLine();
		     }
		 }
		 out_labeled.flush();
		 out_labeled.close();
		 out_unlabeled.flush();
		 out_unlabeled.close();
		
		
		//print the most likely parses
		//NumberFormat numberFormatter;
		//numberFormatter = NumberFormat.getNumberInstance();
      
		//System.out.println("Total Likelihood=" + numberFormatter.format(totalLogLikelihood) + "; The 50 most unlikely sentences are: ");
		//XXX if there is more than one with exact same likelihood in TOP 50, it is missed, because only unique likelihoods
		//for (Double myLikelihood : mostUnlikelyParses.keySet()) {
		//    System.out.println(mostUnlikelyParses.get(myLikelihood) + " : likelihood=" + numberFormatter.format(myLikelihood.doubleValue()));
		//}
	}
    
    public Grammar() {
        
    }
    /* reads in the complete grammar from a file containing the production rules
     *
     **/
    public Grammar(String readGrammarFromFileName) throws Exception {
       
        BufferedReader buff, buffAssociations;
        String mySentence;
        
        //System.out.println("gezocht: " + readGrammarFromFileName);
        //buff = new BufferedReader(new FileReader("d:/StolckeGrammar_artificial.txt"));
        
        buff = new BufferedReader(new FileReader(readGrammarFromFileName));
        
        HashMap<String, String> labelAssociations = new HashMap<String, String>();
        if (Main.REPLACE_LABELS_IN_GRAMMAR) {
            //read the file of label associations
            buffAssociations = new BufferedReader(new FileReader("Input_for_evaluation/labelAssociations.txt"));
            
            while ((mySentence = buffAssociations.readLine()) !=null){
                //associations are separated by comma's
                labelAssociations.put(mySentence.split(",")[0], mySentence.split(",")[1]);
            }
        }
        
        NonTerminal myLHSNonTerminal = null;
        NonTerminal myNonTerminalB = null;
        NonTerminal myNonTerminalC = null;
        String myLHS = null;
        String originalLHS = null;
        String myProb = null;
        Rule myRule = null;

        //skip first line
        //buff.readLine();
        //first read terminals, create them and add to grammar    
        while (!(mySentence = buff.readLine()).equals("NONTERMINALS")){
            this.Terminals.put(mySentence, new Terminal(mySentence));
        }

        //read nonterminals, create them (w/o rules) and add to grammar
        //skip first line
        //buff.readLine();
        //String myNonT = null;
        while (!(mySentence = buff.readLine()).equals("PRODUCTION RULES")){
            if (Main.REPLACE_LABELS_IN_GRAMMAR) {
                if (!(labelAssociations.get(mySentence)==null)) {
                    mySentence = labelAssociations.get(mySentence);
                }
                //else myNonT = mySentence;
            }
            this.nonTerminals.put(mySentence, new NonTerminal(mySentence));
            //System.out.println("NonTerminal=" + mySentence);
        }
        //read rules, add rules to nonterminals and add them to storeRulesAccordingToRHS
        //for probabilities, note E notation

        while ((mySentence = buff.readLine()) !=null){
            if (mySentence.indexOf("RULESOFNONTERMINAL") >= 0) {

                originalLHS = mySentence.substring(19);
                if (Main.REPLACE_LABELS_IN_GRAMMAR) {
                    if (!(labelAssociations.get(originalLHS)==null)) {
                        originalLHS = labelAssociations.get(originalLHS);
                    }
                }
                //System.out.println("myNonTerminalString=" + mySentence.substring(19));
                //System.out.println("myNonTerminalFirst=" + myLHSNonTerminal.getName());
            }
            else    {
                String myRHS = mySentence.split("#")[0];

                if (Main.REPLACE_LABELS_IN_GRAMMAR) {
                    //replace all nonT in RHS by other labels
                    //break up at * and put together again
                    StringBuffer myRHSBuffer = new StringBuffer();
                    
                    for (String myLabel : myRHS.split("\\*")) {
                        if (!(labelAssociations.get(myLabel)==null)) {
                        myRHSBuffer.append(labelAssociations.get(myLabel) + "*");
                        }
                        else myRHSBuffer.append(myLabel + "*");
                        
                        //if (myLabel.equals("CHNK~_NIET|_DE|_VINK~1063")) System.out.println("CHNK~_NIET|_DE|_VINK~1063 replaced by " + labelAssociations.get(myLabel));
                    }
                    myRHS = myRHSBuffer.toString();
                    
                }
                //System.out.println("myRHS=" + myRHS);
                
                ArrayList<String> completeRules = new ArrayList<String>();  

                //also check length=2 en X --> Y w
                boolean blnNonCNF = false;
                //rules of length 2 cannot have terminals in their RHS if (this.Terminals.containsKey(myRHS.split("\\*")[0]))
                //if (myRHS.split("\\*").length == 2 && (myRHS.split("\\*")[0].equals(myRHS.split("\\*")[0].toLowerCase()) || myRHS.split("\\*")[1].equals(myRHS.split("\\*")[1].toLowerCase()))) blnNonCNF = true;
                if (myRHS.split("\\*").length == 2 && (this.Terminals.containsKey(myRHS.split("\\*")[0]) || this.Terminals.containsKey(myRHS.split("\\*")[1]))) blnNonCNF = true;
                 
                if (myRHS.split("\\*").length > 2 || blnNonCNF) {    //non-CNF rule
                    //you need to split the rule into subrules and then loop over each of the subrules
                    //while assigning/dividing correct probabilities

                    //System.out.println("THIS RULE IS NOT IN CNF FORMAT!!! " + originalLHS + "-->" + myRHS);
                    //create parseTree (Root node is automatically created from LHS
                    parseTree myParseTree = new parseTree(originalLHS); 
                    Node topNode = myParseTree.getNodes().get(0);
                    Node myChildNode;
                    //create nodes with parents LHS
                    for (String childName : myRHS.split("\\*")) {
                        myChildNode = new Node(childName, topNode);
                        myChildNode.setType(Main.NONTERMINAL);
                        //XXX TEMP SOLUTION  if (childName.equals(childName.toLowerCase()))
                        if (this.Terminals.containsKey(childName)) myChildNode.setType(Main.TERMINAL);
                        myParseTree.addNode(myChildNode);
                        //add RHS nodes as children to parent
                        topNode.addChildNode(myChildNode);
                    }

                    //convert rule to CNF format
                    myParseTree.transformToCNF();


                    //create array of rules, one for each node of parseTree
                    for (Node NodeCNF : myParseTree.getNodes()) {
                        //only iff node has children
                        if (NodeCNF.getChildNodes().size()>0) {

                            //create nonTerminals for CNF-forms (but w/o rules)
                            if (NodeCNF.getName().contains("X_")) {
                                if (!this.nonTerminals.containsKey(NodeCNF.getName())){
                                    //create new NonTerminal
                                    NonTerminal newNonTerminal = new NonTerminal(NodeCNF.getName());
                                    this.nonTerminals.put(NodeCNF.getName(), newNonTerminal);
                                }
                            }

                            //reconstruct rule, with correct RHS and prob
                            StringBuffer recreatedRule = new StringBuffer(); 
                            //LHS    
                            recreatedRule.append(NodeCNF.getName()).append("#");
                            //children
                            for (Node childNode : NodeCNF.getChildNodes()) {
                                recreatedRule.append(childNode.getName()).append("*");
                            }
                            //assign correct probabilities
                            recreatedRule.append("#");
                            //all rules with X in LHS get probability 1
                            if (NodeCNF.getName().contains("X_")) recreatedRule.append("1.");
                            else recreatedRule.append(mySentence.split("#")[1]);

                            //add to completeRules
                            completeRules.add(recreatedRule.toString());
                            //System.out.println("recreatedRule=" + recreatedRule.toString());
                        }
                    }
                }
                else {  //rewrite binary or unary rule such that it includes LHS
                    completeRules.add(originalLHS + "#" + myRHS + "#" + mySentence.split("#")[1]);
                    //System.out.println("originalLHS +# + mySentence=" + originalLHS + "#" + mySentence);
                }

                //now we got everything in CNF format (hopefully)
                for (String completeRule : completeRules) {

                     
                    myLHS = completeRule.split("#")[0];
                    myRHS = completeRule.split("#")[1];
                    
                    //System.out.println("completeRule="+completeRule + "; myLHS=" + myLHS + "STOP");
                    //if (myRHS==null) System.out.println(completeRule);
                    myProb = completeRule.split("#")[2];
                    myLHSNonTerminal = this.nonTerminals.get(myLHS);
                    if (myLHSNonTerminal ==null) System.out.println("myLHSNonTerminal ==null");
                    //TEMP SOLUTION
                    if (myRHS.equals("*")) continue;
                    String leftBodyNonT = myRHS.split("\\*")[0];
                    String rightBodyNonT = null;

                    //System.out.println("completeRule=" + completeRule + "; myLHS=" + myLHS + "; myRHS=" + myRHS + "; myProb=" + myProb + "; myLHSNonTerminal=" + myLHSNonTerminal.getName()  );

                    if (myRHS.split("\\*").length == 2) {    //binary rule
                        rightBodyNonT = myRHS.split("\\*")[1];
                        //System.out.println("length=2: " + myRHS.split("\\*")[0]);
                        //System.out.println(leftBodyNonT + "= " + this.nonTerminals.get(leftBodyNonT) + "; rightBodyNonT: " + rightBodyNonT + "= " + this.nonTerminals.get(rightBodyNonT));
                        myRule =  new Rule(myLHSNonTerminal, this.nonTerminals.get(leftBodyNonT), this.nonTerminals.get(rightBodyNonT));
                        }
                    else   {    //unary rules : check if RHS is terminal or nonterminal

                        if(this.Terminals.get(leftBodyNonT)!=null) {
                           //terminal
                            //System.out.println("Terminal=" + myRHS.split("\\*")[0]);
                            //System.out.println("idem=" + this.Terminals.get(myRHS.split("\\*")[0]).getName());
                           myRule =  new Rule(myLHSNonTerminal, this.Terminals.get(leftBodyNonT)) ;
                        }
                        else    {
                            //System.out.println("NonTerminal=" + myRHS.split("\\*")[0]);
                            //System.out.println("idem=" + this.nonTerminals.get(myRHS.split("\\*")[0]).getName());
                          myRule =  new Rule(myLHSNonTerminal, this.nonTerminals.get(leftBodyNonT)) ;  
                        }
                    }
                    //add probs

                    double ruleProb = 0d;
                    if (myProb.contains("E")) {
                        ruleProb = java.lang.Double.parseDouble(myProb.split("E")[0])*Math.pow(10d, java.lang.Double.parseDouble(myProb.split("E")[1]));
                    }
                    else ruleProb = java.lang.Double.parseDouble(myProb);
                    myRule.setRuleProbability(ruleProb);

                    //add rule to nonTerminal
                    //System.out.println("myLHSNonTerminal=" + myLHSNonTerminal.getName());
                    myLHSNonTerminal.addRule(myRule);

                    //add to rulesIndexedByBodyLeftNonT
                    if (rightBodyNonT != null)   {   //you only want to store binary rules
                        
                        if (rulesIndexedByBodyLeftNonT.get(leftBodyNonT) == null) {
                            //you need to create a new HashSet of Rules and add it to HashMap of rulesIndexedByUnaryRHS
                            HashSet<Rule> myRuleSet = new HashSet<Rule>();
                            myRuleSet.add(myRule);
                            this.rulesIndexedByBodyLeftNonT.put(leftBodyNonT, myRuleSet);
                        }
                        else    {
                            //System.out.println("adding Rule " + myLHS + " --> " + leftBodyNonT + " " + rightBodyNonT);
                            for (Constituent myConstituent : myRule.getRightHandSide()) {
                                //System.out.println(myConstituent.getName());
                            }
                            //System.out.println("MyRHS=" + myRule.getRightHandSide()
                            //get reference to HashSet of rules with the same leftBodyNonT
                            //adding to HashSet<Rule> invokes equals method of Rule
                            rulesIndexedByBodyLeftNonT.get(leftBodyNonT).add(myRule);
                        }
                    }
                    else    {   //unary RHS
                        //add to rulesIndexedByUnaryRHS
                        if (rulesIndexedByUnaryRHS.get(myRHS) == null) {
                            //you need to create a new HashSet of Rules and add it to HashMap of rulesIndexedByUnaryRHS
                            HashSet<Rule> myRuleSet = new HashSet<Rule>();
                            myRuleSet.add(myRule);
                            this.rulesIndexedByUnaryRHS.put(myRHS, myRuleSet);
                        }
                        else    {
                            rulesIndexedByUnaryRHS.get(myRHS).add(myRule);
                        }
                    }
                }
            }

        }
   
        /* temp
        int matchingConstituents = doPARSEVAL(bankOfParses.get(0),  myTreeBank.get(1));
        System.out.println("matchingConstituents=" + matchingConstituents);
        System.out.println("nrConstituentsTree1=" + myTreeBank.get(0).getNodes().size());
        System.out.println("nrConstituentsTree2=" + myTreeBank.get(1).getNodes().size());
        */
   
    }
    
        
    public void prepareParseTreeForPrinting(parseTree myParseTree, ArrayList<String> unLabeledSentences, ArrayList<String> labeledSentences, ArrayList<String> latexSentences, ArrayList<String> spanSentences) {
        
    //System.out.println( "in prepareParseTreeForPrinting");
//write it into stringarray
         StringBuffer unLabeledSentence = new StringBuffer();

        // int nrTerminals = 0;
        for (Node myNode : myParseTree.getNodes()) {

           if (myNode.getType()==Main.TERMINAL) {  //TERMINALS
               //nrTerminals++;
                unLabeledSentence.append(myNode.getName().split(" ")[0]).append(' ');           
           }
        }
        
        unLabeledSentence.append(".");
        unLabeledSentences.add(unLabeledSentence.toString()); 
        //still in CFG-format here
        if (!Main.PRINT_CNF_CONVERTED_LABELED_FILE) {
            if(Main.BRACKET_FORMAT_WSJ_STYLE) labeledSentences.add(myParseTree.printWSJFormat());
            else labeledSentences.add(myParseTree.printOVISFormat(true));
        }
        //labeledAndUnlabeledSentences.put(unLabeledSentence.toString(), myParseTree.printWSJFormat());
        //remember parseTrees
        
        GenericPair<String, parseTree> myPair;
        if(Main.BRACKET_FORMAT_WSJ_STYLE) myPair = new GenericPair<String, parseTree>(myParseTree.printWSJFormat(), myParseTree);
        else myPair = new GenericPair<String, parseTree>(myParseTree.printOVISFormat(true), myParseTree);
        //myParseTree is still CFG, but if you  keep reference to it, it will turn later into CNF
        myTreeBank.put(unLabeledSentence.toString() + "#" + Math.random(), myPair);
        
        if (Main.PRINT_LATEX_FILE || Main.PRINT_SPANS) {
            myParseTree.calculateNodeDepthAndLabelNodes();
            myParseTree.calculateNodeSpans();
            
            //before CNF-conversion
            if (!Main.PRINT_BINARYSPANS) {
                
                latexSentences.add(myParseTree.printToLatex(false, true, ""));
                spanSentences.add(myParseTree.printSpans());
            }
        }    
    /////////////////////////////////////////////////////////
    //print the parseTree in Latex format (on screen)
    //myParseTree2.printToLatex(false, false);
}

    public void preprocessParseTreeAndAddToGrammar(parseTree myParseTree, int treeIndex){
        
        //temp for printing only: calculate depth of nodes and label them
        //myParseTree2.calculateNodeDepthAndLabelNodes();

        //temp for printing latex only: calculate spans
        //myParseTree2.calculateNodeSpans();

        //extract unlabeled sentence
        StringBuffer unLabeledSentence = new StringBuffer();

                   
        //CNF conversion
        //System.out.println("CNF conversion");
        myParseTree.transformToCNF();
        
        
        //calculate depth of nodes and label them
        myParseTree.calculateNodeDepthAndLabelNodes();
        myParseTree.calculateNodeSpans();
        //myParseTree.printToLatex(false, true, "");
        
       if (Main.DOGOODMANDOP) {     
            //count the subtrees, start on the deepest level, and go up
            //store nr of subtrees in every node
            myParseTree.calculateSubTrees_generalized();

            //print the CNF conversed parseTree in Latex format (including nr subtrees)
            //myParseTree2.printToLatex(true, false);

            //add terminals, nonterminals and rules to grammar
            //NB if you haven't converted to CNF you will get error here
            addParseTreeToGrammar(myParseTree, treeIndex);
        }
        else addParseTreeToGrammarNoGoodman(myParseTree);      

        //temp test
        //if (this.nonTerminals.containsKey("X_7963")) {
        //    System.out.println("X_7963 is in list of nonT" );
        //    for (Rule myRule : this.nonTerminals.get("X_7963").getRules()) {
        //        System.out.println("X_7963-->" + myRule.getRightHandSide());
        //    }
        //}
         
                    
        //test convert back to CFG
        //myParseTree2.transformBackToCFG();

        //System.out.println("The parse tree converted back to CFG:");
        //myParseTree2.printToLatex(true, false);

    }
    
    
    public String[] Expand(String[] currentSentence, int branch){
        String[] Constituents = new String[100];
        //todo
        return Constituents;
    }
    
    public String[] GenerateRandomSentence(int maxDepth, int minLength, int maxLength) {
        
        ArrayList<Constituent> mySentence = new ArrayList<Constituent>();
        int depth = 0;
        int nrOfTerminals = 0;
        int nrOfTrials = 0;
        
        while ((nrOfTerminals < minLength || nrOfTerminals > maxLength) && nrOfTrials < 50){
            nrOfTrials++;
            
            //start with TOP
            mySentence.clear();
            mySentence.add(this.nonTerminals.get("TOP") );
            
            while(depth <= maxDepth && nrOfTerminals == 0){
                depth++;
                mySentence = GetRandomSuccessor(mySentence);
                nrOfTerminals = CheckIfAllTerminals(mySentence);
            }
            
        }
        //check again if sentence contains only terminals (in case thrown out because exceeds depth
        if (CheckIfAllTerminals(mySentence)>0) {
            //convert terminals to strings
            ArrayList<String> outputsentence = new ArrayList<String>();
            
            for (Constituent myConstituent : mySentence){
                
                Terminal tempTerminal = (Terminal) myConstituent;
                outputsentence.add(tempTerminal.getName());
                //outputsentence[k] = tempTerminal.getName();
                //k++;
            }
            return outputsentence.toArray(new String[0]) ;
        } else return null;
    }
    
    /* if only Terminals then returns nr of Terminals
     * otherwise returns 0\
     */
    
    public int CheckIfAllTerminals(ArrayList<Constituent> inputsentence){
        int nrOfTerminals = 0;
        for (Constituent myConstituent : inputsentence){
            if(myConstituent instanceof NonTerminal) return 0;
            else nrOfTerminals++;
        }
        return nrOfTerminals ;
    }
    
    public ArrayList<Constituent> GetRandomSuccessor(ArrayList<Constituent> inputsentence){
        ArrayList<Constituent> outputSentence = new ArrayList<Constituent>();
        
        //iterate over constituents and find nonTerminals
        for (Constituent myConstituent : inputsentence){
            
            if(myConstituent instanceof NonTerminal){
                
                //find out how many branches/rules NonTerminal has
                NonTerminal tempConstituent  = nonTerminals.get(myConstituent.getName());
                //pick a random nr between 0 and total nr of rules of NonTerminal
                //??? klopt dit?
                //if(tempConstituent.countRules() == 0){
                //    System.out.println("Problem with: " + tempConstituent.getName());
                //}
                assert(tempConstituent.countRules() > 0);
                
                int randomRule = rnd.nextInt(tempConstituent.countRules());
                int iCheck = tempConstituent.countRules();
                //get the rule
                //assert(randomRule < tempConstituent.countRules());
                for (Constituent insertConstituent : tempConstituent.getRule(randomRule).getRightHandSide() ){
                    outputSentence.add(insertConstituent);
                }
            } else outputSentence.add(myConstituent);
            
        }
        return outputSentence;
    }
    
    public String[] GetListofTerminals(){
        //String[] myTerminalArray = new String[100];
        ArrayList<String> myTerminalArrayList = new ArrayList<String>();
        //int i = 0;
        for(String key : this.Terminals.keySet()) {
       
            myTerminalArrayList.add(key);
        }
        
        return myTerminalArrayList.toArray(new String[0]);
    }
    
    public String[] GetListofNonTerminals(){
        //String[] myNonTerminalArray = new String[100];
        ArrayList<String> myNonTerminalArrayList = new ArrayList<String>();
        //int i = 0;
        for(String key : this.nonTerminals.keySet()) {
            //also add count of subtrees
            myNonTerminalArrayList.add(key);
            // + " (" + this.nonTerminals.get(key).getnrSubtrees() + ") "
        }
        return myNonTerminalArrayList.toArray(new String[0]) ;
    }
    
    public ArrayList<ArrayList<String>> GetListofRulesPlusProbabilities(String myLHS){
        
        ArrayList<ArrayList<String>> ArrayListofRules = new ArrayList<ArrayList<String>>();
        //??? er zijn misschien nonTerminals zonder rule: maar dat mag niet
        ArrayList<Rule> myAssociatedRules = this.nonTerminals.get(myLHS).getRules();  
          
        for (Rule myRule : myAssociatedRules){
            ArrayList<String> RuleArray = new ArrayList<String>();
            ArrayListofRules.add(RuleArray);
            
            //populate list of rules
            RuleArray.add(myLHS);
            
            //add RHS
            for (Constituent myConstituent : myRule.getRightHandSide() ){
                if (myConstituent==null) System.out.println("Nullpointer: LHS=" + myRule.getLeftHandSide().getName());
                RuleArray.add(myConstituent.getName());
            }
            //add probability
            NumberFormat numberFormatter;

            numberFormatter = NumberFormat.getNumberInstance();

            String myProb = "(" + numberFormatter.format(myRule.getRuleProbability()) + ")";
            //String myProb = "(" + myRule.getRuleProbability() + ")";
            RuleArray.add(myProb);
        }
        return ArrayListofRules;
        
    }
    
    /** Extracts the rules from the annotated OVIS file
     *  @param sentence one sentence of OVIS file
     */
    protected static parseTree ExtractParseFromOVISText(String sentence){
        
        //(a,[(tv,[(nee_,[])]),(vp,[(v,[(dank_,[])]),(per,[(u_,[])])])]).
    
        //create temporary sentence with node structure
        //TOP node is automatically created
        parseTree myParseTree = new parseTree();
        
        int characterPosition = 0;
        int wordPosition = 0;
        
        //currentNode is the index of the node in the parseTree (ArrayList of nodes)
        //set currentNode to TOP
        Node currentNode = myParseTree.getNode(myParseTree.getNodes().size()-1);
        //System.out.println(sentence);
        
        //turn sentence into array of characters
        char[] mySentence = sentence.toCharArray();
        
        StringBuffer mySymbol;
        
        while ( characterPosition < sentence.length() -1) {     //without ending .
            
            // one of three possibilities:
            // if current character is (, then a new rule starts --> move till after [
            // if current character is ], then the rule ends --> move till after )
            // otherwise, if next character is comma, then stay within the same rule --> move behind comma
            
            // 1) start of new rule:
            if (mySentence[characterPosition] == '(' ) {
                
                characterPosition++;    //pass the (
                //System.out.println(mySentence[characterPosition]);
                
                //get name of symbol, or LHS of rule
                mySymbol = new StringBuffer();
                while (mySentence[characterPosition] != ',') {
                    
                    mySymbol.append(mySentence[characterPosition]);
                    //System.out.println(mySentence[characterPosition]);
                    characterPosition++;
                }
                //add the symbol to rule of dominant node
                //NB at instantiation of myParseTree the first current rule with LHS TOP is automatically created 
                                  
                //make new node out of mySymbol, which is daughter of currentNode
                //currentNode is pointer to parent node (xxx)
                Node myNode = new Node(new String(mySymbol), currentNode);
                myParseTree.addNode(myNode);

                        
                //add this node as a child to parent node
                currentNode.addChildNode(myNode);
                
                //position is now on comma, move ahead till you get behind [
                characterPosition++; //pass the comma
                //System.out.println(mySentence[characterPosition]);
                characterPosition++; //pass the left bracket of RHS [
                //System.out.println(mySentence[characterPosition]);
                
                if (mySentence[characterPosition-3] == '_' ) {
                    //it was a terminal node, move ahead till after )
                    myNode.setType(Main.TERMINAL); 
                    //add the word position info in leftSpan and rightSpan fields
                    wordPosition++;
                    myNode.setLeftSpan(wordPosition-1);
                    myNode.setRightSpan(wordPosition);
                    
                    characterPosition++; //pass the ]
                    //System.out.println(mySentence[characterPosition]);
                    characterPosition++; //pass the )
                    //System.out.println(mySentence[characterPosition]);
                    
                }    
                else {
                    //set type to nonTerminal
                    myNode.setType(Main.NONTERMINAL);     
                     //make this node the currentNode node
                    currentNode = myNode;    
                }
                
            }
            
            //end of RHS of rule
           // else {
                if (mySentence[characterPosition] == ']') {
                //this is the end of all the sister nodes
                    
                    //position is now on ], move on till behind the )
                    characterPosition++; //pass the ]
                    //System.out.println(mySentence[characterPosition]);
                    characterPosition++; //pass the )
                    //System.out.println(mySentence[characterPosition]);
                    //update currentNode to parent of current
                    currentNode = currentNode.getParentNode();
                }
                

                //stay with same node
                if(mySentence[characterPosition] == ',') {
                    characterPosition++; //pass the comma
                    //System.out.println(mySentence[characterPosition]);
                } 

        }
        
        /*test
         for (Node myNode : myParseTree.getNodes()) {
            System.out.println(myNode.getName() + " --> " + myNode.getChildNodes());
        }
         */
        return myParseTree;
    }
    
    /** Extracts the rules from the annotated OVIS file
     *  @param sentence one sentence of OVIS file
     */
    protected static parseTree ExtractParseFromWSJText(String sentence, boolean blnPrintChars){
        //System.out.println("in ExtractParseFromWSJText; blnPrintChars=" + blnPrintChars + "; sentence=>>" + sentence + "<<");
        //(a,[(tv,[(nee_,[])]),(vp,[(v,[(dank_,[])]),(per,[(u_,[])])])]).
    
        //(S (NP (POSTAG hallo)))
        //( (S (NP-SBJ (NNP Ms.) (NNP Waleson) ) (VP (VBZ is) (NP (NP (DT a) (JJ free-lance) (NN writer) ) (VP (VBN based) (NP (-NONE- *) ) (PP-LOC-CLR (IN in) (NP (NNP New) (NNP York) ))))) (. .) ))
        //create temporary sentence with node structure
        //TOP node is automatically created
        parseTree myParseTree = new parseTree();
        
        int characterPosition = 0;
        int wordPosition = 0;
        
        //replace the first occurrence of S by TOP
        if ((Main.DO_LABEL_ASSOCIATION || Main.DIRECT_EVALUATION_NO_PARSING) && sentence.startsWith("(top (s ")) sentence = "(top " + sentence.substring(8,  sentence.length()-2);
        //System.out.println(sentence);
        //currentNode is the index of the node in the parseTree (ArrayList of nodes)
        //set currentNode to TOP, which is created in constructor of parseTree
        Node currentNode = myParseTree.getNode(myParseTree.getNodes().size()-1);
        
        //XXX maar er zit ook TOP in de sentence!!!
        
        
        //turn sentence into array of characters
        char[] mySentence = sentence.toCharArray();
                
        StringBuffer mySymbol;
        
        while ( characterPosition < sentence.length()) {    
            
            //(np,[(capelle,[(capelle_,[])]),(aan,[(aan_,[])]),(den,[(den_,[])]),(ijssel,[(ijssel_,[])])]).
        	//(a lorillard spokewoman (said)) ( (this) (is (an old story)))
        	//(TOP (S (NP-SBJ (dt a) (nnp lorillard) (nn spokewoman)) (VP (vbd said) (S (NP-SBJ (dt this)) (VP (vbz is) (NP-PRD (dt an) (jj old) (nn story)))))))
            // one of three possibilities:
            // if current character is (, then a new Node starts --> move till after ( 
            // if current character is ), then the rule ends --> move till after )
            // otherwise, if next character is space, then stay within the same rule --> move behind space
            
        	//
            // 1) start of new rule:
            if (mySentence[characterPosition] == '(' ) {
                
                if (blnPrintChars) System.out.println(mySentence[characterPosition]);
                characterPosition++;    //pass the (
                
                //get name of symbol, or LHS of rule
                mySymbol = new StringBuffer();
               // (s,[(per,[(ik_,[])]),(vp,[(v,[(wil_,[])]),(mp,[(p,[(naar_,[])]),(np,[(np,[(den,[(den_,[])]),(haag,[(haag_,[])])]),(np,[(np,[(centraal_,[])]),(n,[(station_,[])])])])])])]).

                while (mySentence[characterPosition] != ' ') {
                    mySymbol.append(mySentence[characterPosition]);
                    if (blnPrintChars) System.out.println(mySentence[characterPosition]);
                    characterPosition++;
                }
                
                //only if not again TOP
                String mySymbolString = mySymbol.toString();
                
                //System.out.println("mySymbolString=" + mySymbolString);
                
                if (!mySymbolString.toUpperCase().equals("TOP") ) { 
                    //add the symbol to rule of dominant node
                    //NB at instantiation of myParseTree the first current rule with LHS TOP is automatically created 
                    //take off the -SBJ etc
                    //if (Main.TAKE_OFF_SBJ && mySymbolString.contains("-")) mySymbolString = mySymbolString.split("-")[0];
                    //make new node out of mySymbol, which is daughter of currentNode
                    //currentNode is pointer to parent node (xxx)
                    Node myNode = new Node(mySymbolString, currentNode);
                    myParseTree.addNode(myNode);


                    //add this node as a child to parent node
                    currentNode.addChildNode(myNode);

                    //position is now on space, move ahead till you get behind space
                    if (blnPrintChars) System.out.println(mySentence[characterPosition]);
                    characterPosition++; //pass the space

                    //last word is a Terminal unless current character = (
                    if (mySentence[characterPosition] != '(' ) {
                        //it was a terminal node, move ahead till after )
                        myNode.setType(Main.TERMINAL); 
                        //add the word position info in leftSpan and rightSpan fields
                        wordPosition++;
                        myNode.setLeftSpan(wordPosition-1);
                        myNode.setRightSpan(wordPosition);

                        //find the name of the terminal and replace the label of the node
                        //continue till )

                        mySymbol = new StringBuffer();
                        while (mySentence[characterPosition] != ')') {
                            mySymbol.append(mySentence[characterPosition]);
                            if (blnPrintChars) System.out.println(mySentence[characterPosition]);
                            characterPosition++;
                        }
                        //replace label, unless it is one of (, . : `` '' -LRB- or -RRB-), in which case you delete it!!!
                        if (myNode.getName().equals(",") || myNode.getName().equals(".") || myNode.getName().equals(":") || myNode.getName().equals("``") || myNode.getName().equals("''") || myNode.getName().equals("-LRB-") || myNode.getName().equals("-RRB-") || myNode.getName().equals("$") || myNode.getName().equals("#") || myNode.getName().equals("-NONE-") ) {

                            //remove reference in parent Node
                            currentNode.getChildNodes().remove(myNode);
                            //delete node (cross fingers that it works)
                            myParseTree.getNodes().remove(myNode);
                            wordPosition--;
                        }
                        else 
                            if (!Main.EXTRACT_POSTAGS) {    //if you don't use POSTAGS then replace POSTAGS by lexical items
                                //myNode.replaceLHS(mySymbol.toString());
                                myNode.replaceLHS((myNode.getName() + " " + mySymbol.toString()).toLowerCase());
                            }
                        if (blnPrintChars) System.out.println(mySentence[characterPosition]);
                        characterPosition++; //pass the )
                    }    
                    else {
                        //set type to nonTerminal
                        myNode.setType(Main.NONTERMINAL);     
                         //make this node the currentNode node
                        currentNode = myNode;    
                    }   
                }   //it was not TOP again
                else {
                    if (blnPrintChars) System.out.println(mySentence[characterPosition]);
                    characterPosition++; //it was TOP, pass the space
                }
            }
            
            //end of RHS of rule
            if (mySentence[characterPosition] == ')') {
            //this is the end of all the sister nodes

                //position is now on ), move on till behind the ) 
                if (blnPrintChars) System.out.println(mySentence[characterPosition]);
                characterPosition++; //pass the )
                //update currentNode to parent of current
                currentNode = currentNode.getParentNode();
            }

            //stay with same node
            if (characterPosition< sentence.length()) {
                if(mySentence[characterPosition] == ' ') {
                    if (blnPrintChars) System.out.println(mySentence[characterPosition]);
                    characterPosition++; //pass the space
                } 
            }       
        }
        
        //test
        //System.out.println(mySentence);
        /*
         for (Node myNode : myParseTree.getNodes()) {
             ArrayList<String> childnodes = new ArrayList<String>();
            for (Node childNode : myNode.getChildNodes()) {
                childnodes.add(childNode.getName());
            }
            //System.out.println(myNode.getName() + " --> " + myNode.getChildNodes());
            System.out.println(myNode.getName() + " --> " + childnodes);
        }
         */
        return myParseTree;
    }
    //[.{TOP} [.{S} [.{NP-SBJ} {Ms.} {Waleson} ] [.{VP} {is} [.{NP} [.{NP} {a} {free-lance} {writer} ] [.{VP} {based} [.{NP} ] [.{PP-LOC-CLR} {in} [.{NP} {New} {York} ] ] ] ] ] ] ]
    //[.{TOP} [.{S} [.{NP-SBJ} {Ms.} {Waleson} ] [.{VP} {is} [.{NP} [.{NP} {a} {free-lance} {writer} ] [.{VP} {based} [.{NP} ] [.{PP-LOC-CLR} {in} [.{NP} {New} {York} ] ] ] ] ] ] ]
    /*
     * enumerate the nodes of the parseTree, start with deepest level
     * add Terminals and NonTerminals to grammar, and create 8 rules for each NonTerminal node
        //add them to grammar with appropriate probabilities
     */
    public void addParseTreeToGrammar(parseTree myParseTree, int treeIndex){
        
        NonTerminal exteriorNonTerminal, interiorNonTerminal;
        Terminal myTerminal;
         
        //start with deepest level and climb up the tree
        for (int currentDepth = myParseTree.deepestLevel; currentDepth >=0; currentDepth--) {
            
            for (Node myNode : myParseTree.getNodes()) {
                //LOOP over all nodes, including terminal nodes
                if (myNode.getDepth() == currentDepth) {
                    
                    String myNodeName = myNode.getName();
                    
                    //System.out.println("");
                    //System.out.println("XXXXXX depth=" + currentDepth + ", node=" + myNodeName);
                    //first part: create Terminals or nonTerminals for the grammar
                    if (myNode.getType()==Main.TERMINAL) {  //TERMINALS
                        
                        //all terminals are converted to lower case
                        myNodeName = myNodeName.toLowerCase();
                        
                        //if Terminal is not yet in Arraylist of grammar then add it
                        if (!this.Terminals.containsKey(myNodeName)) {
                            //turn node into terminal and add to grammar
                            myTerminal = new Terminal(myNodeName);
                            this.Terminals.put(myNodeName, myTerminal);
                            }
                        else myTerminal = this.Terminals.get(myNodeName);
                    } 
                    else {  //NONTERMINALS
                        //create an EXTERIOR NonTerminal, but first check if it already exists
                        if (!this.nonTerminals.containsKey(myNodeName)){
                            //create exterior NonTerminal
                            exteriorNonTerminal = new NonTerminal(myNodeName);
                            //add nr of subtrees
                            exteriorNonTerminal.setnrSubtrees(myNode.getnrSubtrees());
                            //add it to HashMap of nonTerminals of the grammar
                            this.nonTerminals.put(myNodeName, exteriorNonTerminal);
                            } 
                 
                        else {  //return reference
                            exteriorNonTerminal = this.nonTerminals.get(myNodeName);
                             //compute nr of subtrees for external nonTerminals    
                            exteriorNonTerminal.setnrSubtrees(exteriorNonTerminal.getnrSubtrees() + myNode.getnrSubtrees());
                            }
                        
                        //create an INTERIOR (labeled) nonTerminal, but not for TOP
                        //if (!(myNodeName.equals("TOP"))){
                            String interiorNodeName = myNodeName + "@" + myNode.getLabel()  + "@" + treeIndex;
                            interiorNonTerminal = new NonTerminal(interiorNodeName);
                            //add nr of subtrees
                            interiorNonTerminal.setnrSubtrees(myNode.getnrSubtrees());
                            //add it to HashMap of nonTerminals of the grammar
                            this.nonTerminals.put(interiorNodeName, interiorNonTerminal);
                        //}
                 
                        //PART 2: add rules - only if current node is NONTERMINAL
                        //ArrayList<Constituent> rhsArrayofConstituents = new ArrayList<Constituent>();
                        Rule tempRule;
                        
                        //if current node is preterminal then no labels for children
                        if (myNode.getChildNodes().size()==1 && !(myNode.getName().equals("TOP")))   {   //PRETERMINAL OR TOP!!
                            //add two rules: one for exterior and one for interior
                            //??? what are probabilities for these???
                            String daughterTerminalName = myNode.getChildNodes().get(0).getName().toLowerCase();
                            Terminal daughterTerminal = this.Terminals.get(daughterTerminalName);
                            
                            //add one rule to interior nonTerminal: this is unique rule
                            interiorNonTerminal.addRule(new Rule(interiorNonTerminal, daughterTerminal));
                            //for exterior nonTerminal, check if rule already exists
                            Rule tempRule1 = new Rule(exteriorNonTerminal, daughterTerminal);
                            boolean bExists = false;
                            for (Rule existingRule : exteriorNonTerminal.getRules()) {
                                if (tempRule1.equals(existingRule)) {
                                    bExists = true;
                                }
                            }
                            if (!bExists ){
                                exteriorNonTerminal.addRule(tempRule1);
                            }
                        }
                        else    {   //TWO NONTERMINAL CHILDREN or TOP
                            if (myNode.getChildNodes().size()==2) {
                                //find the 4 children: 2 exterior and 2 interior nonTerminals (nodes themselves are unlabeled, only nonterminals are labeled)
                                String myChildNodeName = myNode.getChildNodes().get(0).getName() + "@" + myNode.getChildNodes().get(0).getLabel()  + "@" + treeIndex;
                                NonTerminal interiorChild1 =  this.nonTerminals.get(myChildNodeName);
                                //System.out.println("interiorChild1=" + myChildNodeName);
                                //System.out.println(interiorChild1.getName());
                                myChildNodeName = myNode.getChildNodes().get(0).getName();
                                NonTerminal exteriorChild1 =  this.nonTerminals.get(myChildNodeName);
                                myChildNodeName = myNode.getChildNodes().get(1).getName() + "@" + myNode.getChildNodes().get(1).getLabel()  + "@" + treeIndex;
                                NonTerminal interiorChild2 =  this.nonTerminals.get(myChildNodeName);
                                myChildNodeName = myNode.getChildNodes().get(1).getName();
                                //System.out.println("interiorChild2=" + myChildNodeName);
                                //System.out.println(interiorChild2.getName());
                                NonTerminal exteriorChild2 =  this.nonTerminals.get(myChildNodeName);

                                //create 4 RightHandSides for the rules, 
                                //each of them are attached to both interior and exterior nonTerminal (LHS)
                                //rhsArrayofConstituents.clear();
                                //rhsArrayofConstituents.add(interiorChild1);
                                //rhsArrayofConstituents.add(interiorChild2);
                                //two unique rules
                                //myNodeNonTerminal.addRule(new Rule(myNodeNonTerminal, rhsArrayofConstituents));
                                //interiorNonTerminal.addRule(new Rule(interiorNonTerminal, rhsArrayofConstituents));
                                exteriorNonTerminal.addRule(new Rule(exteriorNonTerminal, interiorChild1, interiorChild2 ));
                                interiorNonTerminal.addRule(new Rule(interiorNonTerminal, interiorChild1, interiorChild2 ));
                                //System.out.println("PRINT RULES");
                                //System.out.println(myNodeNonTerminal.getName() + " --> " + interiorChild1.getName() + "  " + interiorChild2.getName());
                                //System.out.println(interiorNonTerminal.getName() + " --> " + interiorChild1.getName() + "  " + interiorChild2.getName());

                                //rhsArrayofConstituents.clear();
                                //rhsArrayofConstituents.add(interiorChild1);
                                //rhsArrayofConstituents.add(ChildNonTerminal2);
                                //two unique rules
                                //myNodeNonTerminal.addRule(new Rule(myNodeNonTerminal, rhsArrayofConstituents));
                                //interiorNonTerminal.addRule(new Rule(interiorNonTerminal, rhsArrayofConstituents));
                                exteriorNonTerminal.addRule(new Rule(exteriorNonTerminal, interiorChild1, exteriorChild2 ));
                                interiorNonTerminal.addRule(new Rule(interiorNonTerminal, interiorChild1, exteriorChild2 ));
                                //System.out.println(myNodeNonTerminal.getName() + " --> " + interiorChild1.getName() + "  " + ChildNonTerminal2.getName());
                                //System.out.println(interiorNonTerminal.getName() + " --> " + interiorChild1.getName() + "  " + ChildNonTerminal2.getName());

                                //rhsArrayofConstituents.clear();
                                //rhsArrayofConstituents.add(ChildNonTerminal1);
                                //rhsArrayofConstituents.add(interiorChild2);
                                //two unique rules
                                //myNodeNonTerminal.addRule(new Rule(myNodeNonTerminal, rhsArrayofConstituents));
                                //interiorNonTerminal.addRule(new Rule(interiorNonTerminal, rhsArrayofConstituents));
                                exteriorNonTerminal.addRule(new Rule(exteriorNonTerminal, exteriorChild1, interiorChild2 ));
                                interiorNonTerminal.addRule(new Rule(interiorNonTerminal, exteriorChild1, interiorChild2 ));
                                //System.out.println(myNodeNonTerminal.getName() + " --> " + ChildNonTerminal1.getName() + "  " + interiorChild2.getName());
                                //System.out.println(interiorNonTerminal.getName() + " --> " + ChildNonTerminal1.getName() + "  " + interiorChild2.getName());

                                //rhsArrayofConstituents.clear();
                                //rhsArrayofConstituents.add(ChildNonTerminal1);
                                //rhsArrayofConstituents.add(ChildNonTerminal2);
                                //unique rule
                                //interiorNonTerminal.addRule(new Rule(interiorNonTerminal, rhsArrayofConstituents));
                                interiorNonTerminal.addRule(new Rule(interiorNonTerminal, exteriorChild1, exteriorChild2 ));
                                //System.out.println(interiorNonTerminal.getName() + " --> " + ChildNonTerminal1.getName() + "  " + ChildNonTerminal1.getName());

                                //this rule may not be unique
                                //tempRule = new Rule(myNodeNonTerminal, rhsArrayofConstituents);
                                tempRule = new Rule(exteriorNonTerminal, exteriorChild1, exteriorChild2 );
                                //System.out.println("RPPPP=" + myNodeNonTerminal.getName() + "-->" + ChildNonTerminal1.getName() + "  " + ChildNonTerminal2.getName());
                                boolean bExists = false;
                                for (Rule existingRule : exteriorNonTerminal.getRules()) {
                                    if (tempRule.equals(existingRule)) {
                                        bExists = true;
                                    }
                                }
                                if (!bExists ) exteriorNonTerminal.addRule(tempRule);
                            }
                            else {  //TOP node
                                String myChildNodeName = myNode.getChildNodes().get(0).getName() + "@" + myNode.getChildNodes().get(0).getLabel()  + "@" + treeIndex;
                                NonTerminal interiorChild1 =  this.nonTerminals.get(myChildNodeName);
                                myChildNodeName = myNode.getChildNodes().get(0).getName();
                                NonTerminal exteriorChild1 =  this.nonTerminals.get(myChildNodeName);
                         
                                //create 2 RightHandSides for the rules, 
                                //each of them are attached to both interior and exterior nonTerminal (LHS)
                                
                                //two unique rules
                                exteriorNonTerminal.addRule(new Rule(exteriorNonTerminal, interiorChild1));
                                interiorNonTerminal.addRule(new Rule(interiorNonTerminal, interiorChild1));
                                
                                //rhsArrayofConstituents.clear();
                                //rhsArrayofConstituents.add(ChildNonTerminal1);
                                //two unique rules
                                interiorNonTerminal.addRule(new Rule(interiorNonTerminal, exteriorChild1));
                                
                                tempRule = new Rule(exteriorNonTerminal, exteriorChild1);
                                boolean bExists = false;
                                for (Rule existingRule : exteriorNonTerminal.getRules()) {
                                    if (tempRule.equals(existingRule)) {
                                        bExists = true;
                                    }
                                }
                                if (!bExists ) exteriorNonTerminal.addRule(tempRule);
                            }
                    }
                }   //NONTERMINALS, part 2 : add rules

               
                }

            }
        }
    }  
    
      /*
     * enumerate the nodes of the parseTree, start with deepest level
     * add Terminals and NonTerminals to grammar, and create 8 rules for each NonTerminal node
        //add them to grammar with appropriate probabilities
     */
    public void addParseTreeToGrammarNoGoodman(parseTree myParseTree){
        
    	NonTerminal exteriorNonTerminal;
        Terminal myTerminal;
         
        //start with deepest level and climb up the tree
        for (int currentDepth = myParseTree.deepestLevel; currentDepth >=0; currentDepth--) {
            
            for (Node myNode : myParseTree.getNodes()) {
                //LOOP over all nodes, including terminal nodes
                if (myNode.getDepth() == currentDepth) {
                    
                    String myNodeName = myNode.getName();
                   
                    //first part: create Terminals or nonTerminals for the grammar
                    if (myNode.getType()==Main.TERMINAL) {  //TERMINALS
                        
                        //all terminals are converted to lower case
                        myNodeName = myNodeName.toLowerCase();
                        
                    	//System.out.println("Node is terminal");
                        //if Terminal is not yet in Arraylist of grammar then add it
                        if (!this.Terminals.containsKey(myNodeName)) {
                            //turn node into terminal and add to grammar
                            myTerminal = new Terminal(myNodeName);
                            this.Terminals.put(myNodeName, myTerminal);
                            }
                        else myTerminal = this.Terminals.get(myNodeName);
                    } 
                    else {  //NONTERMINALS
                        //create an EXTERIOR NonTerminal, but first check if it already exists
                        if (!this.nonTerminals.containsKey(myNodeName)){
                            //create exterior NonTerminal
                            exteriorNonTerminal = new NonTerminal(myNodeName);
                         
                            //add it to HashMap of nonTerminals of the grammar
                            this.nonTerminals.put(myNodeName, exteriorNonTerminal);
                            } 
                 
                        else {  //return reference
                            exteriorNonTerminal = this.nonTerminals.get(myNodeName);
                           }
                        
                     
                        //PART 2: add rules - only if current node is NONTERMINAL
                        //ArrayList<Constituent> rhsArrayofConstituents = new ArrayList<Constituent>();
                        Rule tempRule;
                        
                        //if current node is preterminal then no labels for children
                        if (myNode.getChildNodes().size()==1 && !(myNode.getName().equals("TOP")))   {   //PRETERMINAL OR TOP!!
                            
                            String daughterTerminalName = myNode.getChildNodes().get(0).getName().toLowerCase();
                            Terminal daughterTerminal = this.Terminals.get(daughterTerminalName);
                            
                            //System.out.println("daughterTerminalName=" + daughterTerminalName + "daughterTerminal=" + daughterTerminal.getName()); 
                            //for exterior nonTerminal, check if rule already exists
                            Rule tempRule1 = new Rule(exteriorNonTerminal, daughterTerminal);
                            boolean bExists = false;
                            for (Rule existingRule : exteriorNonTerminal.getRules()) {
                                if (tempRule1.equals(existingRule)) {
                                    //update rulecount
                                    existingRule.increaseCount();
                                    bExists = true;
                                }
                            }
                            if (!bExists ){
                                exteriorNonTerminal.addRule(tempRule1);
                            }
                            
                        }
                        else    {   //TWO or more NONTERMINAL CHILDREN or TOP
                            if (myNode.getChildNodes().size()==2) {
                                
                                //find the children
                                String myChildNodeName = myNode.getChildNodes().get(0).getName();
                                NonTerminal exteriorChild1 =  this.nonTerminals.get(myChildNodeName);
                                myChildNodeName = myNode.getChildNodes().get(1).getName();
                                NonTerminal exteriorChild2 =  this.nonTerminals.get(myChildNodeName);
                           
                               
                                
                                //this rule may not be unique
                                //tempRule = new Rule(myNodeNonTerminal, rhsArrayofConstituents);
                                tempRule = new Rule(exteriorNonTerminal, exteriorChild1, exteriorChild2 );
                                if (tempRule==null) System.out.println("tempRule==null");
                                //System.out.println("RPPPP=" + myNodeNonTerminal.getName() + "-->" + ChildNonTerminal1.getName() + "  " + ChildNonTerminal2.getName());
                                //System.out.println("myNodeNonTerminal=" + myNodeNonTerminal.getName());
                                //System.out.println("ChildNonTerminal1=" + ChildNonTerminal1.getName());
                                //System.out.println("ChildNonTerminal2=" + ChildNonTerminal2.getName());
                                boolean bExists = false;
                                for (Rule existingRule : exteriorNonTerminal.getRules()) {
                                    if (tempRule.equals(existingRule)) {
                                        //update rulecount
                                        existingRule.increaseCount();
                                        bExists = true;
                                    }
                                }
                                if (!bExists ) exteriorNonTerminal.addRule(tempRule);
                            }
                            else {  //TOP node
                                String myChildNodeName = myNode.getChildNodes().get(0).getName();
                                NonTerminal exteriorChild1 =  this.nonTerminals.get(myChildNodeName);
                         
                               //System.out.println("myChildNodeName=" + myChildNodeName + "exteriorChild1=" + exteriorChild1.getName()); 
                                tempRule = new Rule(exteriorNonTerminal, exteriorChild1);
                                boolean bExists = false;
                                for (Rule existingRule : exteriorNonTerminal.getRules()) {
                                    if (tempRule.equals(existingRule)) {
                                        //update rulecount
                                        existingRule.increaseCount();
                                        bExists = true;
                                    }
                                }
                                if (!bExists ) exteriorNonTerminal.addRule(tempRule);
                            }
                    }
                }   //NONTERMINALS, part 2 : add rules

               
                }

            }
        }
    }  
    
    public void prepareCFGGrammarRulesForPrinting(parseTree myParseTree, HashMap<String, HashMap<String, Integer>> rulesForPrinting, HashMap<String, Integer> preterminalRulesForPrinting, HashSet<String> terminalsForPrinting) {
           
        String myNodeName=null;
        StringBuffer myRuleString;
        String preterminalRule = null;
        for (Node myNode : myParseTree.getNodes()) {
            //LOOP over all nodes, including terminal nodes

            //WITHOUT SEMANTICS
            myNodeName = myNode.getName().split("-")[0].split("=")[0];
            //XXX WITH SEMANTICS
            // myNodeName = myNode.getName();
             //first part: create Terminals or nonTerminals for the grammar
            if (myNode.getType()==Main.NONTERMINAL) { //ignore terminal nodes
                //create StringBuffer
                myRuleString = new StringBuffer();
                //add LHS, remove semantic annotation after -
                 myRuleString.append(myNodeName).append(" ");
                
                //add RHS, find daughters
                for (Node daughterNode : myNode.getChildNodes()) {
                    //if terminal then make it lowerCase
                    if (!Main.GRAMMAR_PRINTING_PRETERMINALS && daughterNode.getType()==Main.TERMINAL) {
                        myRuleString.append(daughterNode.getName().toLowerCase().split("-")[0].split("=")[0]).append(" ");
                    }
                    else 
                    //XXX TEMP WITH SEMANTICS
                    //myRuleString.append(daughterNode.getName()).append(" ");
                    myRuleString.append(daughterNode.getName().split("-")[0].split("=")[0]).append(" ");
                }
                //create entry for nonT
                if (rulesForPrinting.get(myNodeName)==null) {
                    //create empty HM
                    HashMap<String, Integer> tempHM = new HashMap<String, Integer>();
                    rulesForPrinting.put(myNodeName, tempHM);
                }
                //get the HP for this LHS nonT
                HashMap<String, Integer> rulesOfNonT = rulesForPrinting.get(myNodeName);
                //identify this rule in HashMap, if exists increase rule count by one
                //if not put into HashMap with count =1
                if (rulesOfNonT.get(myRuleString.toString())==null) {
                    rulesOfNonT.put(myRuleString.toString(), new Integer(1));
                }
                else {  //already existed: increase count by one
                    rulesOfNonT.put(myRuleString.toString(), new Integer(rulesOfNonT.get(myRuleString.toString()).intValue()+1));
                }
            }
            else {  //myNode.getType()==Main.TERMINAL
          
                if (Main.GRAMMAR_PRINTING_PRETERMINALS) {
                    //create preTerminal node
                    preterminalRule = myNodeName + " " + myNodeName.toLowerCase() + " ";

                    if (preterminalRulesForPrinting.get(preterminalRule)==null) {
                        preterminalRulesForPrinting.put(preterminalRule, new Integer(1));
                    }
                    else {  //already existed: increase count by one
                        preterminalRulesForPrinting.put(preterminalRule, new Integer(preterminalRulesForPrinting.get(preterminalRule).intValue()+1));
                    }
                }
                else {
                    terminalsForPrinting.add(myNode.getName());
                }
            }
        }
    }
    
    
   public void calculateRuleProbabilities() {
       
       for(NonTerminal nt : this.nonTerminals.values()){
           int totFrequency = 0;
           for (Rule myRule : nt.getRules()) {
               totFrequency += myRule.getCount();
           }
           for (Rule myRule : nt.getRules()) {
               if (Main.DOGOODMANDOP)  myRule.computeRuleProbability();
               else myRule.setRuleProbability(((double) myRule.getCount())/((double) totFrequency));
           }
            
        }
   }
    
       
    public void indexRulesAccordingToRHS() throws IOException {
    	//Set of all rules
        //HashSet<Rule> allRules = getAllRules();
        //HashSet<Rule> allRules = new HashSet<Rule>();
        String myRHSOne;
        
        for(NonTerminal nt : nonTerminals.values()){
            //allRules.addAll();
            
            for(Rule myRule : nt.getRules()) {
            	
                //for every RHS there is a HashSet of rules
                //this HashSet together with the RHS are put into HashMap rulesIndexedByUnaryRHS
                 
                myRHSOne = myRule.getRightHandSide().get(0).getName();
                
                if (myRule.getRightHandSide().size() ==2) { //only reason to store it is for binary rules in CYK induction
                    //XXX TEMP SOLUTION check for BUG
                    //if (myRule.getRightHandSide().get(0)!=null && myRule.getRightHandSide().get(1)!=null) {  
                    
                        if (rulesIndexedByBodyLeftNonT.get(myRHSOne)== null) {
                            //create new HashSet<Rule>;
                            HashSet<Rule> tempHashSet = new HashSet<Rule>();
                            tempHashSet.add(myRule);
                            this.rulesIndexedByBodyLeftNonT.put(myRHSOne, tempHashSet);
                        }
                        else{
                            HashSet<Rule> tempHashSet = this.rulesIndexedByBodyLeftNonT.get(myRHSOne);
                            tempHashSet.add(myRule);
                        }
                    //}
                    //else {
                        //System.out.println("BUG for rule " + myRule.getRightHandSide().get(0))
                    //}
                }
                else    {
                	
                    if (rulesIndexedByUnaryRHS.get((myRHSOne + "*"))== null) {
                        //create new HashSet<Rule>;
                        HashSet<Rule> tempHashSet = new HashSet<Rule>();
                        tempHashSet.add(myRule);
                        this.rulesIndexedByUnaryRHS.put(myRHSOne + "*", tempHashSet);
                    }
                    else{
                        HashSet<Rule> tempHashSet = this.rulesIndexedByUnaryRHS.get(myRHSOne + "*");
                        tempHashSet.add(myRule);
                    }
                }
                
            } 
        }
    }
    
    public HashSet<Rule> getAllRules(){
        //Set of all rules
        HashSet<Rule> allRules = new HashSet<Rule>();
        
        for(NonTerminal nt : nonTerminals.values()){
            allRules.addAll(nt.getRules());
        }
        
        return allRules;
    }
    
    public HashSet<Rule> getRulesFromRHS(String myRHS){
        return this.rulesIndexedByUnaryRHS.get(myRHS);
    }
    
    public Object clone() {
        Grammar result = new Grammar();
        
        result.Terminals = (HashMap<String, Terminal>) this.Terminals.clone();
        result.nonTerminals = (HashMap<String, NonTerminal>) this.nonTerminals.clone();
        //result.rulesIndexedByUnaryRHS = (HashMap<String, HashSet<Rule>>) this.rulesIndexedByUnaryRHS.clone();
        
        return result;
    }
    
    
}
