Ñò
P±Nc           @   sW  d  d k  l Z d  d k l Z d  d k l Z l Z l Z l Z l Z l	 Z	 l
 Z
 l Z l Z d  d k l Z l Z l Z d  d k l Z l Z l Z l Z d  d k l Z d  d k l Z d  d k l Z d  d	 k l Z d  d
 k l Z d  d k  l! Z! l" Z" l# Z# d  d k$ l% Z% l& Z& e' Z( d Z) e* a+ e
 ƒ  Z, d a- d d „ Z. e/ d d d „ Z0 d „  Z1 d f  d „  ƒ  YZ2 e' e/ d „ Z3 d d „ Z4 d „  Z5 d „  Z6 d „  Z7 d „  Z8 d „  Z9 d „  Z: d „  Z; d  „  Z< d! „  Z= d" „  Z> d# „  Z? d$ „  Z@ d% „  ZA d& „  ZB d' „  ZC d( „  ZD d) „  ZE d* „  ZF d+ „  ZG d, „  ZH d- „  ZI e* d. „ ZJ d d/ „ ZK d0 „  ZL e* e* e* d1 „ ZM d2 „  ZN d3 „  ZO d4 „  ZP d5 „  ZQ d6 „  ZR eS d7 j o“ d  d8 kT ZT eT iU d9 e* d: eT iV eT iW Bƒ \ ZX ZY eY o eX o d; eY GHn e o e d o e d d< iZ ƒ  j o e[ e d d= ƒ n n d8 S(>   iÿÿÿÿ(   t   BitParChartParser(   t   memoize(	   t   Treet   WeightedProductiont   WeightedGrammart   edit_distancet   ViterbiParsert   FreqDistt   WordNetLemmatizert   Nonterminalt   ImmutableTree(   t	   f_measuret	   precisiont   recall(   t   chaint   combinationst   productt   permutations(   t   log(   t   mul(   t   defaultdict(   t   sample(   t   argv(   t   heappusht   heappopt   nlargest(   t   stdt   meani
   i   t   topc            s¯  d „  } d „  } g  g  } } d d t  | g  ƒ f } \ ‰  } }	 | i ˆ  | |	 f ƒ x3t | ƒ D]%\ }
 } x| | | | ƒ oï t | ƒ } xÜ ‡  f d †  |  Dƒ D]Ä } ˆ  | i ƒ  } t |	 t | ƒ ƒ } | d  j o q¬ n | i ƒ  d | j o= | |
 d } | | | ƒ } | | | f } t	 | | ƒ q¬ | } | | | ƒ } | | | f } t	 | | ƒ q¬ Wqv Wg  } | | } } qg Wt | ƒ \ } } } | | f S(   Nc         S   s   |  d d j  o t  Sd  S(   Ni    g-Cëâ6
¿(   t   True(   t   ht   Ct   N(    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   above_threshold   s     c         S   s   d S(   Ng      ð¿(    (   t   t1t   w1(    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   LAP   s    iÿÿÿÿc         3   s5   x. |  ]' } | i  ƒ  ˆ  d  j o	 | Vq q Wd S(   i    N(   t   prob(   t   .0t   x(   t   q(    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>%   s   	 i    i   (
   R   t   appendt	   enumerateR   R%   t   joint   production_to_treet   Nonet   rhsR   (   t   grammart   sentt   rootR!   R$   R   R    R   t   rt   tt   it   wR'   t   q1R"   R#   t   r1t   h1R%   t   foot   tree(    (   R(   s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   topdownparse   s:    		%    i   c         c   sÀ  | d  j o t d g  ƒ } n | | j o
 t ‚ n g  } | i ƒ  D]' } t | ƒ d j o | | i qH qH ~ } | ot | d ƒ } g  }	 |  i | D] } |	 | q¡ ~	 }
 g  g  } } xE |
 D]= } | i ƒ  | i	 ƒ  j o | i
 | ƒ qË | i
 | ƒ qË WxÀ | D]¸ } t | t | ƒ ƒ } | i ƒ  | t | i ƒ  ƒ  j o qn |  i | i | ƒ |  i | i
 | ƒ xH t |  | t | i ƒ  ƒ | | ƒ D]! \ } } | | i ƒ  | f Vq¦WqWxê | D]¼ } t | t | ƒ ƒ } | i ƒ  | t | i ƒ  ƒ  j o qÖn |  i | i | ƒ |  i | i
 | ƒ xL t |  | t | i ƒ  ƒ | | d ƒ D]! \ } } | | i ƒ  | f VqmWqÖWn# | i ƒ  | j o | d f Vn d  S(   NR   i    i   (   R-   R   t   StopIterationt   subtreest   lent   nodeR	   t
   _lhs_indext   lhsR.   R)   R+   R,   t   leavest   removet   mytopdownparseR%   (   R/   R0   R3   t   deptht
   depthlimitt   _[1]t   at   substitution_sitest   leftmostt   _[2]t
   candidatest   rect   nonrecR'   R"   t   p(    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyRD   8   sL      
A(   # 
  #  ! c         C   s‚   x{ |  i  ƒ  D]m } t |  | t ƒ oS |  | i | i j o< t |  | ƒ d j o% |  i t ƒ } | | i | ƒ | Sq Wd  S(   Ni    (   t   treepositionst
   isinstanceR   R?   R>   t   copyR   t   extend(   RH   t   bt   ct   d(    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR+   W   s     Bt   TransformationDOPc           B   sz   e  Z d  „  Z d d „ Z e d „ Z d „  Z d „  Z e e	 e d d „ Z
 e d d „ Z e d	 „ Z e e d
 „ Z RS(   c         C   s7   t  t ƒ |  _ t  t ƒ |  _ t ƒ  |  _ h  |  _ d  S(   N(   R   R   t   grammardictt   mygrammardictt   fdt   mangled(   t   self(    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   __init__`   s    t   leftc   	      C   sã   d GHx× | D]Ï \ } } } } | d j o | | } } n | i  ƒ  } t | ƒ } t | ƒ } t d „  | i ƒ  ƒ d } |  i | i | | f | ƒ |  i | i  ƒ  i | | f | ƒ |  i i | i	 ƒ  t
 | ƒ ƒ q Wd  S(   Ns   adding to grammart   rightc         S   s   t  |  i ƒ  ƒ d  j S(   i    (   R>   R.   (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   <lambda>o   s    i    (   t   freezet   tuplet
   my_flattent   filtert   productionsRX   t   incRY   RZ   RA   t   float(	   R\   t   mlstst   sourcet   lefttreet	   righttreet   linkst   countt   flattened_treet   index(    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   add_to_grammarf   s     	#c         C   sw   | o5 t  d „  |  i i ƒ  Dƒ d d „  d t ƒ|  _ n8 t  t d „  |  i i ƒ  Dƒ ƒ d d „  d t ƒ|  _ d  S(   Nc         s   s   x |  ] } | Vq Wd  S(   N(    (   R&   RH   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>x   s   	t   keyc         S   s   t  |  i ƒ  ƒ S(    (   R>   RP   (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`   z   s    t   reversec         s   s%   x |  ] } t  | ƒ i ƒ  Vq Wd  S(   N(   t   undecorate_with_idsRa   (   R&   RH   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>}   s   	c         S   s   t  |  i ƒ  ƒ S(    (   R>   RP   (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`      s    (   t   sortedRY   t   keysR   t   mygrammarsortedt   set(   R\   t   withids(    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   sort_grammaru   s    			c         C   s&  x| D]\ } } t  d „  | i ƒ  Dƒ ƒ } xì | i ƒ  D]Þ \ } } t i | d ƒ } t i | d ƒ | j o& t | | d | | d g ƒ } n t | | g ƒ } t | | g ƒ }	 |	 i ƒ  d }
 |  i |
 i | d f ƒ |  i |	 i | d f ƒ |  i	 i |
 i
 ƒ  d ƒ q< Wq Wd  S(   Nc         s   s7   x0 |  ]) \ } } t  i | d  ƒ | | f f Vq Wd S(   t   vN(   t   wnlt	   lemmatize(   R&   R5   RO   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>†   s   	 Rz   i   i    g      ð?(    (    (   t   dictt   posR{   R|   R
   Re   RX   Rf   RY   RZ   RA   (   R\   t   corpusRH   RT   t   blemt   wordR~   t   lemR_   R^   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt	   extendlex‚   s      &c         C   sr   xk |  i  i ƒ  D]Z \ } } d | GHxA | i ƒ  D]3 \ \ } } } d | GHd t | ƒ GHd | GHq2 WHq Wd  S(   Ns   Source rule: %s
s
     Tree: %ss     Links: %ss     Count: %d
(   RX   t   itemst   repr(   R\   Rq   t   valueR:   Rl   Rm   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   print_grammar’   s     	 	t   Sc         C   sæ  g  } |  i  i ƒ  D]D \ } } | t | i ƒ  | i ƒ  d | d „  | i ƒ  Dƒ ƒ ƒq ~ } | ov| oð g  } t ƒ  }	 xÍ |  i  i ƒ  D]¼ \ } } | | i ƒ  ƒ }
 t t | ƒ ƒ } | i	 ƒ  |	 i
 | i ƒ  ƒ xl | i ƒ  D]^ } t | i ƒ  ƒ oE | i d t | i ƒ  ƒ d i t t | i ƒ  ƒ ƒ f |
 f ƒ qè qè WqŽ W| t |	 ƒ f Sg  } |  i  i ƒ  D]K \ } } | t | i ƒ  | i ƒ  d | | i ƒ  ƒ |  i | i ƒ  ƒqs~ } t t | ƒ | ƒ Sn | Sd  S(   NR%   c         s   s%   x |  ] \ \ } } } | Vq Wd  S(   N(    (   R&   R:   Rl   Rm   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>Ÿ   s   	 s   %s	%ss   	(   RX   R„   R   RA   R.   Rw   t   valuest   forceposR,   t   chomsky_normal_formt   updateR~   Re   R>   R)   t   strR+   t   mapt   listRZ   R   R	   (   R\   t   freqfnR%   t   bitparR1   RG   Rq   R†   R/   t   lexiconRm   t   tmpRH   RK   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   get_grammar›   s,    Z	 
 5ac         C   sÓ  t  ƒ  } t t t d „  |  i Dƒ ƒ ƒ } xà |  i i ƒ  D]Ï \ } } t | i ƒ  ƒ } t i	 | ƒ } | o t
 | ƒ i ƒ  n x„ | i ƒ  D]v }	 t |	 i ƒ  ƒ o] | oB | i d t |	 i ƒ  ƒ d i t t |	 i ƒ  ƒ ƒ f | ƒ q| i |	 | ƒ q q Wq; W| o | i ƒ  | f St  ƒ  }
 x0 | i ƒ  D]" \ } } |
 i | i ƒ  | ƒ q<Wt t | ƒ g  } | i ƒ  D]J \ } } | t | i ƒ  | i ƒ  d | t |
 i | i ƒ  | ƒ ƒ ƒq~ ƒ S(   Nc         s   s   x |  ] } | i  ƒ  Vq Wd  S(   N(   RB   (   R&   R:   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>Á   s   	 s   %s	%ss   	R%   (   R   R   t   reduceR   RY   R„   t   maxR‰   R   t   convertRŠ   R‹   Re   R>   R.   Rf   R   RA   R+   RŽ   R   R	   R   Rg   t   get(   R\   R‘   R1   R/   R’   Rq   R†   Rm   R“   RH   RZ   t   kRz   RG   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   get_my_grammar¿   s2    	   *	  c   +      #   sC  d „  } d „  } t  } t  } xã|  i D]Ø} | ˆ | ƒ oÂt } x¹|  i | D]¦\ } }	 |  i | | |	 f }
 |  i i t | i ƒ |
 ƒ } t i	 | ƒ } t
 t | ƒ ƒ } t
 t | ƒ ƒ } | p | |
 | f Vt } qR n g  } x5 | D]- } | i t
 |  i ˆ | d | ƒ ƒ ƒ qö Wt
 t | Œ  ƒ } x¼ | D]´ } t i	 | ƒ } xœ t | |	 ƒ D]V \ \ } } } | | d } t p | i | | i j o | | | | d <qePqeW|
 | t t d „  | Dƒ d ƒ } | | f Vt } q@WqR Wq( q( W| o8 ˆ i d j o( ˆ d |  i i t ˆ i ƒ ƒ f Vnü| oó| oìt ˆ ƒ oßt g  } |  i D]3 } | i ˆ i j o | | ˆ | ƒ | f qqqq~ d	 t ƒt  } x‚| D]v\ \ } }	 } | d
 j o Pn t ‡ f d †  | i ƒ  Dƒ ƒ o qÁn t i	 | ƒ } g  g  ‰ } t | ƒ } | i d d „  ƒ g  } | D] } | | d
 qU~ } g  } | i ƒ  D], } t | | t ƒ o | | | i q€q€~ } g  }  ˆ i ƒ  D]M } t ˆ | t ƒ o3 ˆ | i | j p ˆ | i | j o |  | qÆqÆ~  }! |! i d t ƒ g  }" |! D] } |" ˆ | i q7~" ‰  xÆ | D]¾ \ }# } | |	 j o¥ |	 | }$ |	 | =ˆ |$ | | <g  }% |! D]9 } | t |$ ƒ  |$ j o | |$ d  j o |% | qžqž~% }! g  }& |! D] } |& ˆ | i që~& ‰  ˆ i |# ƒ q[q[Wˆ p qÁn g  }' ˆ i ƒ  D] } | |! j o |' | q<q<~' } d i ƒ  }( t ‡  f d †  |( Dƒ ƒ o% t ‡ f d †  |( Dƒ ƒ o qÁn xƒ |  i | | ƒ D]o \ }) } |) i ƒ  g  j o qÄn t |) ˆ |! ƒ }* |* |  i |) i ƒ  ˆ i ƒ  t |! ƒ f <|* | | f VqÄWqÁWn d  S(   Nc      	   S   s’   x‹ | i  ƒ  D]} } yl |  | | | j o w n |  | i | | i j o t St | | ƒ d t |  | ƒ f j o t SWq t SXq Wt S(   Ni    (   RP   R?   t   FalseR>   R   (   R:   t	   candidatet   idx(    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   matchÛ   s       '	 	c            s¦  d } t  | i ƒ  ƒ } h  } x9t | i ƒ  d d t ƒD]} yàt | | t ƒ o# |  | | | j o | d 7} n7 t | | t ƒ o" | | |  i ƒ  j o d h  f S| | i g  } |  | d  D] } | | i qÎ ~ j o | d 7} n |  | i | | i j o | d 7} n g  } t	 |  | d  ƒ D]. \ ‰  } | i | | i j o | ˆ  q;q;~ }	 g  }
 t	 | | d  ƒ D]. \ ‰  } | i | | i j o |
 ˆ  q‹q‹~
 } x[ |	 D]S ‰  t
 ‡  f d †  | Dƒ ƒ d } | d  ˆ  f | | d  | f <| i | ƒ qÉWWn n Xt | ƒ d j o | d j o d h  f Sq; W| t |  i ƒ  ƒ } | t | i ƒ  ƒ } d | | | | } | | f S(	   Ng        i   Rq   i    iÿÿÿÿg      à?c         3   s#   x |  ] } | ˆ  | f Vq Wd  S(   N(    (   R&   t   m(   t   n(    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>þ   s   	 i   (   Rg   t   heightRt   RP   R>   RQ   R   RB   R?   R*   t   minRC   (   R:   Rœ   t   simR   Rl   R   RG   RH   RK   t   matchest   _[3]t	   competingt   nearestRT   t   result(    (   R    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt
   similarityå   s@     
0PP      i   c         s   s   x |  ] } | d  Vq Wd S(   i   N(    (   R&   RH   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>,  s   	 R   t   st   sqt   vpRr   i    c         3   s%   x |  ] } | ˆ  i  ƒ  j Vq Wd  S(   N(   RB   (   R&   RH   (   R:   (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>A  s   	 Rq   c         S   s   t  |  d  ƒ S(   i   (   R>   (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`   J  s    iÿÿÿÿs   md vbz vbp vbd vbc         3   s   x |  ] } | ˆ  j Vq Wd  S(   N(    (   R&   RH   (   t   nodes(    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>b  s   	 c         3   s   x |  ] } | ˆ  j Vq Wd  S(   N(    (   R&   RH   (   t   covered(    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>b  s   	 (   s   topRª   s   sqR¬   (    R›   Rv   R   RY   RZ   R˜   R	   R?   R   R—   R   t   frontier_nodesR)   t   my_mlt_derivR   t   zipt   CHECK_SUBSTITUTIONR•   R   R>   Rt   t   NUM_PARTIALt   anyRB   t   sortRP   RQ   t   splitt   mungeR[   Ra   Rb   (+   R\   R:   t   allowpartialRž   R©   t   yieldedt   matchedRœ   Rk   Rl   Rm   t   lhscountt   targett
   lfrontierst	   frontierst   new_subtree_forestRH   t   new_subtreest   subtreet   freqRo   R   R%   RG   t   partialt   scoret   newtreet
   notcoveredRK   t   fnodesR¥   t   nnodest   _[4]t   indicest   _[5]R?   t   oldidxt   _[6]t   _[7]t   _[8]t	   verbheadst   derivR¨   (    (   R­   R®   R:   s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR°   Ù   sœ    	
	&
   +  $(Z  #%F+<( 
M( 8;   (c         #   s–  | i  ƒ  d } | ˆ i j o ˆ i | i ƒ  } n† ˆ od ˆ  o d G| GHn t | ƒ g  } t d „  | Dƒ ƒ D] \ } } | | qv ~ f d f g } n ˆ  o d G| GHn t ‚ | i ƒ  ˆ i j o ˆ i | i ƒ  }	 n t d | i ƒ  ƒ ‚ t	 t
 ‡  ‡ ‡ f d †  | Dƒ Œ  ƒ }
 ˆ  oG d GHx? |
 D]3 } | p q:n d Gx | D] } | GHqYWd	 GHq:Wn t } xh| D]`\ \ } } } t | |	 ƒ } | d j p t ‚ t | ƒ } x|
 D]} t i | ƒ } xû t | ƒ D]Â \ } } | | d i | | | d
 i j } t p | o: | | d | | | d
 <ˆ  o d G| | d GHqšn3 ˆ  o* | | d Gd G| Gd G| GHd G| GHn P| p | d 8} qíqíW| t d „  | Dƒ ƒ 7} | | f Vt } qËWq‚Wˆ o¥| ot | ƒ g  } t d „  | Dƒ ƒ D] \ } } | | q~ d } } } t | |	 ƒ } | d j p t ‚ t | ƒ } x|
 D]} t i | ƒ } xû t | ƒ D]Â \ } } | | d i | | | d
 i j } t p | o: | | d | | | d
 <ˆ  o d G| | d GHqFn3 ˆ  o* | | d Gd G| Gd G| GHd G| GHn P| p | d 8} q™q™W| t d „  | Dƒ ƒ 7} | | f Vt } qwWn d  S(   Ni    s	   not foundc         s   s-   x& |  ] } t  | t ƒ o	 | Vq q Wd  S(   N(   RQ   R   (   R&   RH   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>u  s   	 g      ð?s    %s not in fd c         3   s<   x5 |  ]. } t  | t ƒ o ˆ i | ˆ ˆ  ƒ Vq q Wd  S(   N(   RQ   R   t   get_mlt_deriv_multi(   R&   RH   (   t   verboseR\   t	   smoothing(    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>~  s   	 s   subtree forestt   <t   >i   t   substituteds   does not fit withRi   RL   i   c         s   s   x |  ] \ } } | Vq Wd  S(   N(    (   R&   RH   R%   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>œ  s   	 c         s   s-   x& |  ] } t  | t ƒ o	 | Vq q Wd  S(   N(   RQ   R   (   R&   RH   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>   s   	 g      à?c         s   s   x |  ] \ } } | Vq Wd  S(   N(    (   R&   RH   R%   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>´  s   	 (   Re   RX   R„   R,   R*   R<   RA   RZ   t
   ValueErrorR   R   R›   R   t   AssertionErrorR¯   R   R—   R?   R²   t   sumR   (   R\   R:   RÔ   RÓ   t   top_productionRL   RG   R    RH   R»   R¿   RT   R¹   Rk   Rl   Rm   R%   R¾   RÀ   R¼   Ro   Rž   RK   (    (   RÓ   R\   RÔ   s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyRÒ   n  s”     P (    	   &  K  &  (   t   __name__t
   __module__R]   Rp   R›   Ry   Rƒ   R‡   RÚ   R   R”   Rš   R°   RÒ   (    (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyRW   _   s   				$•c            s  | p  t  d „  |  i ƒ  Dƒ ƒ } n ‡  f d †  ‰  t } g  } g  } |  i t ƒ }  | i t ƒ } xô| oìd } d  } d  }	 x¨ t |  ƒ D]š \ }
 } } xˆ t | ƒ D]z \ } } } t | t ƒ o[ ˆ  | | ƒ oK t t	 | ƒ ƒ | j o. t t	 | ƒ ƒ } | | |
 | | | f } q%q« q« Wq Wt
 oþ| d  j oñxît |  ƒ D]Ü\ }
 } } t | t ƒ o½t | ƒ d j oªt | d ƒ t j o“xt | ƒ D]~\ } } } t | t ƒ o_t | ƒ d j oLt | d ƒ t j o5| i d d j o | i d d j p  | i d j oq | i d j od t i | d d ƒ t i | d d ƒ j pk | d d j o& | d d j o | d | d j p7 | d d j ot | d d j oc | d | d j oN | d | d j o n d G| d Gd G| d GH| | |
 | | | f }	 q"q¤q¤WqNqNWn | oh | \ } } }
 } } } | o t | | ƒ \ } } n t | i g  ƒ |
 | <t | i g  ƒ | | <np |	 oh |	 \ } } }
 } } } | o t | | ƒ \ } } n t | i g  ƒ |
 | <t | i g  ƒ | | <n |	 o | i | | f ƒ qi | d  j o
 t } qi | i | | f ƒ qi Wg  } x| | D]t \ } } xe t | i ƒ  | i ƒ  ƒ D]H \ } } t | i ƒ  ƒ d j o# | i t | ƒ t | ƒ f ƒ q’q’WqjW| i | ƒ | i |  | f ƒ | S(   Nc         s   s   x |  ] } | | f Vq Wd  S(   N(    (   R&   R5   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>½  s   	 c            s  t  |  ƒ t  | ƒ j oý t |  t ƒ o³ t | t ƒ o£ |  i i d ƒ d | i i d ƒ d j p: |  i i d ƒ d d
 j oQ | i i d ƒ d d j o4 d |  i j p# t ‡  f d	 †  t |  | ƒ Dƒ ƒ St Sqt |  t ƒ o& t | t ƒ o |  i	 ƒ  | i	 ƒ  j Sn t Sd  S(   Nt   @iÿÿÿÿt   !i    Rª   R«   Rˆ   t   SQc         3   s(   x! |  ] \ } } ˆ  | | ƒ Vq Wd  S(   N(    (   R&   RH   RT   (   t   ideq(    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>Æ  s   	 (   Rª   s   sqRˆ   Rà   (   Rª   s   sqRˆ   Rà   (
   R>   RQ   R   R?   R¶   t   allR±   R›   R   t   lower(   t   tree1t   tree2(   Rá   (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyRá   ¾  s     -4:i    i   t   vVt   mdt   MDRz   t   iss   'st   nots   n'tt
   lemmatizeds   <=>(   s   mdRè   (   s   mdRè   (   s   iss   's(   s   iss   's(   s   nots   n't(   s   nots   n't(   R}   RB   R   RR   R-   t   my_subtreesRQ   R   R>   t   leaves_and_frontier_nodest   USE_LEMMATIZATIONt   typeR   R?   R{   R|   t   decorate_with_idst   decorate_pairR)   R›   R±   Re   R.   R,   RS   (   Rä   Rå   t   decoratet
   alignmentst   shared_subtreest   linked_subtreest   equivalentst   max_shared_subtree_sizet   max_shared_subtreet   lemmatized_equivalentst   parent1t   num1R4   t   parent2t   num2t   jt   minimal_subtreesRH   RT   (    (   Rá   s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   minimal_linked_subtrees¸  s‚         ( : :( -770  
  %iè  c            sJ  d „  } d GHg  } xÈ ˆ  D]À \ } } t  d „  t | ƒ ƒ } t  d „  t | ƒ ƒ } g  } | D]5 }	 | | i |	 ƒ | | |	 i i d ƒ d ƒ f qb ~ }
 g  } |
 D] }	 | |	 d q« ~ }
 | i | | |
 f ƒ q Wd GHg  } xY| D]Q\ } } } | G| G| GHt t | ƒ ƒ } t d „  t | ƒ Dƒ ƒ } g  } | i ƒ  D]I } | i d ƒ d	 | j o) | | | | | i d ƒ d	 | f qLqL~ } x¡t	 t
 | ƒ ƒ D]\ } }
 | o | | j o Pn g  } | D]( } | |
 j o | t | d
 ƒ qáqá~ } | i t ƒ } | i t ƒ } xK |
 D]C \ } } } t | | i ƒ | | _ t | | i ƒ | | _ q7W| i | | | t t ‡  f d †  | Dƒ d ƒ f ƒ d | i j o{ | i t ƒ } | i t ƒ } t | i ƒ | _ t | i ƒ | _ | i | | | t t ‡  f d †  | Dƒ d ƒ f ƒ q±q±Wqñ W| S(   Nc         S   sB   x/ t  |  ƒ D]! \ } } | | i j o | Sq Wt d ƒ ‚ d  S(   Ns   myindex(x): x not in list(   R*   R?   RØ   (   t   lR'   R    RH   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   myindex  s
      	s
   Phase 1...c         S   s   t  |  t ƒ o d  |  i j S(   RÞ   (   RQ   R   R?   (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`     s    c         S   s   t  |  t ƒ o d  |  i j S(   RÞ   (   RQ   R   R?   (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`      s    RÞ   i   s
   Phase 2...c         s   s2   x+ |  ]$ \ } } | i  d  ƒ d | f Vq Wd S(   RÞ   iÿÿÿÿN(   R¶   (   R&   RH   RT   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>0  s   	 iÿÿÿÿi   c         3   s"   x |  ] } t  | ˆ  ƒ Vq Wd  S(   N(   Rm   (   R&   t   leaf(   Rõ   (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>=  s   	 c         3   s"   x |  ] } t  | ˆ  ƒ Vq Wd  S(   N(   Rm   (   R&   R  (   Rõ   (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>D  s   	 (   Rd   Rí   Ro   R?   R¶   R)   R}   R¯   Ru   R*   t   sublistsR   RR   R   t   rmidR•   R   (   Rõ   t   limit_subtreesR  t   linked_subtrees2R"   t   t2t   l1t   l2RG   R'   RH   RK   t   newtreesRl   t
   leafindex1t
   leafindex2R¥   R  t	   indices12R    RÉ   RT   RB   t   newtree1t   newtree2R2   R  t	   newtree1at	   newtree2a(    (   Rõ   s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt&   linked_subtrees_to_probabilistic_rules  sN    	 I% c  < &2c            s  t  i ˆ ƒ ‰ ˆ i ƒ  } x] | oU | i d ƒ } t ˆ | t  ƒ o. ˆ | i ƒ  g  j o ˆ | =ˆ i ƒ  } q q Wx™| o‘| d } g  } ˆ i ƒ  D]0 } t ˆ | t  ƒ o ˆ | o | | q  q  ~ } | i d t ƒ g  }	 | D] } |	 ˆ | i qô ~	 ‰ d  } | | d  i }
 t | ƒ d j o |
 d j o
 d }
 n+ t | ƒ d j o |
 d j o
 d }
 n |
 ˆ j oA t | ƒ d t | ˆ i
 |
 ƒ ƒ j o | ˆ i
 |
 ƒ } nÊ t ‡ f d †  | | d  Dƒ ƒ o¤ g  } | | d  D] } | | i q~ ‰  t ‡ ‡  f d	 †  | Dƒ ƒ } g  } | D]* } t | ƒ t | ƒ j o | | qEqE~ } | o | i ƒ  } q“n | d  j oé xæ t d t | ƒ ƒ D]Ë } | | |  i }
 t | |  ƒ d j o |
 d j o
 d }
 n0 t | |  ƒ d j o |
 d j o
 d }
 n |
 ˆ j oB t | ƒ | t | ˆ i
 |
 ƒ ƒ j o | ˆ i
 |
 ƒ } Pq¶d
 } q¶Wn | o' t  i t ˆ | | | ƒ ƒ ˆ | <n  t  i t ˆ | | | ƒ ƒ ‰ g  } | D]( } | t | ƒ  | j o | | qáqá~ } q~ Wˆ S(   Ni    Rq   iÿÿÿÿi   Rª   R«   i   c         3   s"   x |  ] } | i  ˆ  j Vq Wd  S(   N(   R?   (   R&   R'   (   R­   (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>b  s   	 c         3   s5   x. |  ]' } ˆ  | i  ˆ j o | d   Vq q Wd S(   iÿÿÿÿN(   R?   (   R&   R'   (   RÑ   t   siblings(    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>e  s   	 (    (   R   R—   RP   t   popRQ   RB   Rµ   R>   R?   R-   Ro   R´   R   t   ranget
   guessorder(   RÑ   R:   RÆ   t   tpR   t   ncRG   RH   RÊ   RK   t   parentR¥   t   yR“   RÉ   R    RË   R'   (    (   R  R­   RÑ   s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR·   G  sZ     + 
J(  
  
7%,>  % 
% 
7 'Ac            s£  d „  } | g  } | i  ƒ  D] \ } } | | q ~ g  } | | i  ƒ  D] \ } } | | qK ~ ƒ \ ‰ ‰  | g  } | i  ƒ  D] \ } } | | q„ ~ g  }	 | | i  ƒ  D] \ } } |	 | q² ~	 ƒ \ ‰ ‰ g  }
 x¡ t t |  ƒ d ƒ D]‰ } t d „  |  |  Dƒ Œ  } t d „  |  | Dƒ Œ  } |
 i t ‡ ‡  ‡ f d †  | Dƒ ƒ t ‡ ‡  ‡ f d †  | Dƒ ƒ | f ƒ qô W|  i t |
 ƒ d | | ƒ |  S(   Nc         S   sh   xU t  t |  ƒ ƒ D]A } |  | | t | ƒ !| j o |  |  |  | t | ƒ f Sq Wt d ƒ ‚ d  S(   Ns   middle not in list(   R  R>   RØ   (   R   t   middleR    (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt	   partitiony  s
     !i   c         s   s3   x, |  ]% } t  | t ƒ o | i ƒ  Vq q Wd  S(   N(   RQ   R   R~   (   R&   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>ƒ  s   	 c         s   s3   x, |  ]% } t  | t ƒ o | i ƒ  Vq q Wd  S(   N(   RQ   R   R~   (   R&   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>„  s   	 c         3   sJ   xC |  ]< \ } } | ˆ j p | ˆ j o | ˆ  j o	 d  Vq q Wd S(   i   N(    (   R&   RH   RT   (   t   leftpR_   R^   (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>†  s   	 c         3   sJ   xC |  ]< \ } } | ˆ j p | ˆ j o | ˆ  j o	 d  Vq q Wd S(   i   N(    (   R&   RH   RT   (   t   rightpR_   R^   (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>‡  s   	 iÿÿÿÿ(   R~   R  R>   R   R)   RÚ   t   insertR–   (   RÑ   R:   RÆ   R  RG   RH   RT   RK   R¥   RÉ   t	   positionsR    t   dleftt   dright(    (   R_   R  R  R^   s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR  x  s    	gg % c         C   sb   g  } xU |  D]M } t  | t ƒ o* t | ƒ d j o | i t | ƒ ƒ q | i | ƒ q W| S(   Ni    (   RQ   R   R>   RS   Rí   R)   (   R:   RB   t   child(    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyRí   ’  s     #c         C   s   t  |  i t |  ƒ ƒ S(   N(   R   R?   Rí   (   R:   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyRc   ›  s    c      	      sb   x[ ˆ  D]S \ } } | i  |  j o7 t t ‡  f d †  t d „  t | ƒ Dƒ ƒ Dƒ d ƒ Sq Wd S(   Nc         3   s&   x |  ] } t  | ˆ  ƒ d  Vq Wd S(   i   N(   Rm   (   R&   RU   (   Rõ   (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>¢  s   	c         s   s   x |  ] \ } } | Vq Wd  S(   N(    (   R&   R'   R  (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>£  s   	 i   i    (   R?   R•   R   Rw   R¯   (   t   our_nodeRõ   RH   RT   (    (   Rõ   s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyRm   Ÿ  s     )c         C   sŸ   t  |  ƒ o |  i d f g St |  ƒ t j o g  Sg  } xX t |  ƒ D]J \ } } | g  } t | ƒ D]  \ } } | | | f | f qi ~ 7} qI W| Sd  S(   N(    (   t   frontier_nodeR?   Rï   R   R*   R¯   (   R:   RÇ   R~   t   streeRG   t   fnodeR2   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR¯   §  s     Bc         C   s    t  |  t ƒ o t |  ƒ d j S(   Ni    (   RQ   R   R>   (   R:   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR&  ¸  s    c      
   C   s‰   d } x| t  d |  d ƒ D]g } | t t t  d |  d ƒ d ƒ t t t  d | d ƒ d ƒ t t t  d |  | d ƒ d ƒ 7} q W| S(   Ni    i   (   R  R•   R   (   R    R¨   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   comb»  s
     ec      
      s;   t  d t t  ‡  f d †  t t ˆ  ƒ d d ƒ Dƒ d ƒ ƒ S(   Nc         3   s"   x |  ] } t  ˆ  | ƒ Vq Wd  S(   N(   R   (   R&   RH   (   R  (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>Ã  s   	 i    iÿÿÿÿ(    (   (    (    (   R   R•   R  R>   (   R  (    (   R  s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR  Á  s    c         C   s   |  i  d ƒ d S(   NRÞ   i    (   R¶   (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR  Ç  s    c         C   sž   |  i  t ƒ } | i  t ƒ } xs t | i ƒ  | i ƒ  ƒ D]V \ } } d | i j o: d | i t f | _ d | i t f | _ t d 7a q: q: W| | f S(   NRÞ   s   %s@%di   (   RR   R   R±   R=   R?   t
   current_id(   Rä   Rå   t   utree1t   utree2RH   RT   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyRð   Ê  s     c         C   sO   t  i |  ƒ }  x9 |  i d d „  ƒ D]" } | i | i i d ƒ  | _ q% W|  S(   NRd   c         S   s   d  |  i  j S(   RÞ   (   R?   (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`   ×  s    RÞ   (   R   R—   R=   R?   Ro   (   R:   RH   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyRs   Õ  s
      c         C   s^   |  i  t ƒ } | i  t ƒ } d | i t f | _ d | i t f | _ t d 7a | | f S(   Ns   %s@%di   (   RR   R   R?   R*  (   Rä   Rå   R+  R,  (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyRñ   Û  s    
c         C   s/   t  |  ƒ t j o t t |  ƒ g  ƒ S|  Sd  S(   N(   Rï   R	   R   R   (   R?   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   treeifyä  s    c         C   s@   t  t |  i ƒ  ƒ g  } |  i ƒ  D] } | t | ƒ q# ~ ƒ S(   N(   R   R   RA   R.   R-  (   t
   productionRG   R2   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR,   ê  s    c         c   s_   xX t  |  ƒ D]J \ } } |  | | f Vt | t ƒ o  x t | ƒ D] } | VqD Wq q Wd  S(   N(   R*   RQ   R   Rì   (   R:   R    R$  RÁ   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyRì   í  s      c          C   s­   t  d ƒ }  t  d ƒ } t |  | ƒ } x | D] \ } } | G| GHq. Wt | ƒ } HHx% | D] } x | D] } | GHqi WHq\ Wt ƒ  } | i | ƒ | i ƒ  | i ƒ  } | S(   Ns2   (S (NP John) (VP (V bought) (NP (DET a) (N car))))s9   (S (VBZ did) (NP John) (VP (V buy) (NP (DET a) (N car))))(   R   R   R  RW   Rp   R‡   R”   (   Rä   Rå   R3   RH   RT   R  RU   t   gr(    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   testô  s(         		
c          C   sq  t  d ƒ }  t  d ƒ } t |  | ƒ } d GHx | D] \ } } | G| GHq3 Wd GHt | ƒ } d GHx% | D] } x | D] } | GHqv WHqi Wt ƒ  } | i | ƒ | i ƒ  d GH| i ƒ  t  d ƒ } t  d ƒ }  | i |  |  f g ƒ t | i	 d	 d
 ƒ ƒ }	 d GH| i
 | d t ƒ\ } }
 |	 i ƒ  GH|
 G| GHy7 | i
 |	 i d i ƒ  ƒ d t ƒ\ } }
 |
 G| GHWn t j
 o } | GHn Xd GHt  d ƒ } x; t t | i |  ƒ ƒ d d „  ƒD] \ } }
 |
 G| GHq½Wt | i d	 d
 ƒ ƒ }	 |	 i ƒ  GHyW |	 i d i ƒ  ƒ }  x; t t | i |  ƒ ƒ d d „  ƒD] \ } }
 |
 G| GHq6WWn t j
 o } | GHn Xd  S(   NsC   (TOP (SQ (VBD Did) (NP (PRP I)) (VP (VB buy) (NP (PRP it))) (. ?)))s<   (TOP (S (NP (PRP I)) (VP (VBD bought) (NP (PRP it))) (. .)))s   
minimal linked subtreess   end
s'   
linked subtrees to probabilistic ruless   DOT grammarsi   (TOP Did (NP (NNP Mr.) (NNP Freeman)) (VP (VB have) (NP (NP (VB notice)) (PP (IN of) (NP (DT this))))) ?)sx   (TOP (SQ (VBD Did) (NP (NNP Mr.) (NNP Freeman)) (VP (VB have) (NP (NP (VB notice)) (PP (IN of) (NP (DT this))))) (. ?)))R1   t   TOPt   1RÔ   s%   Did Mr. Freeman have notice of this ?t   2Rq   c         S   s   |  d  S(   i   (    (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`   %  s    c         S   s   |  d  S(   i   (    (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`   +  s    (   R   R   R  RW   Rp   Ry   R‡   Rƒ   R   R”   t   get_mlt_derivR   R/   t   parseR¶   t	   ExceptionRt   R   R°   Rš   (   Rä   Rå   R3   RH   RT   R  RU   t   tdopR:   t   parserRO   t   e(    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   test2  s`         		

	*
% % c         C   s   |  i  t ƒ } xi |  i d ƒ D]X } t |  | d  ƒ d j o7 t d |  | d  i |  | f |  | g ƒ | | <q q W| S(   sQ    make sure all terminals have POS tags;
	invent one if necessary ("parent_word") RB   iÿÿÿÿi   s   %s_%s(   RR   R   RP   R>   R   R?   (   R:   R¨   RH   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyRŠ   0  s     ;c         C   s[   |  i  t ƒ } xE |  i d ƒ D]4 } d |  | d  i j o |  | | | d  <q q W| S(   s3    removed forced POS tags of the form "parent_word" RB   t   _iÿÿÿÿ(   RR   R   RP   R?   (   R:   R¨   RH   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   removeforcepos9  s     c         C   s<   x5 |  i  d d „  ƒ D] } | i i d d ƒ | _ q Wd  S(   NRd   c         S   s   d  |  i  j S(   s   !<>(   R?   (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`   B  s    s   !<>t    (   R=   R?   t   replace(   R:   RH   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   remcnfmarksA  s     c            sS   | o d g ‰  n d i  ƒ  ‰  x, |  i d ‡  f d †  ƒ D] } d | _ q< Wd  S(   NR   s   top s sq sinv smain vp npRd   c            s&   |  i  ƒ  d  j o |  i i ƒ  ˆ  j S(   i   (   R¡   R?   Rã   (   R'   (   t   exclude(    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`   G  s    t   X(   R¶   R=   R?   (   R:   Râ   RH   (    (   R@  s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt
   foldlabelsD  s      c         C   sì  |  d i  i ƒ  d j o d |  d _  n xº|  i ƒ  D]¬} t |  | t ƒ o’|  | i ƒ  d j o{|  | } | i  i ƒ  d j o. t | ƒ | j o |  | d  i  d 7_  n | i  i ƒ  d	 i ƒ  j ot i	 | d i ƒ  d
 ƒ d j o= t | ƒ | j o* | i  d 7_  |  | d  i  d 7_  qàt i	 | d i ƒ  d
 ƒ d j o= t | ƒ | j o* | i  d 7_  |  | d  i  d 7_  qàt i	 | d i ƒ  d
 ƒ d j o& t | ƒ d j o | i  d 7_  qàqäq8 q8 Wd  S(   Ni    Rª   t   sinvt   Smaini   Rç   iÿÿÿÿs   -auxs   vb vbz vbd vbpRz   t   bet   havet   dos   -do(   Rª   RC  (
   R?   Rã   RP   RQ   R   R¡   R>   R¶   R{   R|   (   R:   t   vbdepthRH   t   preterminal(    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt
   mark_be_doI  s"      +
)666c          C   sö  d }  d }  t  t |  i ƒ  i ƒ  ƒ }  t |  d  d  d … |  d d  d … ƒ }  d GHx/ |  D]' \ } } d t | ƒ t | ƒ f GHq\ Wt ƒ  } xX |  D]P \ } } t | | ƒ } t | ƒ } | i	 | ƒ | i	 t t | | ƒ ƒ ƒ q— W| i
 ƒ  | i d t ƒ \ } }	 t | |	 d d	 d
 d ƒ}
 | i d d	 ƒ } t | ƒ } d GHx¤t oœd Gt ƒ  } d  } d  } y< t t |
 i | i ƒ  ƒ ƒ d ƒ } | i ƒ  d G| GHWn t j
 o } | GHn Xy" | i | i ƒ  ƒ } d G| GHWn t j
 o } | GHn XyW xP | i | d t ƒD]9 \ } } d t | ƒ | f GHd Gd i | i ƒ  ƒ GHqWWn t j
 o } | GHn X| d  g  f j o qNn xV t | i | d t ƒƒ D]9 \ } } d t | ƒ | f GHd Gd i | i ƒ  ƒ GHq°WqNWd  S(   NsÀ  (S (NP John) (VP (V bought) (NP (DT a) (N car))))
	(S (VP (VBZ did)) (NP John) (VP (V buy) (NP (DT a) (N car))))
	(S (NP Mary) (VP (VBZ is) (ADJP (JJ happy))))
	(S (VBZ is) (NP Mary) (ADJP (JJ happy)))
	(S (NP (NP (DT the) (NN man)) (SBAR (WHNP (WP who)) (S (VP (VBZ is) (VP (VBG talking)))))) (VP (VBZ is) (VP (VBG walking))))
	(S (VBZ is) (NP (NP (DT the) (NN man)) (SBAR (WHNP (WP who)) (S (VP (VBZ is) (VP (VBG talking)))))) (VP (VBG walking)))sD  (S (NP Mary) (VP (VBZ is) (ADJP (JJ happy))))
	(S (VBZ is) (NP Mary) (ADJP (JJ happy)))
	(S (NP (NP John) (SBAR (WHNP (WP who)) (S (VP (VBZ mumbles))))) (VP (VBZ dreams) (PP (IN about) (NP unicorns))))
	(S (VBZ Does) (NP (NP John) (SBAR (WHNP (WP who)) (S (VP (VBZ mumbles))))) (VP (VB dream) (PP (IN about) (NP unicorns))))i   i   s   corpus:s   < %s, %s  >R‘   t
   rootsymbolRª   R    iè  R1   t   dones	   sentence:i    s	   viterbi1:s	   viterbi2:RÔ   s   transformed1 (prob=%s): %ss   words:t    R¸   s   transformed2 (prob=%s): %s(   RŽ   R   Rã   t
   splitlinesR±   R   RW   R   R  Rp   Ry   R”   R   R    Rš   R   t	   raw_inputR-   R<  R   t   nbest_parseR¶   t   un_chomsky_normal_formR6  R5  RÒ   R›   R…   R+   RB   R°   (   R   RH   RT   R7  Rä   Rå   RŸ   R  R/   R’   R8  t   myparsert	   parsetreet   myparsetreeR9  t   transformedR%   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt	   interfaceZ  sf    )  	  
 	%


  
  c   %         s¡  | pk | o |  i  d t ƒ \ } } n |  i d t d t ƒ \ } } t | | d t d d d d ƒ}	 d GHn g  }
 g  } d	 } xt | ƒ D]ö\ ‰ } | o3 ˆ Gd
 Gd i | i ƒ  ƒ GHh d | i ƒ  6} nÔ ˆ Gd
 G| GHy t	 |	 i
 | i ƒ  ƒ ƒ } Wn( t j
 o } g  } ˆ Gd G| GHn Xt ƒ  } x@ | D]8 } | i ƒ  | i t i t | ƒ ƒ d | i ƒ  ƒq>Wd Gt | ƒ GHt | ƒ d	 j o | d 7} n t ƒ  } d	 } x>t | ƒ D]0\ } } | oxo t |  i | ƒ ƒ D]X \ } \ } } | i d i | i ƒ  ƒ d | | | ƒ| d 7} | d j o PqïqïWt o qÆd GHxu t |  i | d t ƒƒ D]X \ } \ } } | d 7} | i d i | i ƒ  ƒ d | | | ƒ| d j o PqwqwWqÆn xt |  i | d t d t ƒƒ D]ö \ } \ } } | oË | o2 | i t | ƒ i ƒ  d | | | p d ƒn… | | j o
 d	 j n o | | G| Gd G| G| GHn | i t | ƒ i ƒ  t d „  | i ƒ  Dƒ ƒ f d t | | ƒ | ƒ| d 7} n | d j o PqüqüWqÆW| o@| o8| o1t | i ƒ  d	 ƒ i ƒ  | j oxt |  i t | i ƒ  d	 ƒ d t d t ƒƒ D]Ô \ } \ } } | o© t | ƒ i ƒ  | j o | o2 | i t | ƒ i ƒ  d | | | p d ƒnJ | i t | ƒ i ƒ  t d „  | i ƒ  Dƒ ƒ f d t | | ƒ | ƒ| d 7} n | d j o PqfqfWn d G| GH| o‡ | o€ | o qÚt d „  | Dƒ ƒ } t | ƒ G| Gt ƒ  } x0 | i ƒ  D]" \ \ } } } | i | | ƒ qŸW| } t | ƒ GHn d „  ‰ | oG | o@ |
 i d i ‡ ‡ f d †  | i ƒ  Dƒ ƒ d ƒ |
 d GHnJ | o | o |
 i d | ƒ n& | o | o |
 i d  | ƒ n | i | ƒ q‘ Wd! GHd" „  ‰ d# „  ‰  | o_ ‡  f d$ †  } d% „  } t | d& ƒ i  t! | | ƒ ƒ t | d' d& ƒ i  t! | | ƒ ƒ d  Sg  } | D] } | ˆ | i" ƒ  i ƒ  ƒ q~ } dC \ } } g  } g  }  g  }! xBt t# | | ƒ ƒ D]+\ ‰ \ }" ‰ ˆ  |" ƒ oJ ˆ ˆ ˆ  |" ƒ ƒ i" ƒ  i ƒ  ƒ ˆ j o | d 7} qØ|! i ˆ ƒ n ˆ ‡ ‡ f d( †  |" i ƒ  Dƒ j o | d 7} n ˆ  |" ƒ o[ | i t$ ‡ ‡ ‡ f d) †  |" Dƒ ƒ ƒ |  i t% ˆ ˆ ˆ ˆ  |" ƒ ƒ i ƒ  ƒ ƒ ƒ qo| i t ˆ ƒ ƒ |  i t ˆ ƒ ƒ qoWt ‡  ‡ f d* †  t# | | ƒ Dƒ ƒ }# d+ |# t | ƒ t& |# ƒ t | ƒ d | t | ƒ t& | ƒ t | ƒ d | t | ƒ t& | ƒ t | ƒ d t' t( | ƒ t( ‡  ‡ ‡ f d, †  | Dƒ ƒ ƒ t) t( | ƒ t( ‡  ‡ ‡ f d- †  | Dƒ ƒ ƒ t* t( | ƒ t( ‡  ‡ ‡ f d. †  | Dƒ ƒ ƒ t+ | ƒ t d/ „  |  Dƒ ƒ t, |  ƒ t, | ƒ t, |! ƒ f }$ | p |$ d0 | 7}$ n |$ d1 t d2 „  | Dƒ ƒ 7}$ |$ GH|
 i |$ ƒ t | d& ƒ i  |
 ƒ h
 t& |# ƒ t | ƒ d d3 6t& | ƒ t | ƒ d d4 6t& | ƒ t | ƒ d d5 6t' t( | ƒ t( ‡  ‡ ‡ f d6 †  | Dƒ ƒ ƒ p d7 d8 6t) t( | ƒ t( ‡  ‡ ‡ f d9 †  | Dƒ ƒ ƒ p d7 d: 6t* t( | ƒ t( ‡  ‡ ‡ f d; †  | Dƒ ƒ ƒ p d7 d< 6t+ | ƒ d= 6t d> „  |  Dƒ ƒ d? 6| d@ 6t dA „  | Dƒ ƒ dB 6}$ |$ S(D   NR‘   R   t   cleanupRK  R   R    id   s   grammar donei    s   source:RM  i   s   parsing failedRm   s   parsetrees:iè  s   trying partialR¸   RÔ   RÓ   g¬÷N’~hs   
 =>c         s   s-   x& |  ] } d  | i  j o	 d Vq q Wd S(   RÞ   i   N(   R?   (   R&   RH   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>Ï  s   	 c         s   s-   x& |  ] } d  | i  j o	 d Vq q Wd S(   RÞ   i   N(   R?   (   R&   RH   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>Ú  s   	 s   transformations: c         s   s   x |  ] \ } } | Vq Wd  S(   N(    (   R&   R'   R  (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>å  s   	 c         S   s   d i  |  i ƒ  ƒ S(   NRM  (   R+   RB   (   R:   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   wordsî  s    s   
c         3   s8   x1 |  ]* \ } } d  ˆ  t  | ƒ ˆ | ƒ f Vq Wd S(   s   %d. [p=%s] %sN(   R…   (   R&   R:   R%   (   R    RX  (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>ñ  s   	 iÿÿÿÿs   not transformed: %s
s   not parsed: %s
RL  c         S   sP   d „  } |  o8 t  g  } |  D] } | t i | | ƒ d ƒ q ~ ƒ S|  Sd  S(   Nc         S   s(   |  d j o d S|  d j o d S|  S(   Ns   n'tRê   s   'sRé   (    (   RH   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   fù  s
      Rz   (   Rb   R{   R|   (   R0   RY  RG   RH   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR‚   ø  s    	 8c         S   s)   |  o t  |  i ƒ  d d „  ƒd Sd  S(   NRq   c         S   s   |  \ } } | S(    (    (   R&   R™   Rz   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`     s    i    (   R–   R„   (   RH   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   fmax   s     c            s:   |  o+ ˆ  |  ƒ o ˆ  |  ƒ i  d d d ƒ d Sd Sd  S(   NR=  s   ()i    s   
(   t   _pprint_flat(   RH   (   RZ  (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   strtree  s     c         S   s+   |  o d i  d „  |  Dƒ ƒ d Sd Sd  S(   Ns   
c         s   s3   x, |  ]% } | o | i  d  d d ƒ Vq q Wd S(   R=  s   ()i    N(   R[  (   R&   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>  s   	 s   

(   R+   (   RH   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   strtrees  s     R5   Râ   c         3   s1   x* |  ]# } ˆ  ˆ | ƒ i  ƒ  i ƒ  ƒ Vq Wd  S(   N(   Rã   R¶   (   R&   R'   (   R‚   RX  (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>  s   	 c         3   s4   x- |  ]& } t  ˆ ˆ  ˆ | ƒ i ƒ  ƒ ƒ Vq Wd  S(   N(   R   R¶   (   R&   R'   (   R‚   RX  R0   (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>  s   	 c         3   sC   x< |  ]5 \ } } | o" ˆ ˆ  | ƒ ƒ | j o	 d  Vq q Wd S(   i   N(    (   R&   RH   RT   (   RZ  RX  (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>   s   	 sN  exact match:        %d of %d => %.2f %%
match ranked first: %d of %d => %.2f %%
match of any rank:  %d of %d => %.2f %%
f-measure: %.2f
precision: %.2f
recall: %.2f
average edit distance (of best matches): %.2f
sentences with edit distance < 1: %s
distances of first matches: %s
distances of best matches:  %s
indices of mistakes: %s
c         3   sH   xA |  ]: } ˆ  | ƒ o' ˆ ˆ ˆ  | ƒ ƒ i  ƒ  i ƒ  ƒ Vq q Wd  S(   N(   Rã   R¶   (   R&   R'   (   RZ  R‚   RX  (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>0  s   	 c         3   sH   xA |  ]: } ˆ  | ƒ o' ˆ ˆ ˆ  | ƒ ƒ i  ƒ  i ƒ  ƒ Vq q Wd  S(   N(   Rã   R¶   (   R&   R'   (   RZ  R‚   RX  (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>1  s   	 c         3   sH   xA |  ]: } ˆ  | ƒ o' ˆ ˆ ˆ  | ƒ ƒ i  ƒ  i ƒ  ƒ Vq q Wd  S(   N(   Rã   R¶   (   R&   R'   (   RZ  R‚   RX  (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>2  s   	 c         s   s*   x# |  ] } | d  j o	 d  Vq q Wd S(   i   N(    (   R&   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>3  s   	 s   sentences with no parse: %d
s%   sentences with no transformation: %d
c         s   s$   x |  ] } | p	 d  Vq q Wd S(   i   N(    (   R&   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>7  s   	 s   exact match:s   match ranked first:s   match of any rank:c         3   sH   xA |  ]: } ˆ  | ƒ o' ˆ ˆ ˆ  | ƒ ƒ i  ƒ  i ƒ  ƒ Vq q Wd  S(   N(   Rã   R¶   (   R&   R'   (   RZ  R‚   RX  (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>>  s   	 g        s
   f-measure:c         3   sH   xA |  ]: } ˆ  | ƒ o' ˆ ˆ ˆ  | ƒ ƒ i  ƒ  i ƒ  ƒ Vq q Wd  S(   N(   Rã   R¶   (   R&   R'   (   RZ  R‚   RX  (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>?  s   	 s
   precision:c         3   sH   xA |  ]: } ˆ  | ƒ o' ˆ ˆ ˆ  | ƒ ƒ i  ƒ  i ƒ  ƒ Vq q Wd  S(   N(   Rã   R¶   (   R&   R'   (   RZ  R‚   RX  (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>@  s   	 s   recall:s(   average edit distance (of best matches):c         s   s*   x# |  ] } | d  j o	 d  Vq q Wd S(   i   N(    (   R&   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>B  s   	 s!   sentences with edit distance < 1:s   sentences with no parse:c         s   s$   x |  ] } | p	 d  Vq q Wd S(   i   N(    (   R&   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>D  s   	 s!   sentences with no transformation:(   i    i    (-   Rš   R   R”   RÚ   R    R*   R+   RB   Ra   R   RP  R¶   R6  R   RQ  Rf   R
   R—   R<  R%   R>   R°   R›   RÒ   Rs   R=   R   Ru   R–   R„   R)   t   opent
   writelinesRŽ   Rã   R±   R¢   R   Rg   R   Rw   R   R   R   R…   (%   R7  t   sentsortreest   goldt   resultsfilet   treest   myt	   bootstrapt   rulesR’   R8  t   resultst	   resultfdst   noparseRH   t
   parsetreest   parsetrees1R9  RT   t   resultfdR3   RŸ   t   nnR¨   R%   t   newfdR:   t   mmR\  R]  RG   t   lgoldt   distt   dist1t   wrongRZ   t   exactcntt   stats(    (   RZ  R0   R‚   RX  R    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   run™  s*   !	 	 
, 	  *
 	  
* 	" 2  :2  2 		 	3  			#3 +& )2(###..."999c          C   s3  d d  k  }  t d ƒ i ƒ  i ƒ  i ƒ  } t d ƒ i ƒ  i ƒ  i ƒ  } t d „  t d ƒ ƒ } t d „  t d ƒ ƒ } t d ƒ i ƒ  i ƒ  i ƒ  } t d	 ƒ i ƒ  i ƒ  i ƒ  } t d
 „  t d ƒ ƒ } t d „  t d ƒ ƒ } t d „  t d ƒ ƒ }	 t d „  t d ƒ ƒ }
 t t | | | | ƒ ƒ } g  } x4 | D], \ } } t | d t	 ƒt | d t	 ƒq>Wg  } xb |	 D]Z } | i
 t ƒ } | i d d d d d d ƒ | i d d d d d d ƒ | i | ƒ q{Wt } g  } | ot ƒ  } xS t | d  | d  ƒ D]9 \ } \ } } | GH| i t t | | ƒ d d ƒƒ qW| i d d „  ƒ d } xq | D]i \ } \ } } \ } } } } | | d j o7 d  | | | | t | ƒ f GH| GH| GH| d 7} qkqkW| GH| i t | | ƒ d ƒ d! GHn t a d" Gt GHt | | d | d d# ƒ d$ GHd  S(%   Niÿÿÿÿs   corpus/sentences-interr3.txts   corpus/sentences-decl3.txtc         S   s   t  |  i ƒ  ƒ S(    (   R   Rã   (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`   L  s    s   corpus/trees-interr3.txtc         S   s   t  |  i ƒ  ƒ S(    (   R   Rã   (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`   M  s    s   corpus/trees-decl3.txts   corpus/sentences-interr1.txts   corpus/sentences-decl1.txtc         S   s   t  |  i ƒ  ƒ S(    (   R   Rã   (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`   P  s    s   corpus/trees-decl1.txtc         S   s   t  |  i ƒ  ƒ S(    (   R   Rã   (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`   Q  s    s   corpus/trees-interr1.txtc         S   s   t  |  i ƒ  ƒ S(    (   R   Rã   (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`   S  s    s   corpus/trees-decl4.txtc         S   s   t  t |  i ƒ  ƒ ƒ S(    (   Rs   R   Rã   (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`   T  s    s	   trees.txtRâ   t   factorR^   t
   horzMarkovi    t	   childCharRß   R_   iìÿÿÿR  iè  Rq   c         S   s   |  d  |  d S(   i   i   (    (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`   ‡  s    i   s   %d. %d / %d = %fs   training doneR²   s   results1.txts   testing done(   t   cPickleR^  t   readRã   RN  RŽ   R   R±   RB  R›   RR   R   R‹   R)   RW   R*   Rp   R  R   Rµ   Rg   Rƒ   R²   Rv  (   Rz  t
   sentsintert	   sentsdeclt
   treesintert	   treesdeclt   newsentsintert   newsentsdeclt   newtreesdeclt   newtreesinterR  t   trees_tdop_parsedR   t   rightcorpusRH   RT   t   newtreesrightt   a1t   trainRh   R7  R    Rä   Rå   R'   t   b1t   c1RU   RV   R9  (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   runexpG  sd       	  !		c         C   s˜  g  } x‹|  D]ƒ} | d i  i ƒ  d j o
 d } n d } d | j o t | ƒ n" d | j o t | d t ƒn d | j o t | | ƒ n d	 | j o  | i d
 d d d d d ƒ n“ d | j o  | i d
 d d d d d ƒ nf d | j oX | i t ƒ } | i d
 d d d d d ƒ | i d
 d d d d d ƒ | i | ƒ n d | j o- t | ƒ d | j o t | d ƒ qq q W| S(   Ni    R«   i   i   t   foldmostt   foldallRâ   t   markauxt   rightbranchingRw  R_   Rx  Ry  Rß   t   leftbranchingR^   t   bothbranchingt   stripcnfmarksiÿÿÿÿ(	   R?   Rã   RB  R   RJ  R‹   RR   R)   R?  (   Rc  t   optionst   additionaltreesRH   t	   verbdepthRT   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt
   preprocess°  s4      
       
 c    '   „   C   sq  t  d ƒ i ƒ  i ƒ  i ƒ  }  t  d ƒ i ƒ  i ƒ  i ƒ  } t d „  t  d ƒ ƒ } t d „  t  d ƒ ƒ } |  t  d ƒ i ƒ  i ƒ  i ƒ  7}  | t  d ƒ i ƒ  i ƒ  i ƒ  7} | t d	 „  t  d
 ƒ ƒ 7} | t d „  t  d ƒ ƒ 7} d d d d d d d d d d d d d d d d d d d d  d! d" d# d$ d% d& d' d( d) d* d+ d, d- d. d/ d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 d: d; d< d= d> d? d@ dA dB dC dD dE dF dG dH dI dJ dK dL dM dN dO dP dQ dR dS dT dU dV dW dX dY dZ d[ d\ d] d^ d_ d` da db dc dd de df dg dh di dj dk dl dm dn do dp dq dr ds dt du dv dw dx dy dz d{ d| d} d~ d d€ d d‚ dƒ d„ d… d† d‡ dˆ d‰ dŠ d‹ dŒ d dŽ d d g„ } d‘ d’ d“ f d” d• f d– d— d˜ d™ f dš d› f f } t dœ „  t | Œ  ƒ } t d i ƒ  ƒ t dž i ƒ  ƒ f } h  } x– | D]Ž } g  } | D] }	 | |	 i	 t
 ƒ q~ }
 g  } | D] }	 | |	 i	 t
 ƒ q1~ } t |
 | ƒ } t | | ƒ } |
 | | | f | | <qöWt dŸ „  | Dƒ ƒ } t t | ƒ ƒ } d } x¤t d ƒ D]–} t | | ƒ } g  } | D] }	 |	 | j o | |	 n qã~ } t | ƒ i t | ƒ ƒ t g  ƒ j p t ‚ x| D]} | | \ } } } } t | ƒ t | ƒ j o" t |  ƒ j o t | ƒ j n p t ‚ g  } | D] }	 | | |	 | |	 f q°~ } | o | o7 | g  } | D] }	 | | |	 | |	 f qð~ 7} n t ƒ  } xG t | ƒ D]9 \ } \ } } | GH| i t t | | ƒ d  d¡ ƒƒ q/W| i g  } | D] }	 | | |	 | |	 f q}~ ƒ d¢ GHd£ d¤ i | ƒ | f }  t | g  }! | D] }	 |! |  |	 qÑ~! g  }" | D] }	 |" | |	 qó~" |  d¥ t d¦ t ƒ}# | | d§ i |# ƒ ~ t ƒ  } xG t | ƒ D]9 \ } \ } } | GH| i t t | | ƒ d  d¡ ƒƒ qMW| i g  }$ | D] }	 |$ | |	 | |	 f q›~$ ƒ d¢ GHd¨ d¤ i | ƒ | f }  t | g  }% | D] }	 |% | |	 qï~% g  }& | D] }	 |& |  |	 q~& |  d¥ t d¦ t ƒ}# | | d© i |# ƒ ~ q@WqÃW| Gdª GHt ƒ  d  S(«   Ns   corpus/sentences-interr3.txts   corpus/sentences-decl3.txtc         S   s   t  |  i ƒ  ƒ S(    (   R   Rã   (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`   È  s    s   corpus/trees-interr3.txtc         S   s   t  |  i ƒ  ƒ S(    (   R   Rã   (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`   É  s    s   corpus/trees-decl3.txts   corpus/sentences-interr1.txts   corpus/sentences-decl1.txtc         S   s   t  |  i ƒ  ƒ S(    (   R   Rã   (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`   Í  s    s   corpus/trees-decl1.txtc         S   s   t  |  i ƒ  ƒ S(    (   R   Rã   (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`   Î  s    s   corpus/trees-interr1.txti   i   i   i   i   i   i	   i
   i   i   i   i   i   i   i   i   i   i   i   i   i   i   i    i"   i#   i$   i%   i(   i+   i2   i5   i6   i;   i=   i>   i?   i@   iA   iB   iC   iD   iE   iF   iH   iI   iJ   iK   iM   iP   iQ   iT   iW   iY   iZ   i[   i\   i]   i^   i_   i`   ia   ib   id   ie   if   ig   ih   ii   ij   il   in   io   ip   iq   ir   is   it   iu   iv   iw   ix   i{   i|   i   i€   i   i‚   iƒ   i„   i…   i‡   iˆ   i‰   iŠ   i‹   i   iŽ   i   i   i‘   i’   i“   i•   i—   i˜   i™   iš   i›   iœ   iž   i    i¡   i¢   i£   i¥   i¦   i§   i©   iª   i«   i¬   i­   i®   i¯   i°   i±   i²   i³   i´   iµ   i¶   i·   RŒ  R  t   nofoldRŽ  t   nomarksR  R  R‘  t   nobinarizationR’  t   cnfmarksc         S   s   d  |  j p
 d |  j S(   R™  Rš  (    (   R'   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`   Ù  s    s(   foldmost nomarks rightbranching cnfmarkss'   foldall markaux rightbranching cnfmarksc         s   s-   x& |  ] } | h g  d  6g  d 6f Vq Wd S(   s   decl->inters   inter->declN(    (   R&   t   param(    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>ã  s   	 R  iÐ  s   training dones   results/%s-d%d.txtt   -Rc  Rd  s   inter->decls   results/%s-i%d.txts   decl->inters   

(   R^  R{  Rã   RN  RŽ   Rd   R   Rb   R¶   RR   R   R–  R}   R  R>   R   Rw   t   intersectionRÙ   RW   R*   Rp   R  R   Rƒ   R+   Rv  R›   R)   t
   printstats('   R|  R}  R~  R  t   filteredt
   parameterst
   proccorpusR›  RG   RH   t   declRK   t   intert   rdeclt   rinterRg  RÊ   t
   samplesizet   foldR0  R¥   Rˆ  t
   rtreesdeclt   rtreesinterRÉ   R   RË   R7  R    Rä   Rå   RÍ   t   filenameRÎ   RÏ   Ru  t   _[9]t   _[10]t   _[11](    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   tenfoldÄ  s~    ""ÿ “ ** 2/ I/7	 &6_	 &6_	c          C   sù  d „  }  d d  k  } t d „  ƒ } xP| i d ƒ D]?} t | i i d | ƒ ƒ i ƒ  i ƒ  d } g  } t d ƒ D]" } | d j o | | | qv qv ~ } g  } | D] } | | i	 d
 ƒ q¬ ~ } x; t d ƒ D]- } | | d i	 d ƒ d d  | | d <qØ Wg  } | D] \ }	 }
 | |	 t
 |
 ƒ f q~ } | i d d ƒ \ } } | | | d i t | ƒ ƒ q4 Wd GHx” | D]Œ } x‚ | | D]v } d Gd i | ƒ G| GHxW |  | | | ƒ i ƒ  D]; \ } } d | d d t | ƒ t | ƒ t | ƒ f GHqËWq”WHqƒWt d d ƒ } | i d i d „  d g | | i ƒ  d d d i ƒ  Dƒ ƒ d ƒ x | D]w } xn | | D]b } | i d i t d i | | f ƒ ƒ g t t t t |  | | | ƒ i ƒ  ƒ ƒ ƒ d ƒ qWqpW| i ƒ  d  S(   Nc            s   t  ‡  f d †  ˆ  d Dƒ ƒ S(   Nc         3   s>   x7 |  ]0 } | g  } ˆ  D] } | | | q ~ f Vq Wd  S(   N(    (   R&   R™   RG   RV   (   t   dicts(    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>  s   	 i    (   R}   (   R¯  (    (   R¯  s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   collate  s    iÿÿÿÿc           S   s
   t  t ƒ S(    (   R   R   (    (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyR`     s    Rg  ióÿÿÿi   i   i	   i
   t   :i   i   s   =>iþÿÿÿRœ  i    s   10 foldss   	RM  s   %s %s %f (%f)i(   s   results.csvR5   t   ,c         s   s   x |  ] } t  | ƒ Vq Wd  S(   N(   R…   (   R&   RH   (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pys	   <genexpr>$  s   	 R   R4   s   
s   , (   i   i	   i
   (   t   osR   t   listdirR^  t   pathR+   R{  RN  R  R¶   Rg   t   rsplitR)   R}   R„   R>   R   R   t   writeRu   R…   RŽ   R‰   t   close(   R°  R³  Rg  RH   t   linesRG   R'   RK   R¥   R™   Rz   R›  t	   directionRq   R†   t   out(    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyRž    s@    	 +<* +3#   7G  dc          C   sÏ   d d  k  }  |  i t d d ƒ ƒ } x¤ | i ƒ  D]– \ \ } } } } d G| GHd G| GHd G| GHd G| GHd Gd	 i | i ƒ  ƒ GHt | | | ƒ } | | j o% d
 G| GHd Gd	 i | i ƒ  ƒ GHn Hq1 Wd  S(   Niÿÿÿÿs   mangled.picklet   rbRÑ   R:   RÆ   s   former results   former leaves:RM  s   current results   current leaves:(   Rz  t   loadR^  R„   R+   RB   R·   (   Rz  R[   RÑ   R:   RÆ   R¨   t	   newresult(    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt	   mungetest+  s     					t   __main__NRÓ   t   optionflagss   %d doctests succeeded!s8   printstats runexp tenfold interface mungetest test test2s   ()(\   R‘   R    t   nltkR   R   R   R   R   R   R   R   R	   R
   t   nltk.metricsR   R   R   t	   itertoolsR   R   R   R   t   mathR   t   operatorR   t   collectionsR   t   randomR   t   sysR   t   heapqR   R   R   t   numpyR   R   R   Rî   R³   R›   R²   R{   R*  R;   R-   RD   R+   RW   R   R  R·   R  Rí   Rc   Rm   R¯   R&  R)  R  R  Rð   Rs   Rñ   R-  R,   Rì   R0  R:  RŠ   R<  R?  RB  RJ  RV  Rv  R‹  R–  R®  Rž  R¿  RÜ   t   doctestt   testmodt   NORMALIZE_WHITESPACEt   ELLIPSISt   failt	   attemptedR¶   t   eval(    (    (    s5   /home/andreas/ai/sslp/dop-transformations/treelink.pyt   <module>   sz   @"	 	ÿ Z]2	1																			+					?®	i		H		) 