mò
m_yFc           @   sç   d  Z  d k l Z d k l Z d k Z d „  Z d „  Z d „  Z d d „ Z	 d	 „  Z
 d
 „  Z d „  Z d „  Z d „  Z d e d „ Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z e d j o e ƒ  n d S(   sk  
	OVERVIEW

# Important data structures:
# - Dictionary with words as keys, and lists tuples (amount, framehash) as
#   values
eg. associations['ball'] = [ (0.45, _frameXYZ), (0.33, _frameABC), ... ]

# - Dictionary with framehashes as keys, and the real frames as values
frameindex[hash1234] = _frameXYZ

#Initialization functions
readcorpus()
parseutterances()

#Print functions
printframe()
printprop()
printsituation()
frametostring() # create a unique, sorted string

#Learn Functions
createsubframes() <-- formerly known as 'abstractions'
associate() <-- formerly known as 'speech'

#Test Functions
done in main?
(   s   minidom(   s   stdinNc          C   s  d GHt  i d ƒ i }	 t |	 ƒ \ }  } d Gt i ƒ  } xÄ | i
 ƒ  D]¶ } | |  j oš |  | } | GHg  } | D] } | | | | f qs ~ } | i d t ƒ xV t | d  ƒ D]7 \ } }
 d G| d Gd G|
 d	 GHt | |
 d ƒ q´ WqF | Gd
 GHqF W|	 i ƒ  d S(   sw   
	then read words from stdin and find matching frames
	(if this file is not called directly, main() will be ignored).
	s÷   Language Acquisition, one-word model. 2nd Year project UvA 2007
This program is not distributed in the hope that it will be useful,
so WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
s
   corpus.xmls   Talk to me: t   reversei   t   matchi   s   score:i    s   not in corpus.N(   t   minidomt   parset   documentElementt   xmldoct   onewordt   associationst
   frameindext   stdint   readlinet   textt   splitt   wordt   datat   _[1]t   fhasht   listt   sortt   Truet	   enumeratet   iR   t
   printframet   unlink(   R   R   R   R   R   R   R   R   R   R   R   (    (    t#   /home/andreas/ai/2p/laac/oneword.pyt   main#   s(      
+ 
c         C   s–   d GH|  i d ƒ } h  h  } } d GHxB | D]: } t | | ƒ } t | | ƒ } t
 | | | d d ƒq- Wd GHt | t | ƒ t | ƒ ƒ | | f S(   sŸ   
	parse corpus and generate derived frames,
	parameter: use: xmldoc = minidom.parse("corpus.xml").documentElement
	return: a tuple (associations, frameindex)
	s   oneword: Reading corpus data..t	   situations   oneword: Analyzing situations..t   methodi   s   oneword: Correcting scores..N(   R   t   getElementsByTagNamet
   situationsR   R   t   sitt   parseutterancest
   utterancest   derivemeaningst   meaningst	   associatet   correctassociationst   len(   R   R   R"   R    R   R   R   (    (    R   R   G   s      c   
         sc  d „  ‰  ‡ ‡  ‡ d †  ‰ t i ƒ  ‰ y t |  ƒ i ƒ  } Wn t	 j
 o |  } n Xˆ i
 d ƒ } | i | i d d ƒ ƒ ˆ i
 d ƒ } | i d d ƒ yB | i t | ƒ i d d ƒ ƒ | i t | ƒ i d d ƒ ƒ Wn t	 j
 o n Xˆ i
 d ƒ } | i | ƒ | | g } | i ˆ  | ƒ ƒ | i ˆ  | ƒ ƒ | ˆ | ƒ 7} t | | ƒ S(
   s=   
	for a given situation, return a list of derived meanings.
	c         C   sÎ   |  i d d ƒ } y# t t | ƒ i ƒ  ƒ i d } Wn t j
 o g  Sn Xt	 t | ƒ i ƒ  ƒ i d } t	 t | ƒ i ƒ  ƒ i | i d d ƒ | ƒ t | ƒ i ƒ  i t t | ƒ i ƒ  ƒ ƒ | g S(   sq   
		try to rename abstraction to an id element and delete the
		old id element, otherwise return an empty list.
		t   deepi   i    N(   t   framet	   cloneNodet   aframet   abstt   framest   nextt
   childNodest   at   StopIterationt   idt   bt   replaceChildt   removeChild(   R'   R.   R1   R)   (    (    R   t   abstractiontoidb   s     #	.+c            s  g  } x® t |  ƒ D]  } ˆ i d ƒ } | i | i d d ƒ ƒ } | i
 d d ƒ x- t |  ƒ D] } | i | i d d ƒ ƒ q` W| i ˆ | ƒ ƒ | i | ƒ | ˆ  | ƒ 7} q WxI t |  ƒ D]; } ˆ i d ƒ } | i | i d d ƒ ƒ | i | ƒ qÄ W| S(   s)   
		recursively generate derived frames
		t   meaningR&   i   t   namet   subN(   t	   solutionsR+   R'   R.   t   Kreatort   createElementt   newframet   appendChildR(   R7   t   setAttributet
   propertiesR1   t   extendR4   t   appendt   recursiveframes(   R'   R.   R1   R7   R8   R;   (   RA   R4   R9   (    R   RA   q   s&        R5   R&   i   R'   R6   t   actionN(   R4   RA   R   t   DocumentR9   R+   R   R,   t   originalframeR/   R:   t   fullmeaningR<   R(   t
   emptyframeR=   R0   R*   t   emptymeaningR8   R?   t
   makehashesR   (
   R   R   RE   RG   RF   RD   R4   R8   RA   R9   (    (   R4   RA   R9   R   R!   ^   s0     	#i    c         C   s®   x§ |  D]Ÿ } xE | D]= } | | | j o d | | | <q | | | c d 7<q Wd } | d j o> x; | | D]+ } | | j o | | | c d 8<qs qs Wq q Wd S(   s‚   
	compute scoring between words and frames
	if the method argument > 0 unrelated frames
	will have their associations decreased.
	i   i   i    N(   R    R.   R"   R1   R   R   (   R    R"   R   R   R.   R1   (    (    R   R#   Ÿ   s        c         C   su   xn |  i ƒ  D]` \ } } t g  } | D] } | | | q' ~ ƒ } x& | D] } t | | ƒ | | | <qK Wq Wd S(   s'   
	do some math to correct associatons
	N(	   R   t   itemsR   R   t   sumR   t	   framehasht   totalt   float(   R   t	   wordcountt   meaningcountR   R   RL   R   RK   (    (    R   R$   µ   s      +  c         C   sU  d i  t |  ƒ ƒ } | i d d ƒ i ƒ  } d } g  } | d j o; xÜ | D], } | d d j o | i
 | d ƒ qM qM Wn¥ | d	 j oj x” | D][ } | d d j o7 | i
 | d ƒ | i
 | d ƒ | i
 | d ƒ q• | i
 | ƒ q• Wn. g  } | D] } | | i d d ƒ q~ } x) | D]! } | | j o h  | | <q,q,W| S(   sì   
	Create a list of single words of all the adult utterances combined,
	after stripping unwanted characters. If seperate utterances are needed
	change this.  As this is the oneword stage, sentence boundaries are
	currently meaningless.
	t    t   ?t    t   defaultt   ignorei    t   !i   t	   increase3N(   t   joint   adultutterancesR   t	   utterancet   replaceR   t   uttsR   t   correctedwordsR.   R@   R   R   (   R   R   R.   R[   R\   R   RY   R   (    (    R   R   Æ   s0       - c         C   sO   g  } xB |  D]: } t | ƒ } | | j o | | | <n | i | ƒ q W| S(   N(   t   outputR"   R.   RK   R1   R   R@   (   R"   R   R.   R1   R]   (    (    R   RH   ñ   s     c         C   s­   |  i d ƒ d } d G| i d i GHx t |  ƒ D] } t | ƒ q3 Wx_ |  i D]T } | i d j o d G| i d i GHqQ | i d j o d G| i d i GHqQ qQ Wd S(	   s:   
	print a situation's description, frames and utterances
	t   descriptioni    s    DESC:t   adults   adult:t   childs   child:N(	   R   R   R^   R-   R   R+   R.   R   t   nodeName(   R   R.   R^   (    (    R   t   printsituationû   s      
 c         C   s   t  |  ƒ GHd d GHd  S(   NiO   t   -(   t
   frametostrR'   (   R'   (    (    R   R     s    c         C   sÃ  g  } |  i d j o
 d } no |  i d j oD | d d } | t j o | |  i d ƒ d 7} qŽ | d 7} n |  i d	 j o
 d
 } n y` | | d d d t |  ƒ i d i	 d 7} | | d d d t
 |  ƒ i d i	 d 7} Wn t j
 o n Xd „  } t t |  ƒ ƒ } | i | ƒ xS | D]K } | | d d d 7} | | i d ƒ d 7} | | i d i	 d 7} q3Wx1 t t |  ƒ ƒ D] } | t | | d ƒ 7} q•Wd i | ƒ S(   s^   
	make 'human readable' string of a frame, for both pretty-printing
	and finding duplicates.
	R5   s	   MEANING:
R'   s   	s   FRAME: R6   s   
s   void
R   s   SITUATION:
i   s   ID: i    s   ABSTR: c         C   s/   |  i d ƒ | i d ƒ j  o t Sn t Sd  S(   NR6   (   t   prop1t   getAttributet   prop2R   t   False(   Re   Rg   (    (    R   t   cmppropkeys&  s    s   PROP: s    = RR   N(   t   resultR'   Ra   t   nestingt
   removenameRh   Rf   R0   R-   R   R*   R/   Ri   R   R>   t   propsR   R.   t   sortedR+   Rd   RW   (   R'   Rk   Rl   R.   Ri   Rm   Rj   (    (    R   Rd     s8     

.2	  c         C   s   t  t |  d t ƒƒ S(   sÄ   
	generate a hash value of a frame, by converting it to a string
	representation and hashing that. The conversion is used to do a
	"deep" conversion, instead of just comparing object references.
	Rl   N(   t   hashRd   R'   R   (   R'   (    (    R   RK   6  s     c         C   s   t  d |  ƒ i ƒ  S(   NR0   (   t   elementiteratorR'   R,   (   R'   (    (    R   R0   >  s    c         C   s   t  d |  ƒ i ƒ  S(   Nt   abstraction(   Rp   R'   R,   (   R'   (    (    R   R*   @  s    c         C   s   t  d |  ƒ S(   NR'   (   Rp   R'   (   R'   (    (    R   R+   C  s    c         C   s   t  d |  ƒ S(   Nt   prop(   Rp   R'   (   R'   (    (    R   R>   E  s    c         c   s,   x% t  d |  ƒ D] } | i d i Vq Wd S(   s+    return utterances as strings (hacky code) R_   i    N(   Rp   R'   R.   R-   R   (   R'   R.   (    (    R   RX   G  s      c         c   s0   x) | i D] } | i |  j o | Vq
 q
 Wd S(   s7    iterator over elements with a specific tag in a frame N(   R'   R-   R.   Ra   t   tag(   Rs   R'   R.   (    (    R   Rp   M  s
     
 t   __main__(   t   __doc__t   xml.domR   t   sysR	   t   mathR   R   R!   R#   R$   R   RH   Rb   R   Rh   Rd   RK   R0   R*   R+   R>   RX   Rp   t   __name__(   R   RX   R+   Rp   R0   R!   R   R   R   Rx   R*   Rd   R#   RK   R>   R   R$   R	   RH   Rb   (    (    R   RQ      s,   		$		A		+	
		&							