<%
# Page set-up: adjust the working directory and module search path, import
# the grammar module and the tree helpers, and define `ustr`.
import sys
import os

# `lg` is imported only after switching to this directory and adding "." to
# sys.path, so it is presumably located there -- verify on deployment.
os.chdir("/var/www/https/andreas")
sys.path.append(".")

import lg
from discodop.treedraw import DrawTree
from discodop.tree import Tree

def ustr(a):
    """Return ``str(a)`` with every doubled backslash collapsed to one.

    :param a: any object; it is converted with ``str`` before replacing.
    :returns: the converted text in which each run of two consecutive
        backslashes has been replaced by a single backslash.
    """
    # '\\' is the regular spelling of a lone backslash; the earlier
    # '\ '[0] produced the same character via an unrecognised escape.
    return str(a).replace(r'\\', '\\')
# End of set-up block. This column-0 line is deliberate: PSP takes the
# indentation of the template text that follows from the last code line.
%>

Lexicalist type-driven semantic grammar

Andreas van Cranenburgh 2010.

Abstract: a semantically lexicalist (i.e., semantic ambiguities derive from the lexicon), type-driven (i.e., the direction of function application is dynamically inferred from types) semantic (i.e., the output is semantic, in this case a logical form) grammar. The syntax is parsed by a generative grammar (in this case a CFG; other formalisms such as DOP are possible). The leaves of the resulting parse tree are annotated with all possible meanings from the lexicon, and the resulting parse forest is pruned by removing type clashes. Broadly speaking, syntax is generative and semantics is selective. Two types of ambiguity are modelled: syntactic (different parse trees) and semantic (different mappings of words to meanings). In short, this takes the output of a CFG and replaces the leaves with lambda expressions, after which the expressions of siblings are applied to each other recursively, storing the result in their parent, until finally the root node contains the semantic interpretation of the sentence. The example sentences demonstrate the handling of scope ambiguity (although this depends on a lexical ambiguity in the verb, hence 'lexicalist') and of a PP attachment ambiguity (deriving from both a syntactic and a lexical ambiguity).

Source code:


 
">
    <%-- Request handling block. When the form carries a 'lexicon' field it rebuilds lg.grammar and lg.lexicon from the submitted text; when it carries a 'sentence' field it runs lg.interpret on it and writes each logical form and bracketed tree to the response; the final else branch (apparently the no-sentence case) writes a list of example sentences. Helpers: p parses one or more ';'-separated lexicon entries with lg.lp.parse; tr, synt format nodes/trees as LaTeX; oneline, brackets build the one-line tree labels; h, u substitute Unicode logic symbols. --%>
    <%-- NOTE(review): the string literals in this block look as if their HTML markup was lost: replace('<', '<') and replace('>', '>') are no-ops (presumably once entity escapes), several format strings contain fewer %s than the arguments supplied, and some quoted strings are broken across lines. Restore them from the original page before relying on this block; the code below is reproduced unchanged. --%>
    <% def p(a): #req.write(a) if ';' in a: return [lg.lp.parse(b, lg.types) for b in a.split(';')] else: return [lg.lp.parse(a, lg.types)] if 'lexicon' in form: #lg.types = dict(a.split(' : ') for a in form['types'].split('\n') if ':' in a) #req.write(str(lg.types)) lg.grammar = lg.nltk.parse_cfg(form['grammar']) lg.lexicon = dict((a.split(' : ')[0], p(ustr(a.split(' : ')[1]))) for a in form['lexicon'].split('\n') if ':' in a) def tr(node): a, b, c = node.split('\n') return "{\\begin{tabular}{c} %s \\end{tabular}}" % ' \\\\\n'.join((a, "$ %s $" % b.replace("\\","\lambda "), "$ %s $" % c.replace("<","\langle ").replace(">","\\rangle "))) def synt(tree): #not all parens return "\synttree%s" % str(tree).replace('(',' [ ').replace(')',' ] ') def oneline(node): return "=".join(node.split('\n')[:2]).replace( ' ', '') def brackets(tree, siblings): if type(tree) == lg.nltk.Tree: return lg.nltk.Tree(oneline(tree.node), [brackets(a, not any(type(b) == lg.nltk.Tree for b in tree)) for a in tree]) else: #if siblings or True: # return "[%s]" % oneline(tree) return oneline(tree) def h(a): return a.replace('&', '∧').replace('\\', 'λ').replace( 'exists', '∃').replace('|', '∨').replace( u'all', u'∀').replace(u'->', u'→') def u(a): return unicode(a).replace(u'\\', u'λ').replace(u'exists', u'∃').replace( u'&', u'∧').replace(u'|', u'∨').replace( u'all', u'∀').replace(u'->', u'→') if 'sentence' in form: from urllib import quote req.write("%s\n" % form['sentence']) for n, (lf, semtree) in enumerate(lg.interpret(form['sentence'], lg.grammar)): if type(semtree) == lg.nltk.Tree: st = brackets(semtree, len(semtree) > 1) st = st.pprint(margin=70, parens='[]') #st = st.pprint_latex_qtree()[6:].replace('[.', '[').replace('\n', '').replace(' ', '') #for a in semtree.treepositions(): # if type(semtree[a]) == lg.nltk.Tree: # semtree[a].node = tr(semtree[a].node) # else: # semtree[a] = tr(semtree[a]) #semtree = semtree.pprint_latex_qtree().replace('[.', '[').replace('\Tree','\synttree') stq = 
quote(st) #stu = h(st) stu = u(st).encode('iso8859-1') stx = Tree.parse(u(st), brackets='[]') req.write("""
  1. %s (tree)
    """ % ( lf.replace('<', '<').replace('>', '>'), n, n)) #req.write(""" # # # """ % (lf.replace('<', '<').replace('>', # '>'), n, n, stq, stu)) # alternative ways of including SVGs: # # #req.write(DrawTree(stx).svg().encode('iso8859-1')) #req.write('
    %s
    ' % DrawTree(stx).text(html=True).encode('iso8859-1')) req.write('
    %s
    ' % stu) req.write('\n') else: # some error probably ocurred st = semtree lf = str(lf) # req.write("
  2. %s" % lf.replace('<','<').replace('>', '>')) else: examples = [ "every girl gives mary a book", "john hits a boy with a book", "john smokes and bill does too", "john loves his wife", "john loves his wife and bill does_too", "every girl loves john or bill"] req.write('

    Try some example sentences:

    \n') for a in examples: req.write('
  3. %s' % (a.replace(' ','+'), a)) %>