# Bachelor AI project 2009. Andreas van Cranenburgh 0440949.
# Two-word stage dialogue simulator

try:
	from free_will import choice
except:
	from random import choice
from string import uppercase
try:
	from nltk import edit_dist
except:
	#edit distance of two strings
	def edit_dist(source, target):
		def substcost():
			if a == b: return 0 
			else: return 2

		#initialize distance matrix
		distance = [[0 for a in range(len(target)+1)] for b in range(len(source)+1)]
		for i, row in enumerate(distance): row[0] = i
		for j in range(len(distance[0])): distance[0][j] = j
		distance[0][0] = 0

		#populate distance matrix
		for i, a in enumerate(source):
			for j, b in enumerate(target):
					insc = distance[i][j+1] + 1
					substc = distance[i][j] + substcost()
					delc = distance[i+1][j] + 1
					distance[i+1][j+1] = min((substc, insc, delc))
		return distance[-1][-1]

#return all substrings of a sequence, in descending order of length:
def substr_iter(seq):
	for a in range(len(seq), -1, -1):
		for b in range(a, len(seq)):
			yield seq[b-a:b+1]

#reverse dictionary, ie. swap values and keys. 
#since a value may be occur with multiple keys, 
#return a list of all keys associated with a value.
def revdict(d):
	#ignore multiple keys for a value:
	#return dict(a[::-1] for a in d.items())
	return dict((a, [b for b,c in d.items() if c==a ]) for a in d.values())

def pred(token):
	if '(' in token: return token[:token.index('(')]
	else: return token

def arg(token):
	if '(' in token: return token[token.index('('):]
	else: return ''

#test whether meaning has a family resemblance with partialmeanings, if so,
#perform a substitution. eg. "question: animal(bunny) do(X)" and 
#"assertion: point(dog) animal(dog)" will succeed because of the shared element
#"animal", substituting "bunny" with "dog".
def unifiespartially(meaning, partialmeaning):
	#pm is wrapped in a list so that it can enjoy side-effects
	pm = partialmeaning[0]
	if pm == '':
		partialmeaning[0] = meaning
		return True
	success = False
	#try to match as much as possible
	for part in meaning.split():
		if pred(part) in pm:
			#if there is an argument and it's not variable, substitute
			if arg(part) and arg(part)[1] not in uppercase and arg(part) not in pm:
				pr = pred(part)
				oldarg = pm[pm.index(pr+'(') + len(pr):pm.index(')', pm.index(pr+'(')) + 1]
				debug("substituted", oldarg, "with", arg(part))
				partialmeaning[0] = pm.replace(oldarg, arg(part))
			success = True
			return success #no further matching
		#probe for variable predicate in pm
		vars = [pred(a) for a in pm.split() if pred(a) in uppercase]
		if vars:
			partialmeaning[0] = pm.replace(vars[0], pred(part))
			debug("instantiated variable predicate with", pred(part))
			#recurse to replace further vars
			return unifiespartially(meaning, partialmeaning)
		"""
		elif arg(part) in pm and arg(part):
			ar = arg(part)
			oldpred = pm[pm.index(ar):pm.index(ar)+len(ar)]
			print ar
			print oldpred
			exit()
			print "substituted", oldpred, "with", pred(part)
			partialmeaning[0] = pm.replace(oldpred, pred(part))
			return True
		"""
	return success #False

#succeed if everything in "partialmeaning" is compatible with "meaning",
#substituting along the way
def unifiesfully(meaning, partialmeaning):
	#pm is wrapped in a list so that it can enjoy side-effects
	pm = partialmeaning[0]
	if pm == '':
		partialmeaning[0] = meaning
		return True

	for part in pm.split():
		if pred(part) not in meaning:
			return False
		elif part not in meaning and arg(part):
			pr = pred(part)
			newarg = meaning[meaning.index(pr+'(') + len(pr):meaning.index(')', meaning.index(pr+'(')) + 1]
			debug("substituted", arg(part), "with", newarg)
			partialmeaning[0] = pm = pm.replace(arg(part), newarg)
			#pm = partialmeaning[0]
	return True

#return semantic representation for linguistic utterance
#by stitching together the best matching fragments
def interpret(utterance, examplars, lexicon):
	words, partialmeaning = utterance.split(), ['']
	l = len(words)
	while words:
		br = False
		#for a in range(len(words), 0, -1):
		for sub in substr_iter(words):
			#eg. "this [=ball]" should add the meaning of "ball"
			if words[0][0] == '[':
				if words[0][2:-1] in lexicon:
					if unifiespartially(lexicon[words[0][2:-1]], partialmeaning):
						debug("demonstrative dereferenced:", partialmeaning[0])
					else:
						partialmeaning[0] += ' ' + lexicon[words[0][2:-1]]
						debug("appended demonstrative:", partialmeaning[0])
				words = words[1:]
				br = True
				break
			#allow to skip words not in lexicon
			#if len(words[:a]) == 1 and words[0] not in lexicon:
			#	words = words[a:]
			#	br = True
			#	break
			#substring = " ".join(words[:a])
			substring = " ".join(sub)
			for b in examplars:
				pm = partialmeaning[0]
				#check whether the substring occurs and match whole words only
				if substring in b and set(substring.split()).issubset(b.split()) and unifiespartially(examplars[b], partialmeaning):
					matches = []
					#remove all words in examplar from queue
					#for c in b.split():
					for c in sub:
						if c in words: 
							words.remove(c)
							matches.append(c)
					debug('	', repr(' '.join(matches)), 'in', repr(b))
					if pm: debug('	and', repr(examplars[b]), 'matches', repr(pm))
					else: debug('meaning initialized as:', examplars[b])
					br = True
					break
			if br: break
		#skip over words not in corpus
		if br == False:
			if words[0] not in lexicon:
				debug("skipping", words[0])
				words = words[1:]
			else:
				return '' #partialmeaning[0]

	return partialmeaning[0]

#transform a meaning into a response
def response(meaning, examplars):
	if 'imperative' in meaning:
		return 'confirmation'
	elif 'ynquestion' in meaning:
		return choice(('confirmation', 'denial'))
	elif 'whquestion' in meaning:
		m = 'assertion:' + meaning.split(':', 1)[1]
		# edit distance on clauses:
		d = [(edit_dist(m.split(), a.split()), a) for a in examplars.values()]
		if min(d)[0] < len(meaning):
			r = choice([a for a in d if a[0] == min(d)[0]])[1]
			m = [m]
			if unifiesfully(r, m):
				return m[0]
			elif unifiespartially(r, m):
				return m[0]
		else:
			return ''
	#here we can decide to take spontaneous initiative (eg. "more juice!")
	elif 'assertion' in meaning:
		return 'acknowledgement'

	return ''

#transform a meaning into a two word utterance
def express(meaning, examplars):
	if meaning in examplars.values():
		return choice(revdict(examplars)[meaning])
	else:
		#find approximation
		# edit distance on clauses:
		d = [(edit_dist(meaning.split(), a.split()), b) for b, a in examplars.items()]
		if min(d)[0] < len(meaning):
			return choice([a for a in d if a[0] == min(d)[0]])[1]
		else:
			return '0'

def express2(meaning, examplars, lexicon):
	if meaning in examplars.values():
		return choice(revdict(examplars)[meaning])
	if ':' in meaning: meaning = meaning.split()[1]
	topic = arg(meaning.split()[0])
	comment = pred(meaning.split()[0])
	revlex = revdict(lexicon)
	utt = ''
	utt = choice(choice([b for a,b in revlex.items() if comment in a]))
	if utt == None: utt == ''
	debug('comment:', comment, 'in', utt)
	if choice([0, 1]): return
	utt = choice(choice([b for a,b in revlex.items() if topic in a])) + ' ' + utt
	debug('topic:', topic, 'in', utt)
	return utt

#A mutable default argument is essentially a static variable (yay for arcana!)
def inferlexicon(examplars, verbose=False, lexicon={}):
	def inters(a,b): 
		return set(a).intersection(set(b))
	mut = False
	for word in set(" ".join(examplars.keys()).split()) - set(lexicon.keys()):
		utterances = [a.split() for a in examplars.keys() if word in a.split()]
		meanings = [b.split() for a,b in examplars.items() if word in a.split()]

		#reduce meanings by matching words with meanings:
		for a in list(reduce(inters, meanings[1:], meanings[0]))[::-1]:
			if word in a:	#cheating?
				lexicon[word] = a
				mut = True
				break
		#if this didn't succeed and the set of examplars which contain the word
		#reduces to a single meaning, use this:
		if word not in lexicon:
			if len(reduce(inters, utterances, utterances[0]) - set(lexicon.keys())) == 1:
				lexicon[word] = reduce(inters, meanings, meanings[0])
				if len(lexicon[word]) == 0: lexicon[word] = ''
				else: lexicon[word] = lexicon[word].pop()
				mut = True
	if verbose:
		debug('lexicon: not found:', set(" ".join(examplars.keys()).split()) - set(lexicon.keys()), '\n')
	# recurse as long as new meanings are inferred:
	if mut:
		return inferlexicon(examplars, verbose) 
	return lexicon

def debug(*m):
	for a in m: print a,
	print

def test(examplars, lexicon):
	test = ['what does a dog do ?',
		'want some chocolate ?',
		'kick the ball !',
		'throw the ball',
		'want some bunny',
		'what does this [=bunny] animal do ?']
	for a in test:
		print a
		print repr(interpret(a, examplars, lexicon))
		print
	return
def getexamplars():
	#corpus
	return {'uh': 					'',
		'eh': 					'',
		'ah': 					'acknowledgement',
		'ok': 					'confirmation',
		'yes': 					'confirmation',
		'yeah': 				'confirmation',
		'no': 					'denial',
		'ball': 				'assertion: point(ball) toy(ball)',
		'throw it to me': 		'imperative: throw(X) toy(X)',
		'you kick the football !':	'imperative: kick(football) toy(football)',
		'what is this ?': 		'whquestion: point(X) Y(X)',
		'what does a bunny do ?': 	'whquestion: do(X) animal(bunny)',
		'what does a cow say ?': 'whquestion: do(X) animal(cow)',
		'what\'s a kitty say ?': 'whquestion: do(X) animal(cat)',
		'that\'s a kitty'		: 'assertion: point(cat) animal(cat)',
		'meouw'					: 'assertion: do(meouw) animal(cat)',
		'moo'					: 'assertion: do(moo) animal(cow)',
		'what animal does woof woof ?': 	'whquestion: animal(X) do(woof)',
		'bunny': 				'assertion: animal(bunny) do(hop)',
		'dog': 					'assertion: animal(dog) do(woof)',
		'hop': 					'assertion: do(hop) animal(bunny)',
		'woof woof': 			'assertion: do(woof) animal(dog)',
		'want some juice ?': 	'ynquestion: want(juice) food(juice)',
		'chocolate': 			'assertion: food(chocolate)'
		}

def main():
	#currently unused:
	ontology = {'thing': {
			'toy': {'ball':{}, 'football':{}}, 
			'food': {'juice':{}, 'chocolate':{}}, 
			'animal': {'bunny':{}}, 
			'person':{'boy':{}, 'woman':{}}
			},
		'activity': {'throw': {}, 'kick': {}}
		}
	examplars = getexamplars()
	print "Child dialogue simulator. Enter parent utterance."
	print
	print "Utterances in corpus: "
	print examplars.keys()
	print
	lexicon = inferlexicon(examplars, True)
	print 'Lexicon (distilled from corpus): ', lexicon

	test(examplars, lexicon)
	discourse, topic = [('','')], ''
	#main loop
	while True:
		print "Parent: ",
		utt = raw_input()
		#append last utterance? convolution, recurrent.
		meaning = interpret(utt, examplars, lexicon)
		debug('	interpretation:', meaning)
		reaction = response(meaning, examplars)
		debug('	reaction:', reaction)
		reaction = express2(reaction, examplars, lexicon)
		print "Child: ", reaction
		discourse.append((utt, meaning))
		common = set(discourse[-1][1].split()[1:]).intersection(set(discourse[-2][1].split()))
		if len(common) != 0:
			topic = common.pop()
			print 'topic:', topic
if __name__ == '__main__': main()