'''
Wikicreole style parser.  Mostly compliant.
Written 2010-12-19. danomagnum.com
'''
# Module version number (stored as a float, not a string).
__version__ = 1.0


import re
import math

USE_TOC = True #master switch: when False, parse() never prepends a table of contents
TOC_LENGTH = 1000 #parse() only prepends a TOC when its HTML output is longer than this many characters

def tocmaker(contents):
	'''Build the HTML table of contents for a list of headings.

	contents is a sequence of (text, level) pairs in document order, where
	text is used both as the link label and as the anchor (href='#text')
	and level is the integer heading depth.

	Returns the TOC as an HTML string, or '' when there are three or fewer
	headings.  Deeper headings are wrapped in additional nested <ul>
	elements, and every <ul> that gets opened is closed again before the
	closing </div>.

	The heading text is inserted as-is (no HTML escaping).
	'''
	if len(contents) <= 3:
		return ''

	# The shallowest heading sits directly inside the outermost <ul>.
	base_level = min(level for text, level in contents)
	current_level = base_level

	pieces = ["<div id='toc'><Span id='toc_header'>Contents</Span><ul>"]
	for text, level in contents:
		delta = level - current_level
		if delta > 0:
			pieces.append('<ul>' * delta)
		elif delta < 0:
			pieces.append('</ul>' * -delta)
		current_level = level

		pieces.append("<li><a href='#" + text + "'>" + text + "</a></li>")

	# Close any nested lists still open when the last heading is deeper
	# than the shallowest one; otherwise the markup is left unbalanced.
	pieces.append('</ul>' * (current_level - base_level))
	pieces.append('</ul></div>')
	return ''.join(pieces)

def _set_list_depth(pieces, tag, current, target):
	'''Append the <tag> / </tag> markup needed to take a list from nesting
	depth current to depth target, and return target.'''
	if target > current:
		pieces.append(('<' + tag + '>') * (target - current))
	elif target < current:
		pieces.append(('</' + tag + '>') * (current - target))
	return target


def _close_formatting(pieces, formattings):
	'''Append closing tags for inline formatting that is still open and
	mark each entry as closed.'''
	for fmt in formattings:
		if fmt[2]:
			pieces.append('</' + fmt[1] + '>')
			fmt[2] = False


def parse(text):
	'''Convert (mostly) WikiCreole markup in text to an HTML fragment.

	Supported markup:
		**bold**   //italic//   __underline__
		[[link]]   [[link|linktext]]   {{image}}   {{image|title}}
		{{{inline preformatted}}}

		*list
		**list2
		#numbered list
		##numbered list 2
		=Heading 1
		==Heading 2   (up to ===== for Heading 5)
		|=header|cell|   table row (the trailing | is optional)
		%comment line (dropped from the output)
		\\\\ -> <br>
		---- -> <hr>
		blank line -> </p><p>

	A line starting with {{{ opens a <pre> block that runs until a line
	starting with }}}; the rest of the opening line becomes the class
	("brush: ...") of the <pre>.

	A line starting with ** is a nested list item only when the previous
	line was a list item; otherwise it is the start of bold text.

	Returns the HTML as a string.  Lists, tables, <pre> blocks and inline
	formatting that are still open at the end of the input are closed.
	When USE_TOC is set and the output is longer than TOC_LENGTH
	characters, the result of tocmaker() for the collected headings is
	prepended.

	The input is not HTML-escaped.
	'''
	in_pre = False
	in_ulist = 0   # current nesting depth of the open <ul>, 0 = none
	in_olist = 0   # current nesting depth of the open <ol>, 0 = none
	in_table = False

	# [markup token, html tag, currently open?] - the open flag persists
	# across lines until the token recurs or a blank line is reached.
	formattings = [['**','b',False],['//','i',False],['__','u',False]]

	pieces = []    # output fragments, joined once at the end
	contents = []  # (heading text, level) pairs for the table of contents

	# Longest prefix first so '=====' is not mistaken for '='.
	headings = [('=====','h5', 5),('====','h4', 4),('===','h3', 3),('==','h2', 2),('=','h1', 1)]

	re_image_1 = re.compile(r'\{\{([^\|^\}]+?)\|(.+?)\}\}')
	re_image_1s = r'<img src="\1" title="\2" class="imginpage">'

	re_image_2 = re.compile(r'\{\{([^\|^\}]+?)\}\}')
	re_image_2s = r'<img src="\1" class="imginpage">'

	re_outlink_1 = re.compile(r'\[\[([^\|^\]]+?)\|(.+?)\]\]')
	re_outlink_1s = r'<a href="\1">\2</a>'

	re_outlink_2 = re.compile(r'\[\[([^\|^\]]+?)\]\]')
	re_outlink_2s = r'<a href="\1">\1</a>'

	re_pre_inline = re.compile(r"\{\{\{(.+?)\}\}\}")
	re_pre_inline_s = r'<span class="inline_pre">\1</span>'

	for line in text.split('\n'):
		if in_pre:
			# Inside a <pre> block lines are copied verbatim.
			if line.startswith('}}}'):
				pieces.append('</pre>')
				in_pre = False
			else:
				pieces.append(line + "\n")
			continue

		line = line.lstrip()

		# Classify the line before touching any list so that a list of
		# the other kind is closed *before* the new one is opened.
		was_in_list = in_olist or in_ulist
		is_ordered = line.startswith('#')
		# '**' at the start of a line outside a list is bold, not a list.
		is_unordered = line.startswith('*') and (was_in_list or not line.startswith('**'))

		if not is_ordered:
			in_olist = _set_list_depth(pieces, 'ol', in_olist, 0)
		if not is_unordered:
			in_ulist = _set_list_depth(pieces, 'ul', in_ulist, 0)

		if is_ordered:
			# Depth is the number of leading markers.
			depth = len(line) - len(line.lstrip('#'))
			in_olist = _set_list_depth(pieces, 'ol', in_olist, depth)
			line = "<li>" + line[depth:] + "</li>"
		elif is_unordered:
			depth = len(line) - len(line.lstrip('*'))
			in_ulist = _set_list_depth(pieces, 'ul', in_ulist, depth)
			line = "<li>" + line[depth:] + "</li>"

		if line.startswith('|'):
			if not in_table:
				in_table = True
				pieces.append("<table>")
			cells = line.split('|')[1:]
			# The closing pipe is optional; only drop the final fragment
			# when it really is what follows a trailing '|'.
			if line.rstrip().endswith('|'):
				cells = cells[:-1]
			output = "<tr>"
			for p in cells:
				if p.startswith("="):
					output += "<th>" + p[1:] + "</th>"
				else:
					output += "<td>" + p + "</td>"
			output += "</tr>"
			line = output
		elif in_table:
			pieces.append("</table>")
			in_table = False

		if line == '':
			# A blank line ends the paragraph and any dangling formatting.
			_close_formatting(pieces, formattings)
			pieces.append("</p><p>")
			continue

		if line.startswith('%'): #comments start with %, so just ignore it
			continue

		if line.startswith('----'):
			pieces.append("<hr>")
			continue

		if line.startswith('{{{'):
			#if you start a line with {{{format, the pre gets its class set to that format
			pieces.append("<pre class='brush: " + line[3:] + "'>")
			in_pre = True
			continue

		for h in headings:
			if line.startswith(h[0]):
				# Drops the leading and any trailing '=' characters.
				line = line.strip(h[0])
				pieces.append("<" + h[1] + " id='" + line + "'>")
				contents.append((line, h[2]))
				line = line + "</" + h[1] + ">"
				break

		line = line.replace(r'\\','<br>')

		line = re_pre_inline.sub(re_pre_inline_s,line)

		line = re_outlink_1.sub(re_outlink_1s,line)
		line = re_outlink_2.sub(re_outlink_2s,line)

		line = re_image_1.sub(re_image_1s,line)
		line = re_image_2.sub(re_image_2s,line)

		#these lines protect https and ftps from getting clobbered by the italics
		line = line.replace('http://','!http:~~!')
		line = line.replace('https://','!https:~~!')
		line = line.replace('ftp://','!ftp:~~!')
		for fmt in formattings:
			# Each occurrence of the token toggles the tag open/closed.
			while line.count(fmt[0]):
				if fmt[2]:
					line = line.replace(fmt[0],'</' + fmt[1] + '>',1)
					fmt[2] = False
				else:
					line = line.replace(fmt[0],'<' + fmt[1] + '>',1)
					fmt[2] = True
		line = line.replace('!http:~~!','http://')
		line = line.replace('!https:~~!','https://')
		line = line.replace('!ftp:~~!','ftp://')

		pieces.append(line + "\n")

	# Input that does not end with a blank line would otherwise leave
	# these structures open in the generated HTML.
	if in_pre:
		pieces.append('</pre>')
	in_olist = _set_list_depth(pieces, 'ol', in_olist, 0)
	in_ulist = _set_list_depth(pieces, 'ul', in_ulist, 0)
	if in_table:
		pieces.append("</table>")
	_close_formatting(pieces, formattings)

	outstring = ''.join(pieces)

	if USE_TOC and len(outstring) > TOC_LENGTH:
		outstring = tocmaker(contents) + outstring
	return outstring



if __name__ == '__main__':
	# Sample markup for a manual smoke test.  The call that would render it
	# is left disabled, so running the module directly produces no output.
	string = "[[test]]"
	#print(parse(string))