#!/usr/bin/env python3

import os
import re

# Typographic replacements used for every directory that is not treated as
# German, written as (text to find, replacement) pairs.
defaultSubstitutions = dict([
	(' - ', '—'), # Em dash: marks a break in a sentence.
	(' -- ', '—'), # Em dash: marks a break in a sentence.
	('  -- ', '  — '), # Em dash: sets off the source of a quote.
	('...', '…'), # Ellipsis.
	(' "/>', '"/>'), # Drop the space in front of the closing "/>.
	('"”', '"“'), # ” directly after a straight quote becomes “.
	(' ”', ' “'), # ” directly after a space becomes “.
	('"„', '"“'), # „ directly after a straight quote becomes “.
	(' „', ' “'), # „ directly after a space becomes “.
	('“"', '”"'), # “ directly before a straight quote becomes ”.
	('“ ', '” '), # “ directly before a space becomes ”.
])

# A single pattern matching any key of defaultSubstitutions. The keys are
# ordered longest first, so a longer key is tried before any shorter one.
defaultSubstitutionsRegex = re.compile(
	'|'.join(map(re.escape, sorted(defaultSubstitutions.keys(), key = len, reverse = True))),
	flags = re.MULTILINE | re.UNICODE)
		
# Typographic replacements used for German text, written as
# (text to find, replacement) pairs. Quotations open with „ and close with “.
germanSubstitutions = dict([
	(' - ', '—'), # Em dash: marks a break in a sentence.
	(' -- ', '—'), # Em dash: marks a break in a sentence.
	('  -- ', '  — '), # Em dash: sets off the source of a quote.
	('...', '…'), # Ellipsis.
	(' "/>', '"/>'), # Drop the space in front of the closing "/>.
	('"”', '"„'), # ” directly after a straight quote becomes „.
	(' ”', ' „'), # ” directly after a space becomes „.
	('"“', '"„'), # “ directly after a straight quote becomes „.
	(' “', ' „'), # “ directly after a space becomes „.
	('”"', '“"'), # ” directly before a straight quote becomes “.
	('” ', '“ '), # ” directly before a space becomes “.
])

# A single pattern matching any key of germanSubstitutions. The keys are
# ordered longest first, so a longer key is tried before any shorter one.
germanSubstitutionsRegex = re.compile(
	'|'.join(map(re.escape, sorted(germanSubstitutions.keys(), key = len, reverse = True))),
	flags = re.MULTILINE | re.UNICODE)
		
# Patterns for the spacing and quotation passes, compiled once for all files.

# A single space directly after value=" that is followed by a non-space
# character and then a character other than '<'.
leadingSpaceRegex = re.compile(r'value=" ([^ ][^<])')

# A run of two or more spaces followed by a character other than an em dash,
# a space or '<'. The lookahead leaves '  -- ' untouched: collapsing it to
# ' -- ' here would stop the substitution table from ever seeing its
# '  -- ' entry, and the dash would lose its surrounding spaces.
multipleSpacesRegex = re.compile(r' {2,}(?!-- )([^— <])')

# A pair of &quot; entities and the shortest text between them. DOTALL lets
# the quoted text span several lines.
quotedTextRegex = re.compile(r'&quot;(.*?)&quot;', flags = re.DOTALL)


def formatText(data, german, substitutions, substitutionsRegex):
	"""Return data with the typographic clean-up applied.

	The passes run in this order: remove a leading space after value=",
	collapse runs of spaces, turn &quot;...&quot; pairs into typographic
	quotation marks, and finally apply the substitution table.

	data -- the text to clean up.
	german -- True to quote as „...“, False to quote as “...”.
	substitutions -- mapping from text to find to its replacement.
	substitutionsRegex -- compiled pattern whose matches are all keys of
		substitutions.
	"""
	data = leadingSpaceRegex.sub(r'value="\1', data)
	data = multipleSpacesRegex.sub(r' \1', data)
	data = quotedTextRegex.sub(r'„\1“' if german else r'“\1”', data)
	return substitutionsRegex.sub(lambda match: substitutions[match.group(0)], data)


def formatFile(path, german, substitutions, substitutionsRegex):
	"""Rewrite the file at path in place with formatText applied.

	The file is read and written as UTF-8 explicitly. The replacements
	insert non-ASCII punctuation, so relying on the locale's default
	encoding could corrupt the file or fail on systems where that default
	is not UTF-8.
	"""
	with open(path, 'r', encoding = 'utf-8') as file:
		data = file.read()

	data = formatText(data, german, substitutions, substitutionsRegex)

	with open(path, 'w', encoding = 'utf-8') as file:
		file.write(data)


# __file__ may be a relative path (for example a bare file name), in which
# case its dirname is empty and the concatenation below would start at the
# filesystem root. Making it absolute first avoids that.
directory = os.path.dirname(os.path.abspath(__file__)) + '/../../Data/Core/Languages/'
for root, subdirs, files in os.walk(directory):
	print('Entering ' + root + ' ...')
	# Only the part of the path below the language directory decides the
	# language; otherwise a checkout located under some directory whose name
	# contains 'German' would format every language as German.
	german = 'German' in os.path.relpath(root, directory)
	if german:
		substitutions = germanSubstitutions
		substitutionsRegex = germanSubstitutionsRegex
	else:
		substitutions = defaultSubstitutions
		substitutionsRegex = defaultSubstitutionsRegex
	for filename in files:
		# Only XML files are formatted.
		if os.path.splitext(filename)[1] != '.xml':
			continue
		path = os.path.join(root, filename)
		print('Formatting ' + path + ' ...')
		formatFile(path, german, substitutions, substitutionsRegex)
