/*
* utf8conv version 1.0   The hex to UTF-8 Convertor
* Copyright (C) 2001 FibreSpeed, Inc.
*
*    This program is free software; you can redistribute it and/or modify
*    it under the terms of the GNU General Public License as published by
*    the Free Software Foundation; either version 2 of the License, or
*    (at your option) any later version.
*
*    This program is distributed in the hope that it will be useful,
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*    GNU General Public License for more details.
*
*    You should have received a copy of the GNU General Public License
*    along with this program; if not, write to the Free Software
*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*
* Contact the author, Michael T. Babcock at <mbabcock@fibrespeed.net>
* or the Copyright holder, FibreSpeed Inc. at <info@fibrespeed.net>
*/

#include <stdio.h>
#include <stdlib.h>

#include "UnicodeData.h"
#include "UnicodeMasks.h"

/*
 * Command-line option flags, set by main() while parsing argv:
 *   opt_quiet    (-q)  suppress the interactive prompt text
 *   opt_showname (-s)  print the Unicode description of each value
 *   opt_debug    (-v)  print binary dumps of the value and its encoding
 *   opt_raw      (-r)  also print the encoded bytes as a string
 */
int opt_quiet = 0, opt_showname = 0, opt_debug = 0, opt_raw = 0;

/*
 * Report on stdout that a code needing a sequence of `bytes` bytes is
 * not handled.  No trailing newline is written.
 */
void UTF8_too_big(const int bytes)
{
	const int sequence_length = bytes;

	fprintf(stdout, "Error: %d byte UTF codes not yet handled.",
		sequence_length);
}

/*
 * Write the eight bits of `val` to stdout as '0'/'1' characters, most
 * significant bit first, followed by a single space.
 */
void print_byte_in_binary(const unsigned char val)
{
	unsigned int mask;

	/* Walk a one-bit mask from bit 7 down to bit 0. */
	for (mask = 0x80u; mask != 0; mask >>= 1)
		putchar((val & mask) ? '1' : '0');
	putchar(' ');
}

/*
 * Write the low 16 bits of `val` to stdout as two binary bytes, high
 * byte first, with two extra spaces between them.
 */
void print_int_in_binary(const unsigned int val)
{
	/* Only the low 8 bits of each half survive the narrowing. */
	const unsigned char high = (unsigned char)(val >> 8);
	const unsigned char low = (unsigned char)val;

	print_byte_in_binary(high);
	fputs("  ", stdout);
	print_byte_in_binary(low);
}

/*
 * Write the low 32 bits of `val` to stdout as two 16-bit binary groups,
 * upper group first, with two extra spaces between them.
 */
void print_long_in_binary(const unsigned long val)
{
	const unsigned int upper = (unsigned int)(val >> 16);
	const unsigned int lower = (unsigned int)val;

	print_int_in_binary(upper);
	fputs("  ", stdout);
	print_int_in_binary(lower);
}

/*
 * Encode the code point `val` as UTF-8.
 *
 * The encoded bytes are printed to stdout as "<XX>" hex groups, lead
 * byte first, and are stored in the same order in *buf as a
 * NUL-terminated string.  *buf must have room for at least 5 chars
 * (up to 4 encoded bytes plus the terminator).  When opt_debug is set,
 * binary dumps of `val` and of the encoded bytes are printed as well.
 *
 * Sequences of up to 4 bytes (val <= 0x1fffff) are produced.  Values
 * above 0x10ffff lie outside the Unicode range but are not rejected
 * here.
 *
 * Returns 0 on success, 1 if `val` would need more than 4 bytes (a
 * message is printed via UTF8_too_big()), or 2 if the computed sequence
 * length is not one this function knows how to emit.
 */
int get_UTF8(const unsigned long val, char (*buf)[])
{
	int bytes = 0, i = 0;
	unsigned char byte[6];	/* byte[0] is the LAST byte of the sequence */
	unsigned long bits = val;

	if (opt_debug) {
		printf("val: ");
		print_long_in_binary(val);
		printf("\n");
	}

	/* Single-byte (ASCII) case. */
	if (val <= 0x7f) {
		byte[0] = (unsigned char)(val & M01111111);
		(*buf)[0] = (char)byte[0];
		(*buf)[1] = 0;
		if (opt_debug)
			printf("UTF-8: ");
		printf("<%0X>", byte[0]);
		return 0;
	}

	/* Sequence length is determined by the magnitude of val. */
	if (val <= 0x7ff)
		bytes = 2;
	else if (val <= 0xffff)
		bytes = 3;
	else if (val <= 0x1fffff)
		bytes = 4;
	else if (val <= 0x3ffffff)
		bytes = 5;
	else if (val <= 0x7fffffff)
		bytes = 6;
	else
		bytes = 7;

	/* Only sequences of 5 or more bytes are unsupported. */
	if (bytes > 4) {
		UTF8_too_big(bytes);
		return 1;
	}

	/* Continuation bytes: 6 payload bits each, least significant first. */
	for (i = 0; i < bytes - 1; i++) {
		byte[i] = (bits & M00111111) | M10000000;
		bits = bits >> 6;
	}

	/* Lead byte: length marker plus the remaining high bits. */
	switch (bytes) {
		case 2:
			byte[1] = (bits & M00011111) | M11000000;
			break;
		case 3:
			byte[2] = (bits & M00001111) | M11100000;
			break;
		case 4:
			byte[3] = (bits & M00000111) | M11110000;
			break;
		default:
			printf("Too many bytes!\n");
			return 2;
	}

	if (opt_debug) {
		/*
		 * Indent so the encoded bytes line up under the matching
		 * columns of the "val: " dump: 5 characters of label plus
		 * 11 columns for every byte position left unused.
		 */
		printf("%*s", 5 + 11 * (4 - bytes), "");
		print_byte_in_binary(byte[bytes - 1]);
		for (i = bytes - 2; i >= 0; i--) {
			printf("  ");
			print_byte_in_binary(byte[i]);
		}
		printf("\n");
		printf("UTF-8: ");
	}

	/* Emit and store lead byte first; byte[] holds them in reverse. */
	for (i = 0; i < bytes; i++) {
		const unsigned char b = byte[bytes - 1 - i];

		printf("<%0X>", b);
		(*buf)[i] = (char)b;
	}
	(*buf)[bytes] = 0;

	return 0;
}

/*
 * Print "Description: <text>" for every unicode_data entry whose code
 * equals `val`.  The scan starts at the first entry and stops at the
 * first entry whose code is 0xFFFFD or greater (presumably a sentinel
 * at the end of the table -- confirm against UnicodeData.h).
 */
void show_unicodename(const unsigned long val)
{
	unsigned long idx = 0;

	while (unicode_data[idx].code < 0xFFFFD) {
		if (unicode_data[idx].code == val)
			printf("Description: %s\n", unicode_data[idx].description);
		idx++;
	}
}

/* Print the list of supported command-line options to stdout. */
void show_help()
{
	fputs("  -h   This help screen\n"
	      "  -q   As quiet as possible\n"
	      "  -r   Print raw Unicode bytes (not working)\n"
	      "  -s   Show Unicode character names\n"
	      "  -v   Be more verbose\n",
	      stdout);
}

/*
 * Entry point: parse the single-letter options, then read hexadecimal
 * values from stdin, one per line, and print the UTF-8 encoding of each.
 *
 * Input ends at end-of-file, at an empty line, or at the first line that
 * does not begin with a hexadecimal number (after reporting
 * "Conversion failed.").  Always returns 0.
 */
int main(int argc, char *argv[])
{
	char buf[1025], *check = NULL;
	unsigned long val = 0;
	int i;

	for (i = 1; i < argc; i++) {
		/*
		 * Only "-x" style arguments are options.  Checking the first
		 * character also avoids reading past the terminator of an
		 * empty argument.
		 */
		if (argv[i][0] != '-' || argv[i][1] == '\0')
			continue;

		switch (argv[i][1]) {
			case 'h':
				show_help();
				return 0;
			case 'q':
				opt_quiet = 1;
				break;
			case 'r':
				opt_raw = 1;
				break;
			case 's':
				opt_showname = 1;
				break;
			case 'v':
				opt_debug = 1;
				break;
			default:
				break;
		}
	}

	if (!opt_quiet) {
		printf("Enter one 4 digit hex value per line.\n");
		printf("Hit ENTER alone when finished\n");
	}

	while (fgets(buf, sizeof buf, stdin) && buf[0] != '\n') {
		val = strtoul(buf, &check, 16);
		/* strtoul leaves check == buf when no digits were consumed. */
		if (check == buf) {
			printf("Conversion failed.\n");
			break;
		}
		if (opt_showname) {
			show_unicodename(val);
		}
		/* buf is reused: get_UTF8 overwrites it with the encoding. */
		if (get_UTF8(val, &buf) == 0) {
			if (opt_raw) printf ("\tCharacter: %s", buf);
		}
		printf("\n");
	}
	return 0;
}

