代码:
usc2.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
/**
 * Encode one UCS-2 code unit as a NUL-terminated UTF-8 string.
 *
 * @param ucs2_char  UCS-2 code unit (0x0000-0xFFFF).
 * @param utf8_str   Output buffer; must have room for at least 4 bytes
 *                   (up to 3 encoded bytes plus the terminating NUL).
 *
 * Notes:
 *  - U+0000 produces an empty string, because the output is NUL-terminated.
 *  - Values in the surrogate range 0xD800-0xDFFF are not characters and must
 *    not be encoded in UTF-8, so they are replaced by the error marker '?'.
 */
void ucs2_to_utf8(uint16_t ucs2_char, char *utf8_str) {
    if (ucs2_char < 0x80) {
        // 1-byte sequence: 0xxxxxxx
        utf8_str[0] = (char)ucs2_char;
        utf8_str[1] = '\0';
    } else if (ucs2_char < 0x800) {
        // 2-byte sequence: 110xxxxx 10xxxxxx
        utf8_str[0] = (char)(0xC0 | ((ucs2_char >> 6) & 0x1F));
        utf8_str[1] = (char)(0x80 | (ucs2_char & 0x3F));
        utf8_str[2] = '\0';
    } else if (ucs2_char >= 0xD800 && ucs2_char <= 0xDFFF) {
        // Surrogate code units have no valid UTF-8 encoding: emit the error marker.
        utf8_str[0] = '?';
        utf8_str[1] = '\0';
    } else {
        // A uint16_t never exceeds 0xFFFF, so everything left fits the
        // 3-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
        utf8_str[0] = (char)(0xE0 | ((ucs2_char >> 12) & 0x0F));
        utf8_str[1] = (char)(0x80 | ((ucs2_char >> 6) & 0x3F));
        utf8_str[2] = (char)(0x80 | (ucs2_char & 0x3F));
        utf8_str[3] = '\0';
    }
}
/* Returns the value (0-15) of hexadecimal digit c, or -1 if c is not a hex digit. */
static int hex_digit_value(char c) {
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }
    return -1;
}

/**
 * Decode a string of hex-encoded UCS-2 code units (4 hex digits each) given as
 * the single command-line argument and print the UTF-8 result followed by a
 * newline.
 *
 * @return 0 on success; 1 on a usage error, a length that is not a multiple
 *         of 4, or a character that is not a hexadecimal digit. Exits with
 *         EXIT_FAILURE if memory cannot be allocated.
 */
int main(int argc, char *argv[]) {
    if (argc != 2) {
        fprintf(stderr, "Usage: %s <hex_string>\n", argv[0]);
        return 1;
    }

    const char *hex_str = argv[1];
    size_t hex_str_len = strlen(hex_str);

    // Each UCS-2 code unit is written as exactly 4 hexadecimal digits.
    if (hex_str_len % 4 != 0) {
        fprintf(stderr, "Error: Invalid hex string length (must be a multiple of 4).\n");
        return 1;
    }

    // Every code unit expands to at most 3 UTF-8 bytes, so a single allocation
    // of 3 bytes per unit plus the terminating NUL is always large enough.
    // This also guarantees a valid (empty) string when the input is empty.
    size_t capacity = (hex_str_len / 4) * 3 + 1;
    char *result = malloc(capacity);
    if (!result) {
        perror("malloc failed");
        exit(EXIT_FAILURE);
    }
    size_t result_len = 0;
    result[0] = '\0';

    for (size_t i = 0; i < hex_str_len; i += 4) {
        // Parse 4 hex digits strictly; unlike sscanf("%hx") this rejects
        // signs, whitespace, "0x" prefixes and any other non-digit character.
        unsigned int value = 0;
        for (size_t j = 0; j < 4; j++) {
            int digit = hex_digit_value(hex_str[i + j]);
            if (digit < 0) {
                fprintf(stderr, "Error: Invalid hex digit at position %zu.\n", i + j);
                free(result);
                return 1;
            }
            value = (value << 4) | (unsigned int)digit;
        }

        // Up to 3 UTF-8 bytes plus the terminating NUL.
        char utf8_str[4];
        ucs2_to_utf8((uint16_t)value, utf8_str);

        // Append the encoded bytes together with their NUL terminator.
        size_t piece_len = strlen(utf8_str);
        memcpy(result + result_len, utf8_str, piece_len + 1);
        result_len += piece_len;
    }

    printf("%s\n", result);
    free(result);
    return 0;
}
编译:
gcc -o usc2 usc2.c
使用:
chmod +x usc2
./usc2 0072003300306B228FCE4F60676552305E7F5DDE8F897FA4667A80FD79D1628067099650516C53F8