/// <summary>
/// Prints, for each sample string and each of three encodings (ASCII, UTF-8 and
/// <see cref="Encoding.Unicode"/>), the encoded byte count, the byte values in
/// decimal, and the string obtained by decoding those bytes back again.
/// </summary>
private static void ShowCode() {
    string[] samples = { "b", "abcd", "乙", "甲乙丙丁" };

    // Parallel arrays: labels[k] is the name printed for codecs[k].
    string[] labels = { "ASCII", "UTF8", "Unicode" };
    Encoding[] codecs = { Encoding.ASCII, Encoding.UTF8, Encoding.Unicode };

    foreach (string sample in samples) {
        for (int k = 0; k < codecs.Length; k++) {
            Encoding codec = codecs[k];

            // Round-trip the text: string -> bytes -> string.
            byte[] encoded = codec.GetBytes(sample);
            string decoded = codec.GetString(encoded, 0, encoded.Length);

            Console.WriteLine("Mode: {0}, String: {1}, Buffer.Length: {2}",
                labels[k], sample, encoded.Length);

            // Dump every byte as a decimal number followed by a single space.
            Console.WriteLine("Buffer:");
            foreach (byte value in encoded) {
                Console.Write(value + " ");
            }

            // The leading "\n" terminates the byte line; the trailing "\n" leaves
            // a blank line between consecutive reports.
            Console.WriteLine("\nRetrived: {0}\n", decoded);
        }
    }
}
程序输出为:
Mode: ASCII, String: b, Buffer.Length: 1
Buffer:
98
Retrived: b

Mode: UTF8, String: b, Buffer.Length: 1
Buffer:
98
Retrived: b

Mode: Unicode, String: b, Buffer.Length: 2
Buffer:
98 0
Retrived: b

Mode: ASCII, String: abcd, Buffer.Length: 4
Buffer:
97 98 99 100
Retrived: abcd

Mode: UTF8, String: abcd, Buffer.Length: 4
Buffer:
97 98 99 100
Retrived: abcd

Mode: Unicode, String: abcd, Buffer.Length: 8
Buffer:
97 0 98 0 99 0 100 0
Retrived: abcd

Mode: ASCII, String: 乙, Buffer.Length: 1
Buffer:
63
Retrived: ?

Mode: UTF8, String: 乙, Buffer.Length: 3
Buffer:
228 185 153
Retrived: 乙

Mode: Unicode, String: 乙, Buffer.Length: 2
Buffer:
89 78
Retrived: 乙

Mode: ASCII, String: 甲乙丙丁, Buffer.Length: 4
Buffer:
63 63 63 63
Retrived: ????

Mode: UTF8, String: 甲乙丙丁, Buffer.Length: 12
Buffer:
231 148 178 228 185 153 228 184 153 228 184 129
Retrived: 甲乙丙丁

Mode: Unicode, String: 甲乙丙丁, Buffer.Length: 8
Buffer:
50 117 89 78 25 78 1 78
Retrived: 甲乙丙丁
大体上可以得出这么几个结论:
- ASCII不能保存中文(貌似谁都知道=_-`)。
- UTF8是变长编码。在对ASCII字符编码时,UTF8更省空间,只占1个字节,与ASCII编码方式和长度相同;Unicode(这里指.NET的Encoding.Unicode,即小端序的UTF-16)在对ASCII字符编码时,占用2个字节,且第2个字节补零。
- UTF8在对常用中文字符编码时需要占用3个字节;Unicode(UTF-16)对这些字符编码则只需要2个字节。