/// Author: Aziz Köksal
/// License: GPL3
/// $(Maturity very high)
module dil.FileBOM;

import common;

/// The byte order marks that tellBOM() is able to recognize.
enum BOM
{
  None,    /// No byte order mark found.
  UTF8,    /// UTF-8 signature: EF BB BF
  UTF16BE, /// UTF-16 Big Endian signature: FE FF
  UTF16LE, /// UTF-16 Little Endian signature: FF FE
  UTF32BE, /// UTF-32 Big Endian signature: 00 00 FE FF
  UTF32LE  /// UTF-32 Little Endian signature: FF FE 00 00
}

/// Inspects the leading bytes of a buffer and reports which BOM they form.
///
/// Params:
///   data = The bytes to examine; only the first four are ever looked at.
///
/// Returns: The matching BOM member, or BOM.None when the buffer is shorter
///   than two bytes or does not start with a complete, known signature.
BOM tellBOM(const(ubyte)[] data)
{
  const len = data.length;

  // Every signature is at least two bytes long.
  if (len < 2)
    return BOM.None;

  const ubyte first = data[0];
  const ubyte second = data[1];

  if (first == 0xFE && second == 0xFF)
    return BOM.UTF16BE; // FE FF

  if (first == 0xFF && second == 0xFE)
  {
    // FF FE 00 00 is UTF-32 LE; FF FE followed by anything else
    // (or by nothing at all) is UTF-16 LE.
    const isUTF32 = len >= 4 && data[2] == 0x00 && data[3] == 0x00;
    return isUTF32 ? BOM.UTF32LE : BOM.UTF16LE;
  }

  if (first == 0x00 && second == 0x00)
  {
    // Only the complete sequence 00 00 FE FF counts.
    const isUTF32 = len >= 4 && data[2] == 0xFE && data[3] == 0xFF;
    return isUTF32 ? BOM.UTF32BE : BOM.None;
  }

  if (first == 0xEF && second == 0xBB)
  {
    // The third byte must complete the sequence EF BB BF.
    const isUTF8 = len >= 3 && data[2] == 0xBF;
    return isUTF8 ? BOM.UTF8 : BOM.None;
  }

  return BOM.None;
}

/// Runs tellBOM() over a table of byte sequences and asserts that each one
/// is classified as expected.
void testTellBOM()
{
  scope banner = new UnittestMsg("Testing function tellBOM().");

  alias Bytes = ubyte[];

  /// One test case: an input buffer and the BOM it must be classified as.
  struct Sample
  {
    ubyte[] input;
    BOM expected;
  }

  const Sample[] table = [
    // Sequences that are not a signature at all.
    {cast(Bytes)x"12", BOM.None},
    {cast(Bytes)x"12 34", BOM.None},
    {cast(Bytes)x"00 00 FF FE", BOM.None},
    {cast(Bytes)x"EF BB FF", BOM.None},

    // Truncated signatures.
    {cast(Bytes)x"EF", BOM.None},
    {cast(Bytes)x"EF BB", BOM.None},
    {cast(Bytes)x"FE", BOM.None},
    {cast(Bytes)x"FF", BOM.None},
    {cast(Bytes)x"00", BOM.None},
    {cast(Bytes)x"00 00", BOM.None},
    {cast(Bytes)x"00 00 FE", BOM.None},

    // UTF-16 BE signature followed by further bytes.
    {cast(Bytes)x"FE FF 00", BOM.UTF16BE},
    {cast(Bytes)x"FE FF 00 FF", BOM.UTF16BE},

    // Exact signatures.
    {cast(Bytes)x"EF BB BF", BOM.UTF8},
    {cast(Bytes)x"FE FF", BOM.UTF16BE},
    {cast(Bytes)x"FF FE", BOM.UTF16LE},
    {cast(Bytes)x"00 00 FE FF", BOM.UTF32BE},
    {cast(Bytes)x"FF FE 00 00", BOM.UTF32LE}
  ];

  foreach (entry; table)
  {
    assert(tellBOM(entry.input) == entry.expected,
      Format("Failed at {0}", entry.input));
  }
}