using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace Atomx.Utils.Files
{
    /// <summary>
    /// Identifies common file types by comparing the bytes at the current position of a
    /// stream against known signatures, optionally followed by a content or tail check.
    /// </summary>
    /// <remarks>
    /// Every probe reads from the stream's current position and restores that position
    /// afterwards only when <see cref="Stream.CanSeek"/> is true. On a non-seekable stream
    /// each probe consumes bytes, so later probes see different data; pass a seekable
    /// stream (for example a <see cref="MemoryStream"/>) for reliable results.
    /// </remarks>
    public static class FileTypes
    {
        // The public field names are kept as they are for compatibility with existing callers.

        /// <summary>Image signatures keyed by extension; each value is the expected leading bytes.</summary>
        public static readonly Dictionary<string, byte[]> ImageHeader = new();

        /// <summary>
        /// General file signatures keyed by extension. A value is either a <c>byte[]</c>
        /// (leading bytes only) or an <c>object[]</c> as described on the private matcher:
        /// <c>{ byte[] header, Regex contentPattern }</c> or
        /// <c>{ byte[] header, int offsetFromEnd, byte[] tail, ... }</c>.
        /// </summary>
        public static readonly Dictionary<string, object> FilesHeader = new();

        /// <summary>Video signatures keyed by extension; values use the same shapes as <see cref="FilesHeader"/>.</summary>
        public static readonly Dictionary<string, object> VideoHeader = new();

        /// <summary>
        /// Populates the signature tables. Entries are probed in insertion order, so the
        /// more specific Office container entries are registered before the plain "zip" entry.
        /// </summary>
        static FileTypes()
        {
            ImageHeader.Add("gif", new byte[] { 71, 73, 70, 56, 57, 97 });
            ImageHeader.Add("bmp", new byte[] { 66, 77 });
            ImageHeader.Add("jpg", new byte[] { 255, 216, 255 });
            ImageHeader.Add("png", new byte[] { 137, 80, 78, 71, 13, 10, 26, 10, 0, 0, 0, 13, 73, 72, 68, 82 });

            // "%PDF-" only: pinning the version digits ("%PDF-1.5") rejected every other PDF version.
            FilesHeader.Add("pdf", new byte[] { 37, 80, 68, 70, 45 });

            // The three OOXML formats share one container header and are told apart by a part name inside it.
            FilesHeader.Add("docx", new object[]
            {
                new byte[] { 80, 75, 3, 4, 20, 0, 6, 0, 8, 0, 0, 0, 33 },
                new Regex(@"word/_rels/document\.xml\.rels", RegexOptions.IgnoreCase)
            });
            FilesHeader.Add("xlsx", new object[]
            {
                new byte[] { 80, 75, 3, 4, 20, 0, 6, 0, 8, 0, 0, 0, 33 },
                new Regex(@"xl/_rels/workbook\.xml\.rels", RegexOptions.IgnoreCase)
            });
            FilesHeader.Add("pptx", new object[]
            {
                new byte[] { 80, 75, 3, 4, 20, 0, 6, 0, 8, 0, 0, 0, 33 },
                new Regex(@"ppt/_rels/presentation\.xml\.rels", RegexOptions.IgnoreCase)
            });

            // The legacy Office formats likewise share one header and are told apart by content.
            FilesHeader.Add("doc", new object[]
            {
                new byte[] { 208, 207, 17, 224, 161, 177, 26, 225 },
                new Regex(@"microsoft( office)? word(?![\s\S]*?microsoft)", RegexOptions.IgnoreCase)
            });
            FilesHeader.Add("xls", new object[]
            {
                new byte[] { 208, 207, 17, 224, 161, 177, 26, 225 },
                new Regex(@"microsoft( office)? excel(?![\s\S]*?microsoft)", RegexOptions.IgnoreCase)
            });
            // The pattern must stay on one line: a line break inside a verbatim string becomes part of the regex.
            FilesHeader.Add("ppt", new object[]
            {
                new byte[] { 208, 207, 17, 224, 161, 177, 26, 225 },
                new Regex(@"c.u.r.r.e.n.t. .u.s.e.r(?![\s\S]*?[a-z])", RegexOptions.IgnoreCase)
            });

            // NOTE(review): "AVI " is compared at offset 0 here; RIFF-based AVI files normally
            // carry it after an 8-byte "RIFF" + size prefix, so this entry may never match. Confirm.
            FilesHeader.Add("avi", new byte[] { 65, 86, 73, 32 });
            FilesHeader.Add("mpg", new byte[] { 0, 0, 1, 0xBA });
            FilesHeader.Add("mpeg", new byte[] { 0, 0, 1, 0xB3 });
            FilesHeader.Add("rar", new byte[] { 82, 97, 114, 33, 26, 7 });
            FilesHeader.Add("zip", new byte[] { 80, 75, 3, 4 });

            VideoHeader.Add("avi", new byte[] { 65, 86, 73, 32 });
            VideoHeader.Add("mpg", new byte[] { 0, 0, 1, 0xBA });
            VideoHeader.Add("mpeg", new byte[] { 0, 0, 1, 0xB3 });
        }

        /// <summary>
        /// Detects the video type of a stream using <see cref="VideoHeader"/>.
        /// </summary>
        /// <param name="stream">Stream to probe, read from its current position.</param>
        /// <returns>The key of the first matching entry, or <see cref="string.Empty"/> when none matches.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        public static string VideoType(this Stream stream)
        {
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }

            foreach (var kv in VideoHeader)
            {
                if (TryMatchHeader(stream, kv.Value, out var matched) && matched)
                {
                    return kv.Key;
                }
            }

            return string.Empty;
        }

        /// <summary>
        /// Detects the general file type of a stream using <see cref="FilesHeader"/>.
        /// </summary>
        /// <param name="stream">Stream to probe, read from its current position.</param>
        /// <returns>The key of the first matching entry, or <see cref="string.Empty"/> when none matches.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        public static string FileType(this Stream stream)
        {
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }

            foreach (var kv in FilesHeader)
            {
                if (TryMatchHeader(stream, kv.Value, out var matched) && matched)
                {
                    return kv.Key;
                }
            }

            return string.Empty;
        }

        /// <summary>
        /// Detects the image type of a stream using <see cref="ImageHeader"/>.
        /// </summary>
        /// <param name="stream">Stream to probe, read from its current position.</param>
        /// <returns>The key of the first matching entry, or <see cref="string.Empty"/> when none matches.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        public static string ImageType(this Stream stream)
        {
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }

            foreach (var kv in ImageHeader)
            {
                // ReadAndCompare already rejects null or empty signatures.
                if (ReadAndCompare(stream, kv.Value))
                {
                    return kv.Key;
                }
            }

            // The former "is this plain text?" pass was removed: every one of its branches
            // returned string.Empty, so it only buffered the remaining stream in memory
            // without ever changing the result.
            return string.Empty;
        }

        /// <summary>
        /// Reads the stream from its current position to the end into a byte array,
        /// restoring the original position afterwards when the stream is seekable.
        /// </summary>
        /// <param name="stream">Stream to read.</param>
        /// <returns>The bytes read; empty when the stream is already at its end.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        public static byte[] StreamToBytes(this Stream stream)
        {
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }

            return stream.ReadAllBytesAndRestorePosition();
        }

        // ---------- Helpers ----------

        /// <summary>
        /// Matches a stream against one signature description.
        /// Supported shapes of <paramref name="value"/>:
        /// <list type="bullet">
        /// <item><description><c>byte[]</c>: only the leading bytes are compared.</description></item>
        /// <item><description><c>object[]</c>: element 0 is the leading <c>byte[]</c>; element 1 may be a
        /// <see cref="Regex"/> run against the ASCII-decoded content, or an <c>int</c> offset from the end
        /// followed by one or more candidate tail <c>byte[]</c> values (any one matching is enough).
        /// Any other second element, or none, means the header alone decides.</description></item>
        /// </list>
        /// </summary>
        /// <param name="stream">Stream to probe.</param>
        /// <param name="value">Signature description in one of the shapes above.</param>
        /// <param name="matched">Set to true when the stream satisfies the description.</param>
        /// <returns>True when <paramref name="value"/> had a supported shape; false when it was null or unrecognized.</returns>
        private static bool TryMatchHeader(Stream stream, object value, out bool matched)
        {
            matched = false;

            if (value is byte[] headerOnly)
            {
                matched = ReadAndCompare(stream, headerOnly);
                return true;
            }

            if (value is object[] parts && parts.Length > 0 && parts[0] is byte[] header)
            {
                // The leading bytes must match before any secondary check is worth running.
                if (!ReadAndCompare(stream, header))
                {
                    return true;
                }

                if (parts.Length < 2)
                {
                    matched = true;
                    return true;
                }

                switch (parts[1])
                {
                    case Regex contentPattern:
                        var content = stream.ReadAllBytesAndRestorePosition();
                        matched = contentPattern.IsMatch(Encoding.ASCII.GetString(content));
                        break;

                    case int tailOffset:
                        // parts[2..] are candidate tails; the first one that matches wins.
                        for (int i = 2; i < parts.Length; i++)
                        {
                            if (parts[i] is byte[] tail && ReadAndCompareTail(stream, tailOffset, tail))
                            {
                                matched = true;
                                break;
                            }
                        }
                        break;

                    default:
                        // Unknown second element: treat as a header-only description.
                        matched = true;
                        break;
                }

                return true;
            }

            return false;
        }

        /// <summary>
        /// Reads <c>target.Length</c> bytes from the current stream position and compares them
        /// with <paramref name="target"/>, restoring the position afterwards when seekable.
        /// </summary>
        /// <param name="stream">Stream to read from.</param>
        /// <param name="target">Expected bytes; null or empty never matches.</param>
        /// <returns>True when enough bytes were available and all of them equal <paramref name="target"/>.</returns>
        private static bool ReadAndCompare(Stream stream, byte[] target)
        {
            if (target == null || target.Length == 0)
            {
                return false;
            }

            long? originalPos = stream.CanSeek ? stream.Position : (long?)null;
            try
            {
                var buffer = new byte[target.Length];
                return ReadFull(stream, buffer, 0, buffer.Length) && buffer.SequenceEqual(target);
            }
            finally
            {
                if (originalPos.HasValue)
                {
                    stream.Position = originalPos.Value;
                }
            }
        }

        /// <summary>
        /// Compares the bytes starting <paramref name="offsetFromEnd"/> bytes before the end of
        /// the stream with <paramref name="target"/>, restoring the position afterwards.
        /// </summary>
        /// <param name="stream">Stream to read from; must be seekable, otherwise the result is false.</param>
        /// <param name="offsetFromEnd">Distance from the end of the stream at which the tail starts.</param>
        /// <param name="target">Expected bytes; null or empty never matches.</param>
        /// <returns>True when the tail lies fully inside the stream and equals <paramref name="target"/>.</returns>
        private static bool ReadAndCompareTail(Stream stream, int offsetFromEnd, byte[] target)
        {
            if (target == null || target.Length == 0)
            {
                return false;
            }

            if (!stream.CanSeek)
            {
                return false;
            }

            long originalPos = stream.Position;
            try
            {
                // The tail occupies [Length - offsetFromEnd, Length - offsetFromEnd + target.Length).
                // It fits when the start is not before the beginning and the end is not past EOF.
                if (offsetFromEnd < target.Length || stream.Length < offsetFromEnd)
                {
                    return false;
                }

                stream.Position = stream.Length - offsetFromEnd;
                var buffer = new byte[target.Length];
                return ReadFull(stream, buffer, 0, buffer.Length) && buffer.SequenceEqual(target);
            }
            finally
            {
                stream.Position = originalPos;
            }
        }

        /// <summary>
        /// Reads from the stream until <paramref name="count"/> bytes have been stored or the
        /// stream reports end of data.
        /// </summary>
        /// <param name="stream">Stream to read from.</param>
        /// <param name="buffer">Destination buffer.</param>
        /// <param name="offset">Index in <paramref name="buffer"/> at which to start storing.</param>
        /// <param name="count">Number of bytes required; zero or negative is treated as already satisfied.</param>
        /// <returns>True when exactly <paramref name="count"/> bytes were read.</returns>
        private static bool ReadFull(Stream stream, byte[] buffer, int offset, int count)
        {
            if (count <= 0)
            {
                return true;
            }

            int total = 0;
            while (total < count)
            {
                // A single Read may return fewer bytes than requested, so keep going until 0.
                int read = stream.Read(buffer, offset + total, count - total);
                if (read <= 0)
                {
                    break;
                }

                total += read;
            }

            return total == count;
        }

        /// <summary>
        /// Copies everything from the current stream position to the end into a new array and,
        /// when the stream is seekable, puts the position back where it was.
        /// </summary>
        /// <param name="stream">Stream to read.</param>
        /// <returns>The bytes read.</returns>
        private static byte[] ReadAllBytesAndRestorePosition(this Stream stream)
        {
            long? originalPos = stream.CanSeek ? stream.Position : (long?)null;
            try
            {
                using var ms = new MemoryStream();
                stream.CopyTo(ms);
                return ms.ToArray();
            }
            finally
            {
                if (originalPos.HasValue)
                {
                    stream.Position = originalPos.Value;
                }
            }
        }
    }
}