285 lines
11 KiB
C#
285 lines
11 KiB
C#
using System.Text;
|
||
using System.Text.RegularExpressions;
|
||
|
||
namespace Atomx.Utils.Files
|
||
{
|
||
/// <summary>
/// Identifies common file formats from the signature ("magic") bytes found in a stream.
/// </summary>
/// <remarks>
/// <para>
/// Detection starts at the stream's current position. A seekable stream is put back at that
/// position when detection finishes; a non-seekable stream cannot be rewound, so the bytes that
/// had to be inspected stay consumed.
/// </para>
/// <para>
/// A descriptor stored in <see cref="FilesHeader"/> or <see cref="VideoHeader"/> is one of:
/// </para>
/// <list type="bullet">
/// <item><description><c>byte[]</c>: bytes that must appear at the start.</description></item>
/// <item><description><c>object[] { byte[] header, Regex pattern }</c>: the header must match and
/// the pattern must match the remaining content decoded as ASCII.</description></item>
/// <item><description><c>object[] { byte[] header, int offsetFromEnd, byte[] tail, ... }</c>: the
/// header must match and at least one tail must be found at that distance from the end of the
/// stream (seekable streams only).</description></item>
/// <item><description><c>object[] { byte[] header, ValueTuple&lt;int, byte[]&gt; anchored }</c>: the
/// header must match and the tuple's bytes must be found at the tuple's offset, counted from
/// the detection start.</description></item>
/// </list>
/// <para>
/// Entries are tried in dictionary enumeration order and the first match wins, which is why
/// the more specific signatures (e.g. "docx") are registered before the generic ones (e.g. "zip").
/// </para>
/// </remarks>
public static class FileTypes
{
    // The public field signatures are kept unchanged for compatibility with existing callers.
    public static readonly Dictionary<string, byte[]> ImageHeader = new();
    public static readonly Dictionary<string, object> FilesHeader = new();
    public static readonly Dictionary<string, object> VideoHeader = new();

    static FileTypes()
    {
        // "GIF8" is the prefix shared by both GIF versions (GIF87a and GIF89a).
        ImageHeader.Add("gif", new byte[] { 71, 73, 70, 56 });
        ImageHeader.Add("bmp", new byte[] { 66, 77 });
        ImageHeader.Add("jpg", new byte[] { 255, 216, 255 });
        // PNG signature followed by the length and tag of the mandatory leading IHDR chunk.
        ImageHeader.Add("png", new byte[] { 137, 80, 78, 71, 13, 10, 26, 10, 0, 0, 0, 13, 73, 72, 68, 82 });

        // "%PDF-" only: the version digits that follow differ from file to file.
        FilesHeader.Add("pdf", new byte[] { 37, 80, 68, 70, 45 });
        FilesHeader.Add("docx", new object[] { OoxmlPackageHeader(), new Regex(@"word/_rels/document\.xml\.rels", RegexOptions.IgnoreCase) });
        FilesHeader.Add("xlsx", new object[] { OoxmlPackageHeader(), new Regex(@"xl/_rels/workbook\.xml\.rels", RegexOptions.IgnoreCase) });
        FilesHeader.Add("pptx", new object[] { OoxmlPackageHeader(), new Regex(@"ppt/_rels/presentation\.xml\.rels", RegexOptions.IgnoreCase) });
        FilesHeader.Add("doc", new object[] { CompoundFileHeader(), new Regex(@"microsoft( office)? word(?![\s\S]*?microsoft)", RegexOptions.IgnoreCase) });
        FilesHeader.Add("xls", new object[] { CompoundFileHeader(), new Regex(@"microsoft( office)? excel(?![\s\S]*?microsoft)", RegexOptions.IgnoreCase) });
        FilesHeader.Add("ppt", new object[] { CompoundFileHeader(), new Regex(@"c.u.r.r.e.n.t. .u.s.e.r(?![\s\S]*?[a-z])", RegexOptions.IgnoreCase) });

        FilesHeader.Add("avi", AviDescriptor());
        FilesHeader.Add("mpg", new byte[] { 0, 0, 1, 0xBA });
        FilesHeader.Add("mpeg", new byte[] { 0, 0, 1, 0xB3 });
        FilesHeader.Add("rar", new byte[] { 82, 97, 114, 33, 26, 7 });
        FilesHeader.Add("zip", new byte[] { 80, 75, 3, 4 });

        VideoHeader.Add("avi", AviDescriptor());
        VideoHeader.Add("mpg", new byte[] { 0, 0, 1, 0xBA });
        VideoHeader.Add("mpeg", new byte[] { 0, 0, 1, 0xB3 });
    }

    /// <summary>
    /// Detects the video type of a stream using <see cref="VideoHeader"/>.
    /// </summary>
    /// <param name="stream">Stream to inspect, starting at its current position.</param>
    /// <returns>The key of the first matching entry, or an empty string when nothing matches.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public static string VideoType(this Stream stream)
    {
        return Detect(stream, VideoHeader);
    }

    /// <summary>
    /// Detects the general file type of a stream using <see cref="FilesHeader"/>.
    /// </summary>
    /// <param name="stream">Stream to inspect, starting at its current position.</param>
    /// <returns>The key of the first matching entry, or an empty string when nothing matches.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public static string FileType(this Stream stream)
    {
        return Detect(stream, FilesHeader);
    }

    /// <summary>
    /// Detects the image type of a stream using <see cref="ImageHeader"/>.
    /// </summary>
    /// <param name="stream">Stream to inspect, starting at its current position.</param>
    /// <returns>The key of the first matching entry, or an empty string when nothing matches.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public static string ImageType(this Stream stream)
    {
        // No plain-text probe is run when the signatures fail: the outcome is an empty string
        // either way, so reading and decoding the whole stream would only cost time and memory.
        return Detect(stream, ImageHeader);
    }

    /// <summary>
    /// Reads everything from the stream's current position to its end into a byte array.
    /// </summary>
    /// <param name="stream">Stream to read.</param>
    /// <returns>The bytes that were read; empty when the stream is already at its end.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    /// <remarks>The original position is restored afterwards when the stream is seekable.</remarks>
    public static byte[] StreamToBytes(this Stream stream)
    {
        if (stream is null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        long? origin = stream.CanSeek ? stream.Position : (long?)null;
        try
        {
            using var copy = new MemoryStream();
            stream.CopyTo(copy);
            return copy.ToArray();
        }
        finally
        {
            if (origin.HasValue)
            {
                stream.Position = origin.Value;
            }
        }
    }

    // ---------- helpers ----------

    /// <summary>Leading bytes of the ZIP local file header as written for Office Open XML packages.</summary>
    private static byte[] OoxmlPackageHeader()
    {
        return new byte[] { 80, 75, 3, 4, 20, 0, 6, 0, 8, 0, 0, 0, 33 };
    }

    /// <summary>Signature of the OLE2 compound file container shared by legacy doc/xls/ppt files.</summary>
    private static byte[] CompoundFileHeader()
    {
        return new byte[] { 208, 207, 17, 224, 161, 177, 26, 225 };
    }

    /// <summary>
    /// Descriptor for AVI: a RIFF container ("RIFF" at offset 0, then a 4-byte size) whose form
    /// type "AVI " sits at offset 8. Checking "AVI " at offset 0 can never match a real file.
    /// </summary>
    private static object AviDescriptor()
    {
        return new object[]
        {
            new byte[] { 82, 73, 70, 70 },
            ValueTuple.Create(8, new byte[] { 65, 86, 73, 32 }),
        };
    }

    /// <summary>
    /// Tries every entry of <paramref name="signatures"/> against the stream and returns the key
    /// of the first one that matches, or an empty string.
    /// </summary>
    private static string Detect<TDescriptor>(Stream stream, Dictionary<string, TDescriptor> signatures)
    {
        if (stream is null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        // One probe per call: bytes are read from the stream once and every signature is compared
        // against the same buffered data, which also keeps non-seekable streams consistent.
        var probe = new StreamProbe(stream);
        try
        {
            foreach (var entry in signatures)
            {
                if (Matches(probe, entry.Value))
                {
                    return entry.Key;
                }
            }

            return string.Empty;
        }
        finally
        {
            probe.Rewind();
        }
    }

    /// <summary>
    /// Evaluates a single descriptor (see the class remarks for the supported shapes).
    /// Null or unsupported descriptors never match.
    /// </summary>
    private static bool Matches(StreamProbe probe, object descriptor)
    {
        if (descriptor is byte[] signature)
        {
            return probe.HasBytesAt(0, signature);
        }

        if (!(descriptor is object[] parts) || parts.Length == 0 || !(parts[0] is byte[] header))
        {
            return false;
        }

        if (!probe.HasBytesAt(0, header))
        {
            return false;
        }

        if (parts.Length < 2)
        {
            return true;
        }

        switch (parts[1])
        {
            case Regex pattern:
                return pattern.IsMatch(probe.AsciiText);

            case int offsetFromEnd:
                // parts[2..] are alternative tails; any one of them is enough.
                for (int i = 2; i < parts.Length; i++)
                {
                    if (parts[i] is byte[] tail && probe.TailEquals(offsetFromEnd, tail))
                    {
                        return true;
                    }
                }

                return false;

            case ValueTuple<int, byte[]> anchored:
                return probe.HasBytesAt(anchored.Item1, anchored.Item2);

            default:
                // Unknown second element: fall back to the header-only result.
                return true;
        }
    }

    /// <summary>
    /// Lazily buffers bytes from a stream (from the position it had when the probe was created)
    /// so that many signatures can be tested without re-reading the stream.
    /// </summary>
    private sealed class StreamProbe
    {
        private readonly Stream _source;
        private readonly long? _origin;
        private readonly MemoryStream _seen = new MemoryStream();
        private bool _drained;
        private bool _textReady;
        private string _text = string.Empty;

        public StreamProbe(Stream source)
        {
            _source = source;
            _origin = source.CanSeek ? source.Position : (long?)null;
        }

        /// <summary>All content from the probe's origin to the end of the stream, decoded as ASCII (read once).</summary>
        public string AsciiText
        {
            get
            {
                if (!_textReady)
                {
                    if (!_drained)
                    {
                        // _seen is only ever appended to, so its position is already at its end.
                        _source.CopyTo(_seen);
                        _drained = true;
                    }

                    _text = Encoding.ASCII.GetString(_seen.GetBuffer(), 0, (int)_seen.Length);
                    _textReady = true;
                }

                return _text;
            }
        }

        /// <summary>
        /// True when <paramref name="expected"/> is non-empty and found at <paramref name="offset"/>
        /// bytes past the probe's origin. Reads no further than necessary.
        /// </summary>
        public bool HasBytesAt(int offset, byte[] expected)
        {
            if (expected == null || expected.Length == 0 || offset < 0)
            {
                return false;
            }

            long end = (long)offset + expected.Length;
            EnsureBuffered(end);
            if (_seen.Length < end)
            {
                return false; // stream is shorter than the signature requires
            }

            return SameBytes(_seen.GetBuffer(), offset, expected);
        }

        /// <summary>
        /// True when <paramref name="expected"/> is found <paramref name="offsetFromEnd"/> bytes before
        /// the end of the underlying stream. Always false for non-seekable streams.
        /// </summary>
        public bool TailEquals(int offsetFromEnd, byte[] expected)
        {
            if (expected == null || expected.Length == 0 || !_source.CanSeek)
            {
                return false;
            }

            // Remember where sequential buffering stopped so it can continue afterwards.
            long resumeAt = _source.Position;
            try
            {
                // long arithmetic: the int sum could overflow for extreme offsets.
                if (offsetFromEnd < expected.Length || _source.Length < (long)offsetFromEnd + expected.Length)
                {
                    return false;
                }

                _source.Position = _source.Length - offsetFromEnd;
                var actual = new byte[expected.Length];
                int filled = 0;
                while (filled < actual.Length)
                {
                    int got = _source.Read(actual, filled, actual.Length - filled);
                    if (got <= 0)
                    {
                        return false;
                    }

                    filled += got;
                }

                return SameBytes(actual, 0, expected);
            }
            finally
            {
                _source.Position = resumeAt;
            }
        }

        /// <summary>Puts a seekable stream back where it was when the probe was created.</summary>
        public void Rewind()
        {
            if (_origin.HasValue)
            {
                _source.Position = _origin.Value;
            }
        }

        /// <summary>Reads from the stream until at least <paramref name="count"/> bytes are buffered or the stream ends.</summary>
        private void EnsureBuffered(long count)
        {
            if (_drained || _seen.Length >= count)
            {
                return;
            }

            var chunk = new byte[(int)Math.Min(count - _seen.Length, 8192)];
            while (_seen.Length < count)
            {
                int want = (int)Math.Min(count - _seen.Length, chunk.Length);
                int got = _source.Read(chunk, 0, want);
                if (got <= 0)
                {
                    _drained = true;
                    return;
                }

                _seen.Write(chunk, 0, got);
            }
        }

        private static bool SameBytes(byte[] data, int start, byte[] expected)
        {
            for (int i = 0; i < expected.Length; i++)
            {
                if (data[start + i] != expected[i])
                {
                    return false;
                }
            }

            return true;
        }
    }
}
|
||
} |