龙空技术网

从ClickHouse中流式查询大数据

opendotnet 765

前言:

如今朋友们对“phpspreadsheet内存溢出”大致比较关心,兄弟们都想要分析一些“phpspreadsheet内存溢出”的相关文章。那么小编同时在网络上收集了一些对于“phpspreadsheet内存溢出””的相关内容,希望各位老铁们能喜欢,你们快快来了解一下吧!

提示:本篇不适合手机阅读,主要为了提供代码实现。

为了减速少大的Excel文件在内存中驻留,使用流的方式,边查询边组装,边下载文件相对来说是一个好的方式,下面是基于ClickHouse数据源的方式,下载100万条记录的处理方式,本地测试,内存只有100多M的使用,下载完后就会释放。

注:下面的代码仅是演示代码,本文中的组装excel参考

using Microsoft.Extensions.Configuration;using System.Globalization;using System.IO;using System.IO.Compression;using System.Text.RegularExpressions;using System.Text;using System.Xml;using Microsoft.AspNetCore.Razor.TagHelpers;using static System.Net.WebRequestMethods;

var builder = WebApplication.CreateBuilder(args);builder.Services.AddHttpClient();var app = builder.Build();

app.MapGet("/getfile", async (IHttpClientFactory clientFactory, CancellationToken token) =>{ //用http的方式查询clickhouse var client = clientFactory.CreateClient(); var sql = "select field1,field2,field3,field4,field5,field6,field7,field8,field9,field10 from tablename order by field1 limit 1000000"; var stream = await client.GetStreamAsync($";password=mypassword&query={sql} FORMAT JSONCompactEachRowWithNamesAndTypes", token);

var utf8encoding = new UTF8Encoding(true); var bufferSize = 5 * 1024 * 1024; var memoryStream = new MemoryStream(); var archive = new ExcelZipArchive(memoryStream, ZipArchiveMode.Create, true, utf8encoding); var zipDictionary = new Dictionary<string, ZipPackageInfo>(); var id = $"R{Guid.NewGuid():N}"; var sheetName = "Sheet1"; var sheetPath = $"xl/worksheets/sheet1.xml"; var sheetIdx = 1;

#region 组装openxml的zip { var _defaultRels = ReplaceString(@"<?xml version=""1.0"" encoding=""utf-8""?><Relationships xmlns=""""> <Relationship Type="""" Target=""xl/workbook.xml"" Id=""Rfc2254092b6248a9"" /></Relationships>"); CreateZipEntry("_rels/.rels", "application/vnd.openxmlformats-package.relationships+xml", _defaultRels); } { var _defaultSharedString = ReplaceString("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\" ?><sst xmlns=\"\" count=\"0\" uniqueCount=\"0\"></sst>"); CreateZipEntry("xl/sharedStrings.xml", "application/vnd.openxmlformats-package.relationships+xml", _defaultSharedString); } { var entry = archive.CreateEntry(sheetPath, CompressionLevel.Fastest); using var zipStream = entry.Open(); using var writer = new StreamWriter(zipStream, utf8encoding, bufferSize); writer.Write($@"<?xml version=""1.0"" encoding=""utf-8""?><x:worksheet xmlns:x="""">"); var yIndex = 1; var xIndex = 1; { var sReader = new StreamReader(stream); if (!sReader.EndOfStream) { var nameLine = await sReader.ReadLineAsync(); var typeLine = await sReader.ReadLineAsync(); var fieldNameArr = System.Text.Json.JsonSerializer.Deserialize<object[]>(nameLine); int fieldCount = fieldNameArr.Length; //处理列宽度 writer.Write($@"<x:cols>"); for (int i = 0; i < fieldCount; i++) { writer.Write($@"<x:col min=""{i + 1}"" max=""{i + 1}"" {$@"width=""30"""} customWidth=""1"" />"); } writer.Write($@"</x:cols>"); //处理表格 writer.Write("<x:sheetData>"); //处理表头 writer.Write($"<x:row r=\"{yIndex}\">"); xIndex = 1; for (int i = 0; i < fieldCount; i++) { var columnName = fieldNameArr[i].ToString(); WriteC(writer, "1", columnName); xIndex++; } writer.Write($"</x:row>"); yIndex++; } //处理表格数据 while (!sReader.EndOfStream) { writer.Write($"<x:row r=\"{yIndex}\">"); var dataLine = await sReader.ReadLineAsync(); var dataArr = System.Text.Json.JsonSerializer.Deserialize<object[]>(dataLine); xIndex = 1; for (var i = 0; i < dataArr.Length; i++) { WriteCell(writer, yIndex, xIndex, dataArr[i]); xIndex++; } writer.Write($"</x:row>"); yIndex++; } } writer.Write("</x:sheetData>"); writer.Write("</x:worksheet>"); writer.Flush(); } { string _defaultStylesXml = ReplaceString(@"<?xml version=""1.0"" encoding=""utf-8""?><x:styleSheet xmlns:x=""""> <x:numFmts count=""1""> <x:numFmt numFmtId=""0"" formatCode="""" /> </x:numFmts> <x:fonts count=""2""> <x:font> <x:vertAlign val=""baseline"" /> <x:sz val=""11"" /> <x:color rgb=""FF000000"" /> <x:name val=""Calibri"" /> <x:family val=""2"" /> </x:font> <x:font> <x:vertAlign val=""baseline"" /> <x:sz val=""11"" /> <x:color rgb=""00000000"" /> <x:name val=""Calibri"" /> <x:family val=""2"" /> </x:font> </x:fonts> <x:fills count=""3""> <x:fill> <x:patternFill patternType=""none"" /> </x:fill> <x:fill> <x:patternFill patternType=""gray125"" /> </x:fill> <x:fill> <x:patternFill patternType=""solid""> <x:fgColor rgb=""FFFFFFFF"" /> </x:patternFill> </x:fill> </x:fills> <x:borders count=""2""> <x:border diagonalUp=""0"" diagonalDown=""0""> <x:left style=""none""> <x:color rgb=""FF000000"" /> </x:left> <x:right style=""none""> <x:color rgb=""FF000000"" /> </x:right> <x:top style=""none""> <x:color rgb=""FF000000"" /> </x:top> <x:bottom style=""none""> <x:color rgb=""FF000000"" /> </x:bottom> <x:diagonal style=""none""> <x:color rgb=""FF000000"" /> </x:diagonal> </x:border> <x:border diagonalUp=""0"" diagonalDown=""0""> <x:left style=""thin""> <x:color rgb=""FF000000"" /> </x:left> <x:right style=""thin""> <x:color rgb=""FF000000"" /> </x:right> <x:top style=""thin""> <x:color rgb=""FF000000"" /> </x:top> <x:bottom style=""thin""> <x:color rgb=""FF000000"" /> </x:bottom> <x:diagonal style=""none""> <x:color rgb=""FF000000"" /> </x:diagonal> </x:border> </x:borders> <x:cellStyleXfs count=""4""> <x:xf numFmtId=""0"" fontId=""0"" fillId=""0"" borderId=""0"" applyNumberFormat=""1"" applyFill=""1"" applyBorder=""0"" applyAlignment=""1"" applyProtection=""1""> <x:protection locked=""1"" hidden=""0"" /> </x:xf> <x:xf numFmtId=""14"" fontId=""1"" fillId=""2"" borderId=""1"" applyNumberFormat=""1"" applyFill=""0"" applyBorder=""1"" applyAlignment=""1"" applyProtection=""1""> <x:protection locked=""1"" hidden=""0"" /> </x:xf> <x:xf numFmtId=""0"" fontId=""0"" fillId=""0"" borderId=""1"" applyNumberFormat=""1"" applyFill=""1"" applyBorder=""1"" applyAlignment=""1"" applyProtection=""1""> <x:protection locked=""1"" hidden=""0"" /> </x:xf> <x:xf numFmtId=""3"" fontId=""0"" fillId=""0"" borderId=""1"" applyNumberFormat=""1"" applyFill=""1"" applyBorder=""1"" applyAlignment=""1"" applyProtection=""1""> <x:protection locked=""1"" hidden=""0""/> </x:xf> </x:cellStyleXfs> <x:cellXfs count=""5""> <x:xf></x:xf> <x:xf numFmtId=""0"" fontId=""1"" fillId=""2"" borderId=""1"" xfId=""0"" applyNumberFormat=""1"" applyFill=""0"" applyBorder=""1"" applyAlignment=""1"" applyProtection=""1""> <x:alignment horizontal=""left"" vertical=""bottom"" textRotation=""0"" wrapText=""0"" indent=""0"" relativeIndent=""0"" justifyLastLine=""0"" shrinkToFit=""0"" readingOrder=""0"" /> <x:protection locked=""1"" hidden=""0"" /> </x:xf> <x:xf numFmtId=""0"" fontId=""0"" fillId=""0"" borderId=""1"" xfId=""0"" applyNumberFormat=""1"" applyFill=""1"" applyBorder=""1"" applyAlignment=""1"" applyProtection=""1""> <x:alignment horizontal=""general"" vertical=""bottom"" textRotation=""0"" wrapText=""0"" indent=""0"" relativeIndent=""0"" justifyLastLine=""0"" shrinkToFit=""0"" readingOrder=""0"" /> <x:protection locked=""1"" hidden=""0"" /> </x:xf> <x:xf numFmtId=""14"" fontId=""0"" fillId=""0"" borderId=""1"" xfId=""0"" applyNumberFormat=""1"" applyFill=""1"" applyBorder=""1"" applyAlignment=""1"" applyProtection=""1""> <x:alignment horizontal=""general"" vertical=""bottom"" textRotation=""0"" wrapText=""0"" indent=""0"" relativeIndent=""0"" justifyLastLine=""0"" shrinkToFit=""0"" readingOrder=""0"" /> <x:protection locked=""1"" hidden=""0"" /> </x:xf> <x:xf numFmtId=""0"" fontId=""0"" fillId=""0"" borderId=""1"" xfId=""0"" applyBorder=""1"" applyAlignment=""1""> <x:alignment horizontal=""fill""/> </x:xf> <x:xf numFmtId=""3"" fontId=""0"" fillId=""0"" borderId=""1"" xfId=""0"" applyNumberFormat=""1"" applyFill=""1"" applyBorder=""1"" applyAlignment=""1"" applyProtection=""1""> <x:alignment horizontal=""center"" vertical=""center"" textRotation=""0"" wrapText=""0"" indent=""0"" relativeIndent=""0"" justifyLastLine=""0"" shrinkToFit=""0"" readingOrder=""0""/> <x:protection locked=""1"" hidden=""0""/> </x:xf> </x:cellXfs> <x:cellStyles count=""1""> <x:cellStyle name=""Normal"" xfId=""0"" builtinId=""0"" /> </x:cellStyles></x:styleSheet>"); var styleXml = _defaultStylesXml; CreateZipEntry(@"xl/styles.xml", "application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml", styleXml); } { string _defaultDrawingXmlRels = ReplaceString(@"<?xml version=""1.0"" encoding=""utf-8"" standalone=""yes""?><Relationships xmlns=""""> {{format}}</Relationships>"); var drawing = new StringBuilder(); CreateZipEntry($"xl/drawings/_rels/drawing1.xml.rels", "", _defaultDrawingXmlRels.Replace("{{format}}", drawing.ToString())); } { var drawing = new StringBuilder(); string _defaultDrawing = ReplaceString(@"<?xml version=""1.0"" encoding=""utf-8"" standalone=""yes""?><xdr:wsDr xmlns:a="""" xmlns:r="""" xmlns:xdr=""""> {{format}}</xdr:wsDr>"); CreateZipEntry($"xl/drawings/drawing1.xml", "application/vnd.openxmlformats-officedocument.drawing+xml", _defaultDrawing.Replace("{{format}}", drawing.ToString())); } { var workbookXml = new StringBuilder(); var workbookRelsXml = new StringBuilder();

var sheetId = 1; workbookXml.AppendLine($@"<x:sheet name=""{sheetName}"" sheetId=""{sheetId}"" r:id=""{id}"" />"); workbookRelsXml.AppendLine($@"<Relationship Type="""" Target=""/{sheetPath}"" Id=""{id}"" />");

var sheetRelsXml = ReplaceString($@"<Relationship Type="""" Target=""../drawings/drawing{sheetId}.xml"" Id=""drawing{sheetId}"" />"); string _defaultSheetRelXml = ReplaceString(@"<?xml version=""1.0"" encoding=""UTF-8"" standalone=""yes""?><Relationships xmlns=""""> {{format}}</Relationships>"); CreateZipEntry($"xl/worksheets/_rels/sheet{sheetIdx}.xml.rels", "", _defaultSheetRelXml.Replace("{{format}}", sheetRelsXml));

string _defaultWorkbookXml = ReplaceString(@"<?xml version=""1.0"" encoding=""utf-8""?><x:workbook xmlns:r="""" xmlns:x=""""> <x:sheets> {{sheets}} </x:sheets></x:workbook>"); CreateZipEntry(@"xl/workbook.xml", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml", _defaultWorkbookXml.Replace("{{sheets}}", workbookXml.ToString()));

string _defaultWorkbookXmlRels = ReplaceString(@"<?xml version=""1.0"" encoding=""utf-8""?><Relationships xmlns=""""> {{sheets}} <Relationship Type="""" Target=""/xl/styles.xml"" Id=""R3db9602ace774fdb"" /></Relationships>"); CreateZipEntry(@"xl/_rels/workbook.xml.rels", "", _defaultWorkbookXmlRels.Replace("{{sheets}}", workbookRelsXml.ToString()));

var sb = new StringBuilder(@"<?xml version=""1.0"" encoding=""UTF-8"" standalone=""yes""?><Types xmlns=""""><Default ContentType=""application/vnd.openxmlformats-officedocument.spreadsheetml.printerSettings"" Extension=""bin""/><Default ContentType=""application/xml"" Extension=""xml""/><Default ContentType=""image/jpeg"" Extension=""jpg""/><Default ContentType=""image/png"" Extension=""png""/><Default ContentType=""image/gif"" Extension=""gif""/><Default ContentType=""application/vnd.openxmlformats-package.relationships+xml"" Extension=""rels""/>"); foreach (var p in zipDictionary) { sb.Append($"<Override ContentType=\"{p.Value.ContentType}\" PartName=\"/{p.Key}\" />"); } sb.Append("</Types>"); var entry = archive.CreateEntry("[Content_Types].xml", CompressionLevel.Fastest); using var zipStream = entry.Open(); using var writer = new StreamWriter(zipStream, utf8encoding, bufferSize); writer.Write(sb.ToString()); } #endregion archive.Dispose(); void CreateZipEntry(string path, string contentType, string content) { var entry = archive.CreateEntry(path, CompressionLevel.Fastest); using var zipStream = entry.Open(); using var writer = new StreamWriter(zipStream, utf8encoding, bufferSize); writer.Write(content); if (!string.IsOrEmpty(contentType)) zipDictionary.Add(path, new ZipPackageInfo(entry, contentType)); } memoryStream.Seek(0, SeekOrigin.Begin); await memoryStream.FlushAsync(token); return TypedResults.File(fileStream: memoryStream, contentType: "application/octet-stream", fileDownloadName: $"{DateTime.Now.ToString("yyyyMMddhhssmm")}.xlsx");

});app.Run();

string ReplaceString(string xml) => xml.Replace("\r", "").Replace("\n", "").Replace("\t", "");

string ConvertXyToCell(int x, int y){ int dividend = x; string columnName = String.Empty; int modulo; while (dividend > 0) { modulo = (dividend - 1) % 26; columnName = Convert.ToChar(65 + modulo).ToString() + columnName; dividend = (int)((dividend - modulo) / 26); } return $"{columnName}{y}";}

void WriteC(StreamWriter writer, string r, string columnName){ writer.Write($"<x:c r=\"{r}\" t=\"str\" s=\"1\">"); writer.Write($"<x:v>{EncodeXML(columnName)}"); //issue I45TF5 writer.Write($"</x:v>"); writer.Write($"</x:c>");}

void WriteCell(StreamWriter writer, int rowIndex, int cellIndex, object value){ var v = string.Empty; var t = "str"; var s = "2"; if (value == ) { v = ""; } else if (value is string str) { v = EncodeXML(str); } else { var type = value.GetType(); type = able.GetUnderlyingType(type) ?? type; if (IsNumericType(type)) { t = "n"; //if (isMoney) //{ // s = "5"; //}

if (type.IsAssignableFrom(typeof(decimal))) v = ((decimal)value).ToString(CultureInfo.InvariantCulture); else if (type.IsAssignableFrom(typeof(Int32))) v = ((Int32)value).ToString(CultureInfo.InvariantCulture); else if (type.IsAssignableFrom(typeof(Double))) v = ((Double)value).ToString(CultureInfo.InvariantCulture); else if (type.IsAssignableFrom(typeof(Int64))) v = ((Int64)value).ToString(CultureInfo.InvariantCulture); else if (type.IsAssignableFrom(typeof(UInt32))) v = ((UInt32)value).ToString(CultureInfo.InvariantCulture); else if (type.IsAssignableFrom(typeof(UInt16))) v = ((UInt16)value).ToString(CultureInfo.InvariantCulture); else if (type.IsAssignableFrom(typeof(UInt64))) v = ((UInt64)value).ToString(CultureInfo.InvariantCulture); else if (type.IsAssignableFrom(typeof(Int16))) v = ((Int16)value).ToString(CultureInfo.InvariantCulture); else if (type.IsAssignableFrom(typeof(Single))) v = ((Single)value).ToString(CultureInfo.InvariantCulture); else if (type.IsAssignableFrom(typeof(Single))) v = ((Single)value).ToString(CultureInfo.InvariantCulture); else v = (decimal.Parse(value.ToString())).ToString(CultureInfo.InvariantCulture); } else if (type == typeof(bool)) { t = "b"; v = (bool)value ? "1" : "0"; } else if (type == typeof(DateTime)) { t = ; s = "3"; v = ((DateTime)value).ToOADate().ToString(CultureInfo.InvariantCulture); } else { v = EncodeXML(value.ToString()); } } var columname = ConvertXyToCell(cellIndex, rowIndex); if (v != && (v.StartsWith(" ", StringComparison.Ordinal) || v.EndsWith(" ", StringComparison.Ordinal))) writer.Write($"<x:c r=\"{columname}\" {(t == ? "" : $"t =\"{t}\"")} s=\"{s}\" xml:space=\"preserve\"><x:v>{v}</x:v></x:c>"); else writer.Write($"<x:c r=\"{columname}\" {(t == ? "" : $"t =\"{t}\"")} s=\"{s}\"><x:v>{v}</x:v></x:c>");}

bool IsNumericType(Type type, bool isableUnderlyingType = false){ if (isableUnderlyingType) type = able.GetUnderlyingType(type) ?? type; switch (Type.GetTypeCode(type)) { //case TypeCode.Byte: //case TypeCode.SByte: case TypeCode.UInt16: case TypeCode.UInt32: case TypeCode.UInt64: case TypeCode.Int16: case TypeCode.Int32: case TypeCode.Int64: case TypeCode.Decimal: case TypeCode.Double: case TypeCode.Single: return true; default: return false; }}

string EncodeXML(string value) => value == ? string.Empty : XmlEncoder.EncodeString(value) .Replace("&", "&amp;") .Replace("<", "&lt;") .Replace(">", "&gt;") .Replace("\"", "&quot;") .Replace("'", "&apos;") .ToString(); public class ExcelZipArchive : ZipArchive{ public ExcelZipArchive(Stream stream, ZipArchiveMode mode, bool leaveOpen, Encoding entryNameEncoding) : base(stream, mode, leaveOpen, entryNameEncoding) { }

public new void Dispose() { Dispose(disposing: true); GC.SuppressFinalize(this); }}

class XmlEncoder{ private static readonly Regex xHHHHRegex = new Regex("_(x[\\dA-Fa-f]{4})_", RegexOptions.Compiled); private static readonly Regex Uppercase_X_HHHHRegex = new Regex("_(X[\\dA-Fa-f]{4})_", RegexOptions.Compiled);

public static StringBuilder EncodeString(string encodeStr) { if (encodeStr == ) return ; encodeStr = xHHHHRegex.Replace(encodeStr, "_x005F_$1_"); var sb = new StringBuilder(encodeStr.Length); foreach (var ch in encodeStr) { if (XmlConvert.IsXmlChar(ch)) sb.Append(ch); else sb.Append(XmlConvert.EncodeName(ch.ToString())); } return sb; }

public static string DecodeString(string decodeStr) { if (string.IsOrEmpty(decodeStr)) return string.Empty; decodeStr = Uppercase_X_HHHHRegex.Replace(decodeStr, "_x005F_$1_"); return XmlConvert.DecodeName(decodeStr); }

private static readonly Regex EscapeRegex = new Regex("_x([0-9A-F]{4,4})_"); public static string ConvertEscapeChars(string input) { return EscapeRegex.Replace(input, m => ((char)uint.Parse(m.Groups[1].Value, NumberStyles.HexNumber)).ToString()); }}

internal class ZipPackageInfo{ public ZipArchiveEntry ZipArchiveEntry { get; set; } public string ContentType { get; set; } public ZipPackageInfo(ZipArchiveEntry zipArchiveEntry, string contentType) { this.ZipArchiveEntry = zipArchiveEntry; ContentType = contentType; }}

标签: #phpspreadsheet内存溢出