两个html页面合并_html转word c++

编程文档 (65) 2023-03-24 15:01


所以今天给大家推荐一个基于 Open XML 文档(DOCX、XLSX 和 PPTX)编程接口开发的项目:它在此基础上进行了很多优化,并实现了 DOCX、PPTX、HTML 等文件的合并、拆分、互相转换等实用功能。


这是一个基于 Open XML 文档编程接口开发的项目,并扩展了 Open XML SDK 的功能。







6、支持正则表达式搜索和替换 DOCX/PPTX 中的内容;


8、更新 DOCX/PPTX 文件中的图表,包括更新缓存数据以及嵌入的 XLSX;






1、目标平台:net45;net46;netstandard2.0

2、开发工具:Visual Studio 2017


两个html页面合并_html转word c++_https://bianchenghao6.com/blog_编程文档_第1张




/// <summary>
/// Converts a DOCX file to a standalone HTML5 file. Images embedded in the document
/// are written to a sibling "&lt;name&gt;_files" directory and referenced from the HTML.
/// </summary>
/// <param name="file">Path of the source DOCX file.</param>
/// <param name="outputDirectory">
/// Existing directory that receives the HTML file. When null or empty, the HTML file
/// name is resolved relative to the current working directory.
/// </param>
/// <exception cref="OpenXmlPowerToolsException">
/// Thrown when <paramref name="outputDirectory"/> is supplied but does not exist.
/// </exception>
public static void ConvertToHtml(string file, string outputDirectory)
{
    var fi = new FileInfo(file);
    byte[] byteArray = File.ReadAllBytes(fi.FullName);

    // Work on an in-memory copy so the source file on disk is never modified,
    // even though the document is opened in editable mode.
    using (MemoryStream memoryStream = new MemoryStream())
    {
        memoryStream.Write(byteArray, 0, byteArray.Length);
        using (WordprocessingDocument wDoc = WordprocessingDocument.Open(memoryStream, true))
        {
            // ChangeExtension guarantees the name ends in ".html" regardless of the
            // source extension's casing, which the suffix-stripping below relies on.
            var destFileName = new FileInfo(Path.ChangeExtension(fi.Name, ".html"));
            if (!string.IsNullOrEmpty(outputDirectory))
            {
                DirectoryInfo di = new DirectoryInfo(outputDirectory);
                if (!di.Exists)
                {
                    throw new OpenXmlPowerToolsException("Output directory does not exist");
                }
                destFileName = new FileInfo(Path.Combine(di.FullName, destFileName.Name));
            }

            // Strip the 5-character ".html" suffix and append "_files".
            var imageDirectoryName = destFileName.FullName.Substring(0, destFileName.FullName.Length - 5) + "_files";
            int imageCounter = 0;

            // Use the document's dc:title as the page title, falling back to the file path.
            var pageTitle = fi.FullName;
            var part = wDoc.CoreFilePropertiesPart;
            if (part != null)
            {
                pageTitle = (string) part.GetXDocument().Descendants(DC.title).FirstOrDefault() ?? fi.FullName;
            }

            // TODO: Determine max-width from size of content area.
            HtmlConverterSettings settings = new HtmlConverterSettings()
            {
                AdditionalCss = "body { margin: 1cm auto; max-width: 20cm; padding: 0; }",
                PageTitle = pageTitle,
                FabricateCssClasses = true,
                CssClassPrefix = "pt-",
                RestrictToSupportedLanguages = false,
                RestrictToSupportedNumberingFormats = false,
                ImageHandler = imageInfo =>
                {
                    // Create the image directory lazily, only when the first image appears.
                    DirectoryInfo localDirInfo = new DirectoryInfo(imageDirectoryName);
                    if (!localDirInfo.Exists)
                    {
                        localDirInfo.Create();
                    }

                    // Give every image its own file name; without this each image
                    // would overwrite the previous one.
                    ++imageCounter;

                    // Culture-invariant lowering so the comparisons below behave the
                    // same under every locale.
                    string extension = imageInfo.ContentType.Split('/')[1].ToLowerInvariant();
                    ImageFormat imageFormat = null;
                    if (extension == "png")
                    {
                        imageFormat = ImageFormat.Png;
                    }
                    else if (extension == "gif")
                    {
                        imageFormat = ImageFormat.Gif;
                    }
                    else if (extension == "bmp")
                    {
                        imageFormat = ImageFormat.Bmp;
                    }
                    else if (extension == "jpeg")
                    {
                        imageFormat = ImageFormat.Jpeg;
                    }
                    else if (extension == "tiff")
                    {
                        // Convert tiff to gif.
                        extension = "gif";
                        imageFormat = ImageFormat.Gif;
                    }
                    else if (extension == "x-wmf")
                    {
                        extension = "wmf";
                        imageFormat = ImageFormat.Wmf;
                    }

                    // If the image format isn't one that we expect, ignore it,
                    // and don't return markup for the link.
                    if (imageFormat == null)
                    {
                        return null;
                    }

                    string imageFileName = imageDirectoryName + "/image" +
                        imageCounter.ToString() + "." + extension;
                    try
                    {
                        imageInfo.Bitmap.Save(imageFileName, imageFormat);
                    }
                    catch (System.Runtime.InteropServices.ExternalException)
                    {
                        // The image could not be encoded or written; omit it from the output.
                        return null;
                    }

                    // Reference the image relative to the HTML file (directory name only).
                    string imageSource = localDirInfo.Name + "/image" +
                        imageCounter.ToString() + "." + extension;

                    XElement img = new XElement(Xhtml.img,
                        new XAttribute(NoNamespace.src, imageSource),
                        imageInfo.AltText != null ?
                            new XAttribute(NoNamespace.alt, imageInfo.AltText) : null);
                    return img;
                }
            };
            XElement htmlElement = HtmlConverter.ConvertToHtml(wDoc, settings);

            // Produce HTML document with <!DOCTYPE html > declaration to tell the browser
            // we are using HTML5.
            var html = new XDocument(
                new XDocumentType("html", null, null, null),
                htmlElement);

            var htmlString = html.ToString(SaveOptions.DisableFormatting);
            File.WriteAllText(destFileName.FullName, htmlString, Encoding.UTF8);
        }
    }
}


// Build an output directory name of the form "ExampleOutput-yy-MM-dd-HHmmss"
// from the current local time.
var n = DateTime.Now;
var tempDi = new DirectoryInfo(string.Format("ExampleOutput-{0:00}-{1:00}-{2:00}-{3:00}{4:00}{5:00}", n.Year - 2000, n.Month, n.Day, n.Hour, n.Minute, n.Second));
// NOTE(review): no call that creates tempDi on disk is visible in this excerpt;
// confirm the directory is created before the copies below run.

// Copy every .docx and .pptx file found two directory levels up into the output directory.
// NOTE(review): the loop-body braces are missing from this excerpt.
var sourceDi = new DirectoryInfo("../../");
foreach (var file in sourceDi.GetFiles("*.docx"))
                File.Copy(file.FullName, Path.Combine(tempDi.FullName, file.Name));
foreach (var file in sourceDi.GetFiles("*.pptx"))
                File.Copy(file.FullName, Path.Combine(tempDi.FullName, file.Name));

// For each .docx in the output directory: make an "Updated-" copy, open it for
// editing, and push new data into the charts named "Chart1" through "Chart4".
// NOTE(review): braces and the contents of the SeriesNames / CategoryNames
// initializers are missing from this excerpt, so it does not compile as shown.
var fileList = Directory.GetFiles(tempDi.FullName, "*.docx");
foreach (var file in fileList)
var fi = new FileInfo(file);
var newFileName = "Updated-" + fi.Name;
var fi2 = new FileInfo(Path.Combine(tempDi.FullName, newFileName));
                File.Copy(fi.FullName, fi2.FullName);

using (var wDoc = WordprocessingDocument.Open(fi2.FullName, true))
// Chart1: string categories; five value rows of four values each.
var chart1Data = new ChartData
                        SeriesNames = new[] {
                        CategoryDataType = ChartDataType.String,
                        CategoryNames = new[] {
                        Values = new double[][] {
new double[] {
100, 310, 220, 450,
new double[] {
200, 300, 350, 411,
new double[] {
80, 120, 140, 600,
new double[] {
120, 100, 140, 400,
new double[] {
200, 210, 210, 480,
                    ChartUpdater.UpdateChart(wDoc, "Chart1", chart1Data);

// Chart2: string categories; a single value row of four values.
var chart2Data = new ChartData
                        SeriesNames = new[] {
                        CategoryDataType = ChartDataType.String,
                        CategoryNames = new[] {
                        Values = new double[][] {
new double[] {
320, 112, 64, 80,
                    ChartUpdater.UpdateChart(wDoc, "Chart2", chart2Data);

// Chart3: string categories; six value rows of six values each.
var chart3Data = new ChartData
                        SeriesNames = new[] {
                        CategoryDataType = ChartDataType.String,
                        CategoryNames = new[] {
                        Values = new double[][] {
new double[] {      3.0,      2.1,       .7,      .7,      2.1,      3.0,      },
new double[] {      3.0,      2.1,       .8,      .8,      2.1,      3.0,      },
new double[] {      3.0,      2.4,      1.2,     1.2,      2.4,      3.0,      },
new double[] {      3.0,      2.7,      1.7,     1.7,      2.7,      3.0,      },
new double[] {      3.0,      2.9,      2.5,     2.5,      2.9,      3.0,      },
new double[] {      3.0,      3.0,      3.0,     3.0,      3.0,      3.0,      },
                    ChartUpdater.UpdateChart(wDoc, "Chart3", chart3Data);

// Chart4: DateTime categories for 2013-09-01 through 2013-09-20, with three
// value rows of twenty values each (one value per date).
// NOTE(review): ToExcelInteger is defined outside this excerpt; presumably it
// converts a DateTime to an Excel date serial number. Confirm.
// NOTE(review): the meaning of CategoryFormatCode = 14 is not shown here;
// presumably a built-in date number format. Confirm.
var chart4Data = new ChartData
                        SeriesNames = new[] {
                        CategoryDataType = ChartDataType.DateTime,
                        CategoryFormatCode = 14,
                        CategoryNames = new[] {
                            ToExcelInteger(new DateTime(2013, 9, 1)),
                            ToExcelInteger(new DateTime(2013, 9, 2)),
                            ToExcelInteger(new DateTime(2013, 9, 3)),
                            ToExcelInteger(new DateTime(2013, 9, 4)),
                            ToExcelInteger(new DateTime(2013, 9, 5)),
                            ToExcelInteger(new DateTime(2013, 9, 6)),
                            ToExcelInteger(new DateTime(2013, 9, 7)),
                            ToExcelInteger(new DateTime(2013, 9, 8)),
                            ToExcelInteger(new DateTime(2013, 9, 9)),
                            ToExcelInteger(new DateTime(2013, 9, 10)),
                            ToExcelInteger(new DateTime(2013, 9, 11)),
                            ToExcelInteger(new DateTime(2013, 9, 12)),
                            ToExcelInteger(new DateTime(2013, 9, 13)),
                            ToExcelInteger(new DateTime(2013, 9, 14)),
                            ToExcelInteger(new DateTime(2013, 9, 15)),
                            ToExcelInteger(new DateTime(2013, 9, 16)),
                            ToExcelInteger(new DateTime(2013, 9, 17)),
                            ToExcelInteger(new DateTime(2013, 9, 18)),
                            ToExcelInteger(new DateTime(2013, 9, 19)),
                            ToExcelInteger(new DateTime(2013, 9, 20)),
                        Values = new double[][] {
new double[] {
1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 5, 4, 5, 6, 7, 8, 7, 8, 8, 9,
new double[] {
2, 3, 3, 4, 4, 5, 6, 7, 8, 7, 8, 9, 9, 9, 7, 8, 9, 9, 10, 11,
new double[] {
2, 3, 3, 3, 3, 2, 2, 2, 3, 2, 3, 3, 4, 4, 4, 3, 4, 5, 5, 4,
                    ChartUpdater.UpdateChart(wDoc, "Chart4", chart4Data);

            // Same flow for presentations: copy each .pptx to an "Updated-" file, open
            // it for editing, and update a chart with three value rows of four values each.
            // NOTE(review): braces and the SeriesNames / CategoryNames contents are
            // missing from this excerpt.
            fileList = Directory.GetFiles(tempDi.FullName, "*.pptx");
foreach (var file in fileList)
var fi = new FileInfo(file);
var newFileName = "Updated-" + fi.Name;
var fi2 = new FileInfo(Path.Combine(tempDi.FullName, newFileName));
                File.Copy(fi.FullName, fi2.FullName);

using (var pDoc = PresentationDocument.Open(fi2.FullName, true))
var chart1Data = new ChartData
                        SeriesNames = new[] {
                        CategoryDataType = ChartDataType.String,
                        CategoryNames = new[] {
                        Values = new double[][] {
new double[] {
320, 310, 320, 330,
new double[] {
201, 224, 230, 221,
new double[] {
180, 200, 220, 230,
                    // The chart is selected by the integer 1 here rather than by a name.
                    // NOTE(review): presumably a slide number or chart index. Confirm
                    // against the ChartUpdater.UpdateChart overload.
                    ChartUpdater.UpdateChart(pDoc, 1, chart1Data);


// Build an output directory name of the form "ExampleOutput-yy-MM-dd-HHmmss"
// from the current local time.
var n = DateTime.Now;
var tempDi = new DirectoryInfo(string.Format("ExampleOutput-{0:00}-{1:00}-{2:00}-{3:00}{4:00}{5:00}", n.Year - 2000, n.Month, n.Day, n.Hour, n.Minute, n.Second));

// Change sheet name in formulas: replace "Source" with "'Source 2'", then save
// the modified workbook as FormulasUpdated.xlsx in the output directory.
// NOTE(review): the OpenXmlMemoryStreamDocument constructor arguments and the
// braces are truncated in this excerpt.
using (OpenXmlMemoryStreamDocument streamDoc = new OpenXmlMemoryStreamDocument(
using (SpreadsheetDocument doc = streamDoc.GetSpreadsheetDocument())
                    WorksheetAccessor.FormulaReplaceSheetName(doc, "Source", "'Source 2'");
                streamDoc.GetModifiedSmlDocument().SaveAs(Path.Combine(tempDi.FullName, "FormulasUpdated.xlsx"));

// Copy a cell range within the "References" worksheet, then save the modified
// workbook as FormulasCopied.xlsx in the output directory.
// NOTE(review): the meaning of the six integer arguments to CopyCellRange is not
// visible here; presumably source range bounds followed by a destination. Confirm.
using (OpenXmlMemoryStreamDocument streamDoc = new OpenXmlMemoryStreamDocument(
using (SpreadsheetDocument doc = streamDoc.GetSpreadsheetDocument())
                    WorksheetPart sheet = WorksheetAccessor.GetWorksheet(doc, "References");
                    WorksheetAccessor.CopyCellRange(doc, sheet, 1, 1, 7, 5, 4, 8);
                streamDoc.GetModifiedSmlDocument().SaveAs(Path.Combine(tempDi.FullName, "FormulasCopied.xlsx"));


两个html页面合并_html转word c++_https://bianchenghao6.com/blog_编程文档_第2张



- End -


  • 盘点阿里、腾讯、百度大厂C#开源项目
  • 一个基于C#开发的轻量级OCR文字识别开源工具
  • 一个C#开发的、跨平台的服务器性能监控工具
  • 基于Asp.Net Mvc开发的个人博客系统
  • 推荐一个.Net Core开源轻量级插件架构
  • 一个基于.Net Core 开源的物联网基础平台


