1

The application I'm working on generates PDF reports from HTML files using the iTextSharp library. Apparently, when large tables are generated, iTextSharp allocates a lot of memory (~300 MB for a 200 KB file). A solution to this problem is to add the table to the document little by little (so that the data already in the table is flushed), as described in the following links:

Question: How can I add a PdfPTable in steps when generating the PDF from an existing HTML file?

Here is my code:

/// <summary>
/// Parses an HTML template with XMLWorker, applying the supplied CSS, writes the
/// resulting PDF into an in-memory stream and returns that stream's bytes.
/// </summary>
/// <param name="template">Markup that is handed to the XML parser.</param>
/// <param name="cssString">CSS text; it is UTF-8 encoded and added to the CSS resolver.</param>
/// <returns>The bytes written to the in-memory stream by the PDF writer.</returns>
public byte[] GetReportPdf(string template, string cssString)
    {
        using (var stream = new MemoryStream())
        {
            using (
                var doc = new Document(
                    this.Settings.Size,
                    this.Settings.Margins.Left,
                    this.Settings.Margins.Right,
                    this.Settings.Margins.Top,
                    this.Settings.Margins.Bottom))
            {
                using (var writer = PdfWriter.GetInstance(doc, stream))
                {
                    // adding the page event, or null
                    writer.PageEvent = this.Settings.PageEvent;

                    doc.Open();

                    // CSS: load the style sheet text into the resolver used by the CSS pipeline
                    var cssResolver = new StyleAttrCSSResolver();

                    using (var cssStream = new MemoryStream(Encoding.UTF8.GetBytes(cssString)))
                    {
                        var cssFile = XMLWorkerHelper.GetCSS(cssStream);
                        cssResolver.AddCss(cssFile);
                    }

                    // HTML: context built with the font provider and the HTML tag processor factory
                    var fontProvider = new XMLWorkerFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS);
                    var cssAppliers = new CssAppliersImpl(fontProvider);
                    var htmlContext = new HtmlPipelineContext(cssAppliers);
                    htmlContext.SetTagFactory(Tags.GetHtmlTagProcessorFactory());

                    // pipelines, chained as CSS resolver -> HTML -> PDF writer
                    var pdf = new PdfWriterPipeline(doc, writer);
                    var html = new HtmlPipeline(htmlContext, pdf);
                    var css = new CssResolverPipeline(cssResolver, html);

                    // XML worker: parse the template through the pipeline chain
                    var worker = new XMLWorker(css, true);
                    var parser = new XMLParser(worker);
                    using (var stringReader = new StringReader(template))
                    {
                        parser.Parse(stringReader);
                    }

                    doc.Close();
                }
            }

            // Read the bytes only after the writer and document have been disposed,
            // but before the stream itself is.
            return stream.ToArray();
        }
    }

Notes:

  1. The solutions in the previous links do not use HTML to create the PDF.
  2. The steps described are: set the table's Complete property to false, add the table to the document after every 50 rows, and finally set the table's Complete property to true.
  3. Using an AbstractTagProcessor, I managed to set the table's Complete property while the HTML is parsed, but I found no way to trigger adding the table to the document while it is still being generated.
  4. itextsharp version 5.5.10.0
  5. itextsharp.xmlworker version 5.5.10.0

// Obtain the tag processor factory and register the custom processor for the "table" tag.
// NOTE(review): TableTagProcessor as shown declares no constructor that accepts an argument — confirm.
var tagFactory = Tags.GetHtmlTagProcessorFactory();
tagFactory.AddProcessor(new TableTagProcessor(doc), new[] { "table" });

/// <summary>
/// Table tag processor that delegates to the base <c>Table</c> processor and then
/// sets the <c>Complete</c> flag on every <see cref="PdfPTable"/> found in the
/// returned elements: <c>false</c> after <c>Start</c>, <c>true</c> after <c>End</c>.
/// </summary>
public class TableTagProcessor : iTextSharp.tool.xml.html.table.Table
{
    /// <summary>
    /// Runs the base start handling, then sets <c>Complete = false</c> on each returned table.
    /// </summary>
    /// <param name="ctx">Worker context passed through to the base implementation.</param>
    /// <param name="tag">Tag being opened; passed through to the base implementation.</param>
    /// <returns>The element list produced by the base implementation.</returns>
    public override IList<IElement> Start(IWorkerContext ctx, Tag tag)
    {
        IList<IElement> elements = base.Start(ctx, tag);
        SetTablesComplete(elements, false);
        return elements;
    }

    /// <summary>
    /// Runs the base end handling, then sets <c>Complete = true</c> on each returned table.
    /// </summary>
    /// <param name="ctx">Worker context passed through to the base implementation.</param>
    /// <param name="tag">Tag being closed; passed through to the base implementation.</param>
    /// <param name="currentContent">Content collected for the tag; passed through to the base implementation.</param>
    /// <returns>The element list produced by the base implementation.</returns>
    public override IList<IElement> End(IWorkerContext ctx, Tag tag, IList<IElement> currentContent)
    {
        IList<IElement> elements = base.End(ctx, tag, currentContent);
        SetTablesComplete(elements, true);
        return elements;
    }

    // Assigns the given Complete value to every PdfPTable in the list; other elements are skipped.
    private static void SetTablesComplete(IList<IElement> elements, bool complete)
    {
        foreach (var pdfTable in elements.OfType<PdfPTable>())
        {
            pdfTable.Complete = complete;
        }
    }
}

0 Answers0