[gs-commits] rev 11717 - trunk/gs/base

ken at ghostscript.com ken at ghostscript.com
Tue Sep 14 08:06:05 UTC 2010


Author: ken
Date: 2010-09-14 08:06:03 +0000 (Tue, 14 Sep 2010)
New Revision: 11717

Modified:
   trunk/gs/base/gdevpdfo.c
   trunk/gs/base/gdevpdfo.h
Log:
pdfwrite enhancement : performance improvement with type 3 fonts

Bug #690575 "PS to PDF Conversion extremely slow (possibly endless)"

The type 3 font code assembles CharProcs for type 3 fonts by writing them individually 
into a 'cos_stream'. Each time a new one is completed it is compared to all the existing
CharProcs to see if it is a duplicate. This was done by fseek/fread/memcmp operations.

As the number of CharProcs increases, the time spent seeking, reading and comparing
the data increases dramatically and performance becomes very poor. Not only that, but
the test is actually done twice for each new CharProc.

This patch tackles the problem by creating an md5 hash of the data written to a 
cos_write_stream (a subclassed cos_stream) as it is written. The cos_stream 'equal'
routine checks to see if the md5 hash is valid and if it is then compares the hashes.
If the md5 hash is not valid (i.e. not a cos_write_stream) then it uses the old 
seek/read/compare mechanism. This will improve the performance of any stored data
if it is stored using a cos_write_stream and compared against other data of the same 
type. (I don't believe we do this anywhere else currently, but I'm not sure.)

This does improve the performance significantly, and the code no longer spends most of
its time waiting for I/O operations to complete. It is still slow, but this is the 
result of using lots of type 3 fonts. Because of the way these must be processed in 
order to capture the outlines they are never going to be fast.

In my test this runs 2-3 times faster than before. There should be no differences in
output from the old code.


Modified: trunk/gs/base/gdevpdfo.c
===================================================================
--- trunk/gs/base/gdevpdfo.c	2010-09-14 07:49:11 UTC (rev 11716)
+++ trunk/gs/base/gdevpdfo.c	2010-09-14 08:06:03 UTC (rev 11717)
@@ -1279,6 +1279,9 @@
 	gs_alloc_struct(mem, cos_stream_t, &st_cos_object, cname);
 
     cos_object_init((cos_object_t *)pcs, pdev, &cos_stream_procs);
+    pcs->md5_valid = 0;
+    gs_md5_init(&pcs->md5);
+    memset(&pcs->hash, 0x00, 16);
     return pcs;
 }
 
@@ -1298,12 +1301,37 @@
     cos_dict_release(pco, cname);
 }
 
+static void hash_cos_stream(gs_memory_t *mem, const cos_object_t *pco, FILE *sfile, uint64_t *hash)
+{
+    const cos_stream_t *pcs = (const cos_stream_t *)pco;
+    cos_stream_piece_t *pcsp = pcs->pieces;
+    byte *ptr;
+    long position_save = ftell(sfile);
+    int result;
+    gs_md5_state_t md5;
+
+    gs_md5_init(&md5);
+    
+    while(pcsp) {
+	ptr = gs_malloc(mem, sizeof (byte), pcsp->size, "hash_cos_stream");
+	fseek(sfile, pcsp->position, SEEK_SET);
+	if (fread(ptr, 1, pcsp->size, sfile) != pcsp->size) {
+	    result = gs_note_error(gs_error_ioerror);
+	    return result;
+	}
+	gs_md5_append(&md5, ptr, pcsp->size);
+	gs_free(mem, ptr, sizeof (byte), pcsp->size, "hash_cos_stream");
+	pcsp = pcsp->next;
+    }
+    fseek(sfile, position_save, SEEK_SET);
+    gs_md5_finish(&md5, (gs_md5_byte_t *)hash);
+}
+
 static int
 cos_stream_equal(const cos_object_t *pco0, const cos_object_t *pco1, gx_device_pdf *pdev)
 {
     const cos_stream_t *pcs0 = (const cos_stream_t *)pco0;
     const cos_stream_t *pcs1 = (const cos_stream_t *)pco1;
-    bool result = false;
     int code;
 
     code = cos_dict_equal(pco0, pco1, pdev);
@@ -1311,46 +1339,17 @@
 	return code;
     if (!code)
 	return false;
-    {
-	/* fixme : this assumes same segmentation for both streams.
-	   In general it is not true. */
-	FILE *sfile = pdev->streams.file;
-	cos_stream_piece_t *pcsp0 = pcs0->pieces, *pcsp1 = pcs1->pieces;
-	long position_save = ftell(sfile);
-
-	for (; pcsp0 && pcsp1; pcsp0 = pcsp0->next, pcsp1 = pcsp1->next) {
-	    long position0 = pcsp0->position;
-	    long position1 = pcsp1->position;
-	    uint size0 = pcsp0->size;
-	    uint size1 = pcsp1->size;
-	    byte buf0[512], buf1[sizeof(buf0)];
-
-	    if (size0 != size1)
-		goto notequal;
-	    for(; size0; position0 += size1, position1 += size1, size0 -= size1) {
-		size1 = min(sizeof(buf0), size0);
-		fseek(sfile, position0, SEEK_SET);
-		if (fread(buf0, 1, size1, sfile) != size1) {
-		    result = gs_note_error(gs_error_ioerror);
-		    goto notequal;
+    if (!pco0->md5_valid) {
+	hash_cos_stream(pdev->memory, pco0, pdev->streams.file, (uint64_t *)&pcs0->hash);
 		}
-		fseek(sfile, position1, SEEK_SET);
-		if (fread(buf1, 1, size1, sfile) != size1) {
-		    result = gs_note_error(gs_error_ioerror);
-		    goto notequal;
+    if (!pco1->md5_valid) {
+	hash_cos_stream(pdev->memory, pco1, pdev->streams.file, (uint64_t *)&pcs1->hash);
 		}
-		if (memcmp(buf0, buf1, size1))
-		    goto notequal;
+    if (memcmp(&pcs0->hash, &pcs1->hash, 16) == 0)
+	return true;
+    return false;
+
 	    }
-	}
-	if (pcsp0 || pcsp1)
-	    goto notequal;
-	result = true;
-notequal:
-	fseek(sfile, position_save, SEEK_SET);
-	return result;
-    }
-}
 
 /* Find the total length of a stream. */
 long
@@ -1554,6 +1553,7 @@
     int code;
 
     stream_write(target, pr->ptr + 1, count);
+    gs_md5_append(&ss->pcs->md5, pr->ptr + 1, count);
     pr->ptr = pr->limit;
     sflush(target);
     code = cos_stream_add(ss->pcs, (uint)(stell(pdev->streams.strm) - start_pos));
@@ -1567,6 +1567,8 @@
 
     sflush(s);
     status = s_close_filters(&ss->target, ss->pdev->streams.strm);
+    gs_md5_finish(&ss->pcs->md5, (gs_md5_byte_t *)ss->pcs->hash);
+    ss->pcs->md5_valid = 1;
     return (status < 0 ? status : s_std_close(s));
 }
 
@@ -1592,6 +1594,9 @@
 	goto fail;
     ss->template = &cos_write_stream_template;
     ss->pcs = pcs;
+    ss->pcs->md5_valid = 0;
+    gs_md5_init(&ss->pcs->md5);
+    memset(&ss->pcs->hash, 0x00, 16);
     ss->pdev = pdev;
     ss->s = s;
     ss->target = pdev->streams.strm; /* not s->strm */

Modified: trunk/gs/base/gdevpdfo.h
===================================================================
--- trunk/gs/base/gdevpdfo.h	2010-09-14 07:49:11 UTC (rev 11716)
+++ trunk/gs/base/gdevpdfo.h	2010-09-14 08:06:03 UTC (rev 11717)
@@ -34,6 +34,7 @@
  */
 
 #include "gsparam.h"
+#include "smd5.h"
 
 /* Define some needed abstract types. */
 #ifndef gx_device_pdf_DEFINED
@@ -114,6 +115,9 @@
     byte written;		/* see above */\
     long length;                /* only for stream objects */\
     stream *input_strm;		/* only for stream objects */\
+    gs_md5_state_t md5;         /* only for stream objects */\
+    int md5_valid;              /* only for stream objects */\
+    byte hash[16];		/* only for stream objects */\
     /* input_strm is introduced recently for pdfmark. */\
     /* Using this field, psdf_binary_writer_s may be simplified. */\
 }



More information about the gs-commits mailing list