[gs-commits] rev 11599 - trunk/gs/base

robin at ghostscript.com robin at ghostscript.com
Wed Aug 4 23:15:25 UTC 2010


Author: robin
Date: 2010-08-04 23:15:24 +0000 (Wed, 04 Aug 2010)
New Revision: 11599

Modified:
   trunk/gs/base/gxp1fill.c
Log:
Fix for Bug 691504 (gs taking a long time to render a file).

Revision 8694 changes code from using a simple cast to an int to using
a call to (int)floor. This fixes problems, but costs a factor of 3 in
processing time for heavily tiled files.

Here we change to using a macro version of floor instead. This removes
the function call and NaN handling overheads and returns us to the
same kind of performance as we had before.

We apply the same change throughout the file (i.e. all the calls to (int)floor,
not just the two changed in r8694).

There is still scope for optimising this code further, but this closes the
regression.

No differences seen in local cluster testing.



Modified: trunk/gs/base/gxp1fill.c
===================================================================
--- trunk/gs/base/gxp1fill.c	2010-08-04 22:59:04 UTC (rev 11598)
+++ trunk/gs/base/gxp1fill.c	2010-08-04 23:15:24 UTC (rev 11599)
@@ -31,6 +31,8 @@
 #include "gxblend.h"
 #include "gsicc_cache.h"
 
+#define fastfloor(x) (((int)(x)) - (((x)<0) && ((x) != (float)(int)(x))))
+
 /* Define the state for tile filling. */
 typedef struct tile_fill_state_s {
 
@@ -103,9 +105,9 @@
      * tile_by_steps loop, but for simple tiles, we must set it now.
      */
     if (set_mask_phase && m_tile->is_simple) {
-	px = imod(-(int)floor(m_tile->step_matrix.tx - ptfs->phase.x + 0.5),
+        px = imod(-(int)fastfloor(m_tile->step_matrix.tx - ptfs->phase.x + 0.5),
 		  m_tile->tmask.rep_width);
-	py = imod(-(int)floor(m_tile->step_matrix.ty - ptfs->phase.y + 0.5),
+        py = imod(-(int)fastfloor(m_tile->step_matrix.ty - ptfs->phase.y + 0.5),
 		  m_tile->tmask.rep_height);
     } else
 	px = py = 0;
@@ -163,17 +165,17 @@
 	v1 = ibbox.q.y - min(ptile->bbox.q.y, 0) + 0.000001;
 	if (!ptile->is_simple)
 	    u0 -= bbw, v0 -= bbh, u1 += bbw, v1 += bbh;
-	i0 = (int)floor(u0);
-	j0 = (int)floor(v0);
+        i0 = (int)fastfloor(u0);
+        j0 = (int)fastfloor(v0);
 	i1 = (int)ceil(u1);
 	j1 = (int)ceil(v1);
     }
     if_debug4('T', "[T]i=(%d,%d) j=(%d,%d)\n", i0, i1, j0, j1);
     for (i = i0; i < i1; i++)
 	for (j = j0; j < j1; j++) {
-	    int x = (int)floor(step_matrix.xx * i +
+            int x = (int)fastfloor(step_matrix.xx * i +
 			  step_matrix.yx * j + step_matrix.tx);
-	    int y = (int)floor(step_matrix.xy * i +
+            int y = (int)fastfloor(step_matrix.xy * i +
 			  step_matrix.yy * j + step_matrix.ty);
 	    int w = tbits_or_tmask->size.x;
 	    int h = tbits_or_tmask->size.y;
@@ -313,10 +315,10 @@
 	return code;
     if (ptile->is_simple && ptile->cdev == NULL) {
 	int px =
-	    imod(-(int)floor(ptile->step_matrix.tx - state.phase.x + 0.5),
+            imod(-(int)fastfloor(ptile->step_matrix.tx - state.phase.x + 0.5),
 		 bits->rep_width);
 	int py =
-	    imod(-(int)floor(ptile->step_matrix.ty - state.phase.y + 0.5),
+            imod(-(int)fastfloor(ptile->step_matrix.ty - state.phase.y + 0.5),
 		 bits->rep_height);
 
 	if (state.pcdev != dev)
@@ -512,17 +514,17 @@
 	v1 = ibbox.q.y - min(ptile->bbox.q.y, 0) + 0.000001;
 	if (!ptile->is_simple)
 	    u0 -= bbw, v0 -= bbh, u1 += bbw, v1 += bbh;
-	i0 = (int)floor(u0);
-	j0 = (int)floor(v0);
+        i0 = (int)fastfloor(u0);
+        j0 = (int)fastfloor(v0);
 	i1 = (int)ceil(u1);
 	j1 = (int)ceil(v1);
     }
     if_debug4('T', "[T]i=(%d,%d) j=(%d,%d)\n", i0, i1, j0, j1);
     for (i = i0; i < i1; i++)
 	for (j = j0; j < j1; j++) {
-	    int x = (int)floor(step_matrix.xx * i +
+            int x = (int)fastfloor(step_matrix.xx * i +
 			  step_matrix.yx * j + step_matrix.tx);
-	    int y = (int)floor(step_matrix.xy * i +
+            int y = (int)fastfloor(step_matrix.xy * i +
 			  step_matrix.yy * j + step_matrix.ty);
 	    int w = ptrans_pat->width;
 	    int h = ptrans_pat->height;
@@ -607,7 +609,7 @@
 
     /* Now the middle part */
 
-    num_full_tiles = (int) floor((float) (w - left_width)/ (float) tile_width);
+    num_full_tiles = (int)fastfloor((float) (w - left_width)/ (float) tile_width);
 
     /* Now the right part */
 
@@ -790,10 +792,10 @@
         /* A simple case.  Tile is not clist and simple. */
 
 	int px =
-	    imod(-(int)floor(ptile->step_matrix.tx - state.phase.x + 0.5),
+            imod(-(int)fastfloor(ptile->step_matrix.tx - state.phase.x + 0.5),
 		  ptile->ttrans->width);
 	int py =
-	    imod(-(int)floor(ptile->step_matrix.ty - state.phase.y + 0.5),
+            imod(-(int)fastfloor(ptile->step_matrix.ty - state.phase.y + 0.5),
 		 ptile->ttrans->height);
 
         tile_rect_trans_simple(xmin, ymin, xmax, ymax, px, py, ptile,



More information about the gs-commits mailing list