[gs-commits] ghostpdl branch, master, updated. ghostpdl-9.02-679-gcf7e755

Ken Sharp ken at ghostscript.com
Tue Oct 25 09:06:15 UTC 2011


The ghostpdl branch, master has been updated
       via  cf7e75532e0f16c819954b71a686fb5b83c47c61 (commit)
      from  b5fa52b7e60a7eb23b77c4fd117ed37eb04263b5 (commit)

----------------------------------------------------------------------
commit cf7e75532e0f16c819954b71a686fb5b83c47c61
Author: Ken Sharp <ken.sharp at artifex.com>
Date:   Tue Oct 25 10:05:53 2011 +0100

    Add UTF-8 as an output option to the txtwrite device.
    
    The TextFormat parameter now has three values; 0, 1 or 2 to allow for 'XML'
    output with full information, simple UCS2 or simple UTF-8 output.
    
    Altered the meanings of the TextFormat so that 0 is now the 'XML' output
    which allows a more sensible selection of 1 for UCS2 or 2 for UTF-8, since
    these output formats are otherwise identical. Modified the default to be
    '2' (UTF-8).
    
    Updated the documentation in devices.htm.
    
    Also removed a number of C++ comments.
    
    No differences expected, this device is not cluster tested.

diff --git a/gs/base/gdevtxtw.c b/gs/base/gdevtxtw.c
index 0111553..7919cd9 100644
--- a/gs/base/gdevtxtw.c
+++ b/gs/base/gdevtxtw.c
@@ -170,45 +170,45 @@ const gx_device_txtwrite_t gs_txtwrite_device =
                         X_DPI, Y_DPI,
                         1, 8, 255, 0, 256, 1),
     {txtwrite_open_device,
-     NULL, //gx_upright_get_initial_matrix,
-     NULL, //gx_default_sync_output,
+     NULL, /*gx_upright_get_initial_matrix,*/
+     NULL, /*gx_default_sync_output,*/
      txtwrite_output_page,
      txtwrite_close_device,
-     NULL, //gx_default_gray_map_rgb_color,
-     NULL, //gx_default_gray_map_color_rgb,
+     NULL, /*gx_default_gray_map_rgb_color,*/
+     NULL, /*gx_default_gray_map_color_rgb,*/
      txtwrite_fill_rectangle,               /* Can't be NULL and there is no gx_default_fill_rectangle! */
-     NULL, //gx_default_tile_rectangle,
-     NULL, //gx_default_copy_mono,
-     NULL, //gx_default_copy_color,
-     NULL, //gx_default_draw_line,
-     NULL, //gx_default_get_bits,
+     NULL, /*gx_default_tile_rectangle,*/
+     NULL, /*gx_default_copy_mono,*/
+     NULL, /*gx_default_copy_color,*/
+     NULL, /*gx_default_draw_line,*/
+     NULL, /*gx_default_get_bits,*/
      txtwrite_get_params,
      txtwrite_put_params,
-     NULL, //gx_default_map_cmyk_color,
-     NULL, //gx_default_get_xfont_procs,
-     NULL, //gx_default_get_xfont_device,
-     NULL, //gx_default_map_rgb_alpha_color,
-     NULL, //gx_page_device_get_page_device,
+     NULL, /*gx_default_map_cmyk_color,*/
+     NULL, /*gx_default_get_xfont_procs,*/
+     NULL, /*gx_default_get_xfont_device,*/
+     NULL, /*gx_default_map_rgb_alpha_color,*/
+     NULL, /*gx_page_device_get_page_device,*/
      NULL,			/* get_alpha_bits */
-     NULL, //gx_default_copy_alpha,
+     NULL, /*gx_default_copy_alpha,*/
      NULL,			/* get_band */
      NULL,			/* copy_rop */
      txtwrite_fill_path,
      txtwrite_stroke_path,
-     NULL, //gx_default_fill_mask,
-     NULL, //gx_default_fill_trapezoid,
-     NULL, //gx_default_fill_parallelogram,
-     NULL, //gx_default_fill_triangle,
-     NULL, //gx_default_draw_thin_line,
+     NULL, /*gx_default_fill_mask,*/
+     NULL, /*gx_default_fill_trapezoid,*/
+     NULL, /*gx_default_fill_parallelogram,*/
+     NULL, /*gx_default_fill_triangle,*/
+     NULL, /*gx_default_draw_thin_line,*/
      NULL,                      /* begin image */
      NULL,			/* image_data */
      NULL,			/* end_image */
-     NULL, //gx_default_strip_tile_rectangle,
-     NULL, //gx_default_strip_copy_rop,
+     NULL, /*gx_default_strip_tile_rectangle,*/
+     NULL, /*gx_default_strip_copy_rop,*/
      NULL,			/* get_clipping_box */
      NULL,                      /* txtwrite_begin_typed_image */
      NULL,			/* get_bits_rectangle */
-     NULL, //gx_default_map_color_rgb_alpha,
+     NULL, /*gx_default_map_color_rgb_alpha,*/
      gx_null_create_compositor,
      NULL,			/* get_hardware_params */
      txtwrite_text_begin,
@@ -255,6 +255,7 @@ txtwrite_open_device(gx_device * dev)
     if (tdev->fname[0] == 0)
         return_error(gs_error_undefinedfilename);
 
+    tdev->TextFormat = 2;
     tdev->PageData.PageNum = 0;
     tdev->PageData.y_ordered_list = NULL;
     tdev->file = NULL;
@@ -279,6 +280,45 @@ txtwrite_close_device(gx_device * dev)
     return code;
 }
 
+/* Write 'count' 16-bit code units from 'text' to tdev->file: raw 16-bit values when
+ * TextFormat is 1, UTF-8 when it is 2. Returns 0, or a rangecheck error otherwise. */
+static int write_simple_text(unsigned short *text, int count, gx_device_txtwrite_t *tdev)
+{
+    switch(tdev->TextFormat) {
+        case 1:
+            fwrite(text, sizeof (unsigned short), count, tdev->file);
+            break;
+        case 2:
+            {
+                int i;
+                unsigned short *UTF16 = (unsigned short *)text;
+                unsigned char UTF8[3];
+                for (i=0;i<count;i++) {
+                    if (*UTF16 < 0x80) {          /* 1 byte:  0xxxxxxx */
+                        UTF8[0] = *UTF16 & 0xff;
+                        fwrite (UTF8, sizeof(unsigned char), 1, tdev->file);
+                    } else {
+                        if (*UTF16 < 0x800) {     /* 2 bytes: 110xxxxx 10xxxxxx */
+                            UTF8[0] = (*UTF16 >> 6) + 0xC0; /* top 5 of 11 bits (was >> 11, always 0) */
+                            UTF8[1] = (*UTF16 & 0x3F) + 0x80;
+                            fwrite (UTF8, sizeof(unsigned char), 2, tdev->file);
+                        } else {                  /* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx; each unit is encoded alone */
+                            UTF8[0] = (*UTF16 >> 12) + 0xE0;
+                            UTF8[1] = ((*UTF16 >> 6) & 0x3F) + 0x80;
+                            UTF8[2] = (*UTF16 & 0x3F) + 0x80;
+                            fwrite (UTF8, sizeof(unsigned char), 3, tdev->file);
+                        }
+                    }
+                    UTF16++;
+                }
+            }
+            break;
+        default:
+            return gs_note_error(gs_error_rangecheck);
+    }
+    return 0;
+}
+
 static int simple_text_output(gx_device_txtwrite_t *tdev)
 {
     int code, chars_wide;
@@ -293,11 +333,12 @@ static int simple_text_output(gx_device_txtwrite_t *tdev)
 
     /* Write out the page number as a unicode string */
     sprintf(PageNum, "PAGE:%d\n", tdev->PageData.PageNum++);
+    if (tdev->TextFormat == 1)
+        fwrite(&BOM, sizeof(unsigned short), 1, tdev->file);
     p = (char *)&PageNum;
-    fwrite(&BOM, sizeof(unsigned short), 1, tdev->file);
     while(*p != 0x00){
         u = *p++;
-        fwrite(&u, sizeof(unsigned short), 1, tdev->file);
+        write_simple_text(&u, 1, tdev);
     }
 
     /* First lets try and consolidate horizontal lines of text */
@@ -519,10 +560,10 @@ static int simple_text_output(gx_device_txtwrite_t *tdev)
         x_entry = y_list->x_ordered_list;
         while (x_entry) {
             while (xpos < x_entry->start.x) {
-                fwrite(&UnicodeSpace, sizeof (unsigned short), 1, tdev->file);
+                write_simple_text(&UnicodeSpace, 1, tdev);
                 xpos += char_size;
             }
-            fwrite(x_entry->Unicode_Text, sizeof (unsigned short), x_entry->Unicode_Text_Size, tdev->file);
+            write_simple_text(x_entry->Unicode_Text, x_entry->Unicode_Text_Size, tdev);
             xpos += x_entry->Unicode_Text_Size * char_size;
             if (x_entry->next) {
                 x_entry = x_entry->next;
@@ -530,7 +571,7 @@ static int simple_text_output(gx_device_txtwrite_t *tdev)
                 x_entry = NULL;
             }
         }
-        fwrite(&UnicodeEOL, sizeof(unsigned short), 2, tdev->file);
+        write_simple_text((unsigned short *)&UnicodeEOL, 2, tdev);
         if (y_list->next) {
             y_list = y_list->next;
         } else {
@@ -582,17 +623,18 @@ txtwrite_output_page(gx_device * dev, int num_copies, int flush)
 
     switch(tdev->TextFormat) {
         case 0:
-            code = simple_text_output(tdev);
-            if (code < 0)
-                return code;
-            break;
-
-        case 1:
             code = decorated_text_output(tdev);
             if (code < 0)
                 return code;
             break;
 
+        case 1:
+        case 2:
+            code = simple_text_output(tdev);
+            if (code < 0)
+                return code;
+            break;
+
         default:
             return gs_note_error(gs_error_rangecheck);
             break;
@@ -1520,7 +1562,7 @@ int txtwrite_process_cmap_text(gs_text_enum_t *pte)
         gs_glyph glyph;
         gs_glyph_info_t info;
         int font_code, font_index, code;
-        gs_font *subfont;//new_font = 0;
+        gs_font *subfont;
         gs_char chr;
         gs_matrix scale_c, scale_o;
         gs_font_info_t finfo;
diff --git a/gs/doc/Devices.htm b/gs/doc/Devices.htm
index 670e321..b24604b 100644
--- a/gs/doc/Devices.htm
+++ b/gs/doc/Devices.htm
@@ -761,11 +761,12 @@ and should not be considered as being of production quality.
 <h4>Options</h4>
 <blockquote>
 <dl>
-<dt><code>-dTextFormat=<em>0 | 1</em></code> (default is 0)
-<dd>Format 0 outputs Unicode text (with a Byte Order Mark) which approximates
-to the layout of the text in the original document. Format 1 is intended
-for use by developers and outputs Unicode along with information regarding
-the format of the text (position, font name, point size, etc).
+<dt><code>-dTextFormat=<em>0 | 1 | 2 </em></code> (default is 2)
+<dd>Format 0 is intended for use by developers and outputs Unicode along with
+information regarding the format of the text (position, font name, point size,
+etc). Format 1 outputs Unicode (UCS2) text (with a Byte Order Mark) which
+approximates to the layout of the text in the original document. Format 2 is
+the same as format 1, but the text is encoded in UTF-8.
 </dl></blockquote>
 
 <p>


Summary of changes:
 gs/base/gdevtxtw.c |  104 ++++++++++++++++++++++++++++++++++++---------------
 gs/doc/Devices.htm |   11 +++--
 2 files changed, 79 insertions(+), 36 deletions(-)



More information about the gs-commits mailing list