summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'extract/src/document.h')
-rw-r--r--extract/src/document.h93
1 files changed, 76 insertions, 17 deletions
diff --git a/extract/src/document.h b/extract/src/document.h
index 2dc4f1ee..69c4232c 100644
--- a/extract/src/document.h
+++ b/extract/src/document.h
@@ -26,6 +26,17 @@ typedef struct
point_t max;
} rect_t;
+extern const rect_t extract_rect_infinite;
+extern const rect_t extract_rect_empty;
+
+rect_t extract_rect_intersect(rect_t a, rect_t b);
+
+rect_t extract_rect_union(rect_t a, rect_t b);
+
+int extract_rect_contains_rect(rect_t a, rect_t b);
+
+int extract_rect_valid(rect_t a);
+
const char* extract_rect_string(const rect_t* rect);
typedef struct
@@ -56,13 +67,15 @@ typedef struct
/* (x,y) before transformation by ctm and trm. */
double pre_x;
double pre_y;
-
+
/* (x,y) after transformation by ctm and trm. */
double x;
double y;
-
+
unsigned ucs;
double adv;
+
+ rect_t bbox;
} char_t;
/* A single char in a span.
*/
@@ -72,15 +85,15 @@ typedef struct
matrix_t ctm;
matrix_t trm;
char* font_name;
-
+
/* font size is extract_matrix_cmp4(trm). */
-
+
struct {
unsigned font_bold : 1;
unsigned font_italic : 1;
unsigned wmode : 1;
} flags;
-
+
char_t* chars;
int chars_num;
} span_t;
@@ -138,10 +151,10 @@ typedef struct
double h;
void* data;
size_t data_size;
-
+
extract_image_data_free data_free;
void* data_free_handle;
-
+
} image_t;
/* Information about an image. <type> is as passed to extract_add_image();
<name> and <id> are created to be unique identifiers for use in generated docx
@@ -166,18 +179,18 @@ typedef struct
typedef struct
{
rect_t rect;
-
+
/* If left/above is true, this cell is not obscured by cell to its
left/above. */
uint8_t left;
uint8_t above;
-
+
/* extend_right and extend_down are 1 for normal cells, 2 for cells which
extend right/down to cover an additional column/row, 3 to cover two
additional columns/rows etc. */
int extend_right;
int extend_down;
-
+
/* Contents of this cell. */
line_t** lines;
int lines_num;
@@ -192,7 +205,7 @@ void extract_cell_free(extract_alloc_t* alloc, cell_t** pcell);
typedef struct
{
point_t pos; /* top-left. */
-
+
/* Array of cells_num_x*cells_num_y cells; cell (x, y) is:
cells_num_x * y + x.
*/
@@ -202,11 +215,30 @@ typedef struct
} table_t;
+typedef enum
+{
+ SPLIT_NONE = 0,
+ SPLIT_HORIZONTAL,
+ SPLIT_VERTICAL
+} split_type_t;
+
+
+typedef struct split_t
+{
+ split_type_t type;
+ double weight;
+ int count;
+ struct split_t *split[1];
+} split_t;
+
+
typedef struct
{
+ rect_t mediabox;
+
span_t** spans;
int spans_num;
-
+
image_t* images;
int images_num;
@@ -219,16 +251,27 @@ typedef struct
int paragraphs_num;
/* These refer to items in .lines. Initially empty, then set
by extract_join(). */
-
+
tablelines_t tablelines_horizontal;
tablelines_t tablelines_vertical;
-
+
table_t** tables;
int tables_num;
+} subpage_t;
+/* A subpage. Contains different representations of the list of spans. */
+
+typedef struct
+{
+ rect_t mediabox;
+
+ subpage_t** subpages;
+ int subpages_num;
+
+ split_t* split;
} extract_page_t;
-/* A page. Contains different representations of the list of spans. NB not
-+called page_t because this clashes with a system type on hpux. */
+/* A page. Contains a list of subpages. NB not
+called page_t because this clashes with a system type on hpux. */
typedef struct
@@ -248,7 +291,7 @@ typedef struct
} images_t;
-int extract_document_join(extract_alloc_t* alloc, document_t* document);
+int extract_document_join(extract_alloc_t* alloc, document_t* document, int layout_analysis);
/* This does all the work of finding paragraphs and tables. */
double extract_matrices_to_font_size(matrix_t* ctm, matrix_t* trm);
@@ -273,5 +316,21 @@ typedef struct
content, e.g. so we know whether a font has changed so need to start a new odt
span. */
+int extract_page_analyse(extract_alloc_t* alloc, extract_page_t* page);
+/* Analyse page content for layouts. */
+
+int extract_subpage_alloc(extract_alloc_t* extract, rect_t mediabox, extract_page_t* page, subpage_t** psubpage);
+/* subpage_t constructor. */
+
+void extract_subpage_free(extract_alloc_t* alloc, subpage_t** psubpage);
+/* subpage_t destructor. */
+
+int subpage_span_append(extract_alloc_t* alloc, subpage_t* subpage, span_t* span);
+/* Push span onto the end of subpage. */
+
+int extract_split_alloc(extract_alloc_t* alloc, split_type_t type, int count, split_t** psplit);
+/* Allocate a split_t. */
+
+void extract_split_free(extract_alloc_t* alloc, split_t** psplit);
#endif