-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpdftext.h
122 lines (100 loc) · 3.27 KB
/
pdftext.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
/*
* pdftext.h
*
* convert pdf to text or rich text (roff, html, tex)
*/
/*
 * make the header safe to include more than once in a translation unit:
 * without a guard, a second inclusion redefines struct measure and
 * struct format, which is a compile error
 */
#pragma once
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <poppler.h>
#include "pdfrects.h"
/*
 * parameters for interpreting the input: thresholds that decide where
 * lines, paragraphs and columns break
 *
 * in the field comments, Dx and Dy are deltas (differences) in x and y,
 * and % marks a value expressed as a percentage of the page
 *
 * NOTE(review): the unit of the non-percentage fields is not stated in
 * this header -- confirm against the code that fills the structure
 */
struct measure {
int newline; /* a Dy larger than this is a newline */
int newpar; /* a Dy larger than this is a new paragraph */
int rightreturn; /* a line ending before this x% is a new paragraph */
int newcolumnx; /* a Dx% larger than this is a new column (and) */
int newcolumny; /* a -Dy% larger than this is a new column (and) */
/* the "(and)" on the two fields above presumably means that */
/* both conditions must hold for a new column -- TODO confirm */
int indent; /* more than this at start of line is an indent */
int headfooter; /* ignore x,y of chars at begin/end of page */
int blockdistance; /* distance between blocks of text */
char hyphen; /* this character is a hyphen */
};
/*
 * output strings: the pieces of markup that make up an output format
 *
 * NOTE(review): the format_* objects declared in this header are defined
 * elsewhere; if they use positional initializers, the order of the
 * members matters -- confirm before reordering or inserting fields
 */
struct format {
char *parstart; /* paragraph start */
char *parend; /* paragraph end */
char *fontname; /* format for printing font names, or NULL */
char *plain; /* set font face: this and the next three fields */
char *italic;
char *bold;
char *bolditalic;
char *italicbegin; /* begin/end font face: this and the next three */
char *italicend;
char *boldbegin;
char *boldend;
gboolean reset; /* reset and restart face at par breaks */
char *backslash; /* presumably the output for '\' -- TODO confirm */
char *firstdot; /* substitute this for dot at start of line */
char *less; /* presumably the output for '<' -- TODO confirm */
char *greater; /* presumably the output for '>' -- TODO confirm */
char *and; /* presumably the output for '&' -- TODO confirm */
};
/*
 * known output formats
 *
 * only declared here; the objects themselves are defined in another
 * source file
 */
extern struct format format_roff;
extern struct format format_html;
extern struct format format_tex;
extern struct format format_textfont;
extern struct format format_text;
/*
 * print the reason for each paragraph break
 *
 * only declared here; the variable is defined in another source file
 */
extern gboolean debugpar;
/*
 * special values for the previous character; keep START at the end
 *
 * NOTE(review): "at the end" presumably means that START has to remain
 * the largest of these special values, so that new ones are to be added
 * before it -- confirm against the code that compares with START
 */
#define NONE '\0'
#define START '\1'
/*
 * data for processing the characters
 *
 * only declared in this header, not defined: the functions below take a
 * pointer to it, and its members are not visible from here
 */
struct scandata;
/*
 * start processing a document
 *
 * fd		stream; presumably where the output is written -- TODO confirm
 * method	integer selector; its values are not defined in this header
 * measure	parameters for interpreting the input
 * format	output strings
 * scandata	data for processing the characters
 */
void startdocument(FILE *fd,
int method, struct measure *measure, struct format *format,
struct scandata *scandata);
/*
 * start processing a page (no end needed: there is no matching function
 * for the end of a page)
 *
 * the parameter is the data for processing the characters; it is named
 * scanpage here and scandata in the other prototypes of this header
 */
void startpage(struct scandata *scanpage);
/*
 * show the characters in a box in a page
 *
 * fd		stream; presumably where the output is written -- TODO confirm
 * zone		a rectangle in the page
 * textarea	a list of rectangles (type from pdfrects.h)
 * text		a string; presumably the text of the page -- TODO confirm
 * attrlist	a list; presumably the text attributes of the page, as
 *		provided by poppler -- TODO confirm
 * rects	array of rectangles; presumably one for each character in
 *		text -- TODO confirm
 * nrects	presumably the number of elements of rects -- TODO confirm
 * measure	parameters for interpreting the input
 * format	output strings
 * scandata	data for processing the characters
 * detectcolumn	boolean; presumably enables detecting the start of a new
 *		column -- TODO confirm
 */
void showregion(FILE *fd, PopplerRectangle *zone, RectangleList *textarea,
char *text, GList *attrlist,
PopplerRectangle *rects, guint nrects,
struct measure *measure, struct format *format,
struct scandata *scandata, gboolean detectcolumn);
/*
 * show the characters in a page
 *
 * fd		stream; presumably where the output is written -- TODO confirm
 * page		the page
 * zone		a rectangle in the page; presumably only the characters in
 *		it are shown -- TODO confirm
 * method	integer selector; its values are not defined in this header
 * order	integer selector; its values are not defined in this header
 * measure	parameters for interpreting the input
 * format	output strings
 * scandata	data for processing the characters
 */
void showpage(FILE *fd, PopplerPage *page,
PopplerRectangle *zone,
int method, int order,
struct measure *measure, struct format *format,
struct scandata *scandata);
/*
 * end processing a document
 *
 * takes the same parameters as startdocument()
 */
void enddocument(FILE *fd,
int method, struct measure *measure, struct format *format,
struct scandata *scandata);
/*
 * show some pages of a pdf document
 *
 * fd		stream; presumably where the output is written -- TODO confirm
 * doc		the document
 * first, last	the range of pages to show
 *		NOTE(review): whether pages are counted from 0 or from 1,
 *		and whether last is included, is not visible here
 * zone		a rectangle in the page
 * method	integer selector; its values are not defined in this header
 * order	integer selector; its values are not defined in this header
 * measure	parameters for interpreting the input
 * format	output strings
 *
 * differently from showpage(), no struct scandata is passed
 */
void showdocumentpart(FILE *fd, PopplerDocument *doc, int first, int last,
PopplerRectangle *zone,
int method, int order,
struct measure *measure, struct format *format);
/*
 * show a pdf document
 *
 * same parameters as showdocumentpart(), without the page range
 */
void showdocument(FILE *fd, PopplerDocument *doc,
PopplerRectangle *zone,
int method, int order,
struct measure *measure, struct format *format);
/*
 * show a pdf file
 *
 * same parameters as showdocumentpart(), with the name of a file in
 * place of the document
 *
 * NOTE(review): the function returns void, so a failure in opening the
 * file cannot be reported to the caller through the return value; how it
 * is handled is not visible here
 */
void showfile(FILE *fd, char *filename, int first, int last,
PopplerRectangle *zone,
int method, int order,
struct measure *measure, struct format *format);
/*
 * parse a string into a struct format
 *
 * NOTE(review): the syntax of the string, the return value on a parsing
 * error and who owns the returned structure are not visible in this
 * header -- confirm against the definition
 */
struct format *parseformat(char *s);