1
26
27
34 #include "stdafx.h"
36
37 #include <stdio.h>
38 #include <windows.h>
39
40
44 #include "zlib.h"
45
// Locate the first occurrence of the NUL-terminated string `search` inside
// the first `buffersize` bytes of `buffer`.
//
// `buffer` is treated as raw bytes: embedded NULs do not stop the scan.
// Returns the byte offset of the match, or (size_t)-1 when there is none
// (including when `search` is longer than the buffer). An empty `search`
// matches at offset 0.
size_t FindStringInBuffer (const char* buffer, const char* search, size_t buffersize)
{
    const size_t len = strlen(search);

    // A needle longer than the haystack can never match; checking first
    // also keeps `buffersize - len` from wrapping around.
    if (len > buffersize) return (size_t)-1;

    // Last offset at which a full-length match still fits in the buffer.
    const size_t lastpos = buffersize - len;
    for (size_t pos = 0; pos <= lastpos; pos++)
    {
        if (memcmp(buffer + pos, search, len) == 0) return pos;
    }
    return (size_t)-1;
}
70
// Number of most-recent characters kept in the sliding window that
// ProcessOutput maintains and that ExtractNumber / seen2 inspect.
#define oldchar 15

// Parse the unsigned decimal number that ends at (or, after skipping
// trailing blanks, just before) index `lastcharoffset` of `search`.
//
// `search` need not be NUL-terminated; only indices 1..lastcharoffset are
// examined. Index 0 is never taken as part of the number, because both
// backward scans stop at i == 0. A leading '-' is not consumed, so the
// magnitude of a negative operand is returned.
//
// Returns the parsed value, or -1.0 when no number is found.
float ExtractNumber(const char* search, int lastcharoffset)
{
    int i = lastcharoffset;

    // Walk backwards over the blanks separating the number from whatever
    // follows it, then over the digits and dots of the number itself.
    while (i>0 && search[i]==' ') i--;
    // isdigit() requires an unsigned char value; a plain (possibly negative)
    // char from binary PDF data would be undefined behaviour.
    while (i>0 && (isdigit((unsigned char)search[i]) || search[i]=='.')) i--;

    int count = lastcharoffset - i;
    if (count <= 0) return -1.0f;

    // Copy the candidate text into a zero-filled scratch buffer so sscanf
    // always sees a terminated string; clamp in case a caller passes an
    // offset larger than the scratch space.
    char buffer[oldchar+5] = {0};
    if (count > (int)sizeof(buffer) - 1) count = (int)sizeof(buffer) - 1;
    memcpy(buffer, search+i+1, (size_t)count);

    float flt = -1.0f;
    // sscanf yields 1 only when a float was actually converted; 0 and EOF
    // both mean "no number".
    if (buffer[0] && sscanf(buffer, "%f", &flt) == 1)
    {
        return flt;
    }
    return -1.0f;
}
90
91 bool seen2(const char* search, char* recent)
93 {
94 if ( recent[oldchar-3]==search[0]
95 && recent[oldchar-2]==search[1]
96 && (recent[oldchar-1]==' ' || recent[oldchar-1]==0x0d || recent[oldchar-1]==0x0a)
97 && (recent[oldchar-4]==' ' || recent[oldchar-4]==0x0d || recent[oldchar-4]==0x0a)
98 )
99 {
100 return true;
101 }
102 return false;
103 }
104
105 void ProcessOutput(FILE* file, char* output, size_t len)
107 {
108 bool intextobject = false;
110
111 bool nextliteral = false;
113
114 int rbdepth = 0;
116
117 char oc[oldchar];
119 int j=0;
120 for (j=0; j<oldchar; j++) oc[j]=' ';
121
122 for (size_t i=0; i<len; i++)
123 {
124 char c = output[i];
125 if (intextobject)
126 {
127 if (rbdepth==0 && seen2("TD", oc))
128 {
129 float num = ExtractNumber(oc,oldchar-5);
132 if (num>1.0)
133 {
134 fputc(0x0d, file);
135 fputc(0x0a, file);
136 }
137 if (num<1.0)
138 {
139 fputc('\t', file);
140 }
141 }
142 if (rbdepth==0 && seen2("ET", oc))
143 {
144 intextobject = false;
146 fputc(0x0d, file);
147 fputc(0x0a, file);
148 }
149 else if (c=='(' && rbdepth==0 && !nextliteral)
150 {
151 rbdepth=1;
153 int num = ExtractNumber(oc,oldchar-1);
156 if (num>0)
157 {
158 if (num>1000.0)
159 {
160 fputc('\t', file);
161 }
162 else if (num>100.0)
163 {
164 fputc(' ', file);
165 }
166 }
167 }
168 else if (c==')' && rbdepth==1 && !nextliteral)
169 {
170 rbdepth=0;
172 }
173 else if (rbdepth==1)
174 {
175 if (c=='\\' && !nextliteral)
177 {
178 nextliteral = true;
180 }
181 else
182 {
183 nextliteral = false;
184 if ( ((c>=' ') && (c<='~')) || ((c>=128) && (c<255)) )
185 {
186 fputc(c, file);
187 }
188 }
189 }
190 }
191 for (j=0; j<oldchar-1; j++) oc[j]=oc[j+1];
193 oc[oldchar-1]=c;
194 if (!intextobject)
195 {
196 if (seen2("BT", oc))
197 {
198 intextobject = true;
200 }
201 }
202 }
203 }
204
205 int _tmain(int argc, _TCHAR* argv[])
206 {
207 FILE* fileo = fopen("c:\\pdf\\output2.txt", "w");
209 if (fileo) fclose(fileo);
210 fileo = fopen("c:\\pdf\\output2.txt", "a");
211
212 FILE* filei = fopen("c:\\pdf\\somepdf.pdf", "rb");
214
215 if (filei && fileo)
216 {
217 int fseekres = fseek(filei,0, SEEK_END); long filelen = ftell(filei);
220 fseekres = fseek(filei,0, SEEK_SET);
221
222 char* buffer = new char [filelen]; ZeroMemory(buffer, filelen);
224 size_t actualread = fread(buffer, filelen, 1 ,filei);
226 bool morestreams = true;
227
228 while (morestreams)
230 {
231 size_t streamstart = FindStringInBuffer (buffer, "stream", filelen);
234 size_t streamend = FindStringInBuffer (buffer, "endstream", filelen);
235 if (streamstart>0 && streamend>streamstart)
236 {
237 streamstart += 6;
239
240 if (buffer[streamstart]==0x0d && buffer[streamstart+1]==0x0a) streamstart+=2;
241 else if (buffer[streamstart]==0x0a) streamstart++;
242
243 if (buffer[streamend-2]==0x0d && buffer[streamend-1]==0x0a) streamend-=2;
244 else if (buffer[streamend-1]==0x0a) streamend--;
245
246 size_t outsize = (streamend - streamstart)*10;
248 char* output = new char [outsize]; ZeroMemory(output, outsize);
249
250 z_stream zstrm; ZeroMemory(&zstrm, sizeof(zstrm));
252
253 zstrm.avail_in = streamend - streamstart + 1;
254 zstrm.avail_out = outsize;
255 zstrm.next_in = (Bytef*)(buffer + streamstart);
256 zstrm.next_out = (Bytef*)output;
257
258 int rsti = inflateInit(&zstrm);
259 if (rsti == Z_OK)
260 {
261 int rst2 = inflate (&zstrm, Z_FINISH);
262 if (rst2 >= 0)
263 {
264 size_t totout = zstrm.total_out;
266 ProcessOutput(fileo, output, totout);
267 }
268 }
269 delete[] output; output=0;
270 buffer+= streamend + 7;
271 filelen = filelen - (streamend+7);
272 }
273 else
274 {
275 morestreams = false;
276 }
277 }
278 fclose(filei);
279 }
280 if (fileo) fclose(fileo);
281 return 0;
282 }