438339bf2c40f95765c32502d5e2025687ba083c
[sdk] / extras / html / htmlParser.ec
1 import "HTMLView"
2
// Sizes of the scratch buffers the parser declares for the text of a tag
// and for a symbol.
#define MAX_TAG_LEN     204800
#define MAX_SYMBOL_LEN  1000

// Result codes produced by GetKeyWordEx():
#define WORD_NONE    0   // no character was extracted
#define WORD_NORMAL  1   // a word was extracted and no double quote was seen
#define WORD_QUOTED  2   // a word was extracted and a double quote was seen

// Bit flags combined into Block::attribs to describe the font style.
#define FONT_BOLD       0x00000001
#define FONT_ITALIC     0x00000002
#define FONT_UNDERLINE  0x00000004
13
/*
   One node of the parsed document tree.

   A Block is linked into its parent's subBlocks list and keeps its own
   children in subBlocks.  The attribute strings it holds are released by the
   destructor, together with the control window, the image entry link and all
   child blocks.
*/
class Block : struct
{
   // Sibling links (children are walked through 'next')
   Block prev;
   Block next;

   BlockType type;

   // Text content and the font used for it
   char * text;
   char * face;
   float size;
   FontEntry font;
   int textLen;

   // Tree structure
   OldList subBlocks;
   Block parent;

   // Strings filled in from tag attributes
   char * src;
   char * href;
   char * value;
   char * action;
   char * name;
   char * anchor;

   // Picture
   ImageEntry imageEntry;
   OldLink entryPtr;       // link stored in imageEntry.bitmapPtrs while set
   Bitmap bitmap;

   ColorAlpha bgColor;
   ColorAlpha textColor;
   Block prevFont;         // font block that was current before this one
   int w;
   int h;
   uint attribs;           // FONT_* flags
   InputType inputType;
   Window window;

   // Tables
   // Cells:
   int span;
   int rowSpan;
   int width;              // sizes given without a '%'
   int height;
   int pWidth;             // sizes given with a '%'
   int pHeight;
   int minW;      // Minimum column width
   int lineW;     // Minimum column width for lines to fit on one row
   HorizontalAlignment halign;
   VerticalAlignment valign;
   OldList columns;
   bool noWrap;
   int cellPadding;

   int startY;
   int startX;

   // Releases everything this block owns, then destroys its children.
   ~Block()
   {
      Block sub;

      // Text and attribute strings
      delete text;
      delete face;
      delete src;
      delete href;
      delete anchor;
      delete value;
      delete action;
      delete name;

      columns.Free(null);

      // Control window attached to this block, if any
      if(window)
      {
         window.Destroy(0);
         delete window;
      }

      // Unregister this block from its image entry before dropping the link
      if(entryPtr)
      {
         imageEntry.bitmapPtrs.Remove(entryPtr);
         delete entryPtr;
      }

      // Detach and destroy children from the head of the list until it is empty
      for(sub = subBlocks.first; sub; sub = subBlocks.first)
      {
         subBlocks.Remove(sub);
         delete sub;
      }
   }

   // Sets entryPtr to null on this block and on every descendant, without
   // removing or freeing the links.
   void ClearEntries()
   {
      Block sub = subBlocks.first;

      entryPtr = null;
      while(sub)
      {
         sub.ClearEntries();
         sub = sub.next;
      }
   }
};
103
/*
   Extracts the next word of a tag from *input into keyWord and advances
   *input to the start of the following word.

   Scanning rules:
    - leading spaces and tabs are skipped; an unquoted space or tab after the
      word has started ends it
    - an unquoted ',' or '>' ends the word; an unquoted '=' ends it as well
      when treatEqual is true, otherwise '=' is copied like any other character
    - only double quotes delimit quoted text; the quotes themselves are not
      copied, and scanning stops at the first character following the closing
      quote
    - carriage returns and line feeds are dropped, even inside quotes
    - after the word, any run of '>', ',', ' ', '=', tab, CR and LF is skipped
      (note that '=' is skipped here regardless of treatEqual)

   Parameters:
      input       in/out pointer to the current position in the tag text
      keyWord     destination buffer, always null-terminated when maxSize > 0
      maxSize     total size of keyWord in bytes, terminator included; at most
                  maxSize - 1 characters are stored and the rest are discarded
      treatEqual  whether an unquoted '=' terminates the word

   Returns WORD_NONE when no character was stored, WORD_QUOTED when a double
   quote was encountered, and WORD_NORMAL otherwise.
*/
static bool GetKeyWordEx(char ** input, char * keyWord, int maxSize, bool treatEqual)
{
   char * string = *input;
   char ch;
   int c = 0;
   // Callers pass sizeof(buffer): keep the last byte for the terminator so
   // that keyWord[c] below can never land one past the end of the buffer.
   int limit = maxSize - 1;
   bool quoted = false, start = true, wasQuoted = false;

   for(; (ch = *string); string++)
   {
      // The closing quote has just been consumed: the word is complete
      if(!quoted && wasQuoted)
         break;

      if((ch == ' ' || ch == '\t') && !quoted)
      {
         if(!start) break;
      }
      else
      {
         if(!quoted && ((ch == ',' || (treatEqual && ch == '=')) || ch == '>') )
            break;
         else if(ch == '\"' /*|| ch == '\''*/)
         {
            quoted ^= true;
            wasQuoted = true;
            start = false;
         }
         else if(ch != '\r' && ch != '\n')
         {
            if(c < limit)
               keyWord[c++] = ch;
            start = false;
         }
      }
   }

   // Skip the separators so the next call starts right on the next word
   for(;*string == '>' || *string == ',' || *string == ' ' || *string == '=' || *string == '\t' || *string == '\r' || *string == '\n'; string++);

   if(maxSize > 0)
      keyWord[c] = '\0';
   *input = string;
   return (c > 0) ? (wasQuoted ? WORD_QUOTED : WORD_NORMAL) : WORD_NONE;
}
143
// Shorthand for GetKeyWordEx() with treatEqual enabled, i.e. an unquoted '='
// also ends the extracted word.  Parameters and result are passed through
// unchanged.
static bool GetKeyWord(char ** input, char * keyWord, int maxSize)
{
   bool result = GetKeyWordEx(input, keyWord, maxSize, true);
   return result;
}
148
/*
   Checks whether 'string', once its leading spaces and double quotes are
   skipped, begins with 'what' (compared without regard to case).

   Parameters:
      string   text to examine
      what     null-terminated text expected at the start
      count    number of characters of 'string' that may be examined;
               0 means 'string' is null-terminated and examined up to its end

   Returns a pointer to the character of 'string' following the matched text,
   or null when the text does not match or 'count' is exhausted first.
*/
static char * GetString(char * string, char * what, int count)
{
   int wc, sc;

   // Skip leading blanks and opening quotes
   for(sc = 0; (!count && string[sc]) || sc<count; sc++)
   {
      if(string[sc] != ' ' && string[sc] != '\"')
         break;
   }

   for(wc = 0; what[wc]; wc++, sc++)
   {
      // Ran out of examinable characters before matching all of 'what'
      if(count && sc >= count)
         return null;

      // tolower() is only defined for unsigned char values (and EOF): go
      // through byte so that characters >= 0x80 are not passed as negatives.
      if(string[sc] != what[wc] && tolower((byte)string[sc]) != tolower((byte)what[wc]))
         return null;
   }
   return string + sc;
}
166
// Creates a new block of the given type, records 'parent' as its parent,
// appends it to parent.subBlocks and returns it.
static Block AddBlock(Block parent, BlockType type)
{
   Block child { };

   child.parent = parent;
   child.type = type;
   parent.subBlocks.Add(child);
   return child;
}
173
174 #include <stdio.h>
175
176 class HTMLFile
177 {
178    Block block {};
179    Block defaultFont { };
180
181    Block body;
182    ColorAlpha background { 255, white };
183    //Button defaultButton;
184
185    bool Parse(File f)
186    {
187       bool insideTag = false;
188       char tag[MAX_TAG_LEN];
189       char symbol[MAX_SYMBOL_LEN];
190       int tagLen;
191       Block block = this.block, subBlock;
192       char * text;
193       int textLen = 0;
194       bool insideSymbol = false;
195       int symbolLen = 0;
196       int insideScript = 0, insideStyle = 0;
197       bool commented = false;
198       byte lastCh = ' ';
199       bool code = false;
200       bool quoted = false;
201
202       Block fontBlock = defaultFont;
203       fontBlock.type = FONT;
204       fontBlock.face = CopyString("Times New Roman");
205
206       /*
207       fontBlock.attribs = FONT_UNDERLINE|FONT_BOLD;
208       fontBlock.size = 12;
209       fontBlock.textColor = LT_BLUE|0xFF000000;
210       */
211       
212       fontBlock.textColor = black;
213       fontBlock.size = 10;
214
215       fontBlock.font = FontEntry { size = fontBlock.size, attribs = fontBlock.attribs, face = CopyString(fontBlock.face) };
216       fontCache.Add(fontBlock.font);
217
218       background = white;
219       
220       text = new char[32768*4];
221
222       body = block;
223       
224       // Parse entire file
225       while(!f.Eof() && block)
226       {
227          byte ch = 0;
228          
229          f.Getc(&ch);
230          //fwrite(&ch, 1, 1, stdout);
231          if(commented)
232          {
233             if((ch == '-' && tagLen < 2) || (ch == '>' && tagLen == 2))
234             {
235                tag[tagLen++] = ch;
236                tag[tagLen] = '\0';
237                if(!strcmp(tag,  "-->"))
238                {
239                   commented = false;
240                }
241             }
242             else
243                tagLen = 0;
244          }
245          else if(insideTag)
246          {
247             if(ch == '\"')
248                quoted ^= true;
249             if(ch == '<' && !quoted)
250                insideTag++;
251             /*else */if(ch == '>' && !quoted)
252             {
253                insideTag--;
254                if(!insideTag)
255                {
256                   char keyWord[1024];
257                   char * string;
258
259                   tag[tagLen] = '\0';
260                   insideTag = false;
261
262                   string = tag;
263
264                   if(GetKeyWord(&string, keyWord, sizeof(keyWord)))
265                   {
266                      if(!strcmpi(keyWord, "object"))
267                      {
268                         subBlock = AddBlock(block, IMAGE);
269                         for(;string[0];)
270                         {
271                            GetKeyWord(&string, keyWord, sizeof(keyWord));
272                            if(!strcmpi(keyWord, "width"))
273                            {
274                               GetKeyWord(&string, keyWord, sizeof(keyWord));
275                               subBlock.w = atoi(keyWord);
276                            }
277                            else if(!strcmpi(keyWord, "height"))
278                            {
279                               GetKeyWord(&string, keyWord, sizeof(keyWord));
280                               subBlock.h = atoi(keyWord);
281                            }
282                         }
283                      }
284                      else if(!strcmpi(keyWord, "img"))
285                      {
286                         subBlock = AddBlock(block, IMAGE);
287                         subBlock.valign = bottom;
288                         subBlock.halign = middle;
289                         for(;string[0];)
290                         {
291                            GetKeyWord(&string, keyWord, sizeof(keyWord));
292                            if(!strcmpi(keyWord, "src"))
293                            {
294                               GetKeyWordEx(&string, keyWord, sizeof(keyWord), false);
295                               delete subBlock.src;
296                               subBlock.src = keyWord[0] ? CopyString(keyWord) : null;
297                            }
298                            else if(!strcmpi(keyWord, "width"))
299                            {
300                               GetKeyWord(&string, keyWord, sizeof(keyWord));
301                               if(strstr(keyWord, "%"))
302                                  subBlock.pWidth = atoi(keyWord);
303                               else
304                                  subBlock.width = atoi(keyWord);
305                            }
306                            else if(!strcmpi(keyWord, "height"))
307                            {
308                               GetKeyWord(&string, keyWord, sizeof(keyWord));
309                               if(strstr(keyWord, "%"))
310                                  subBlock.pHeight = atoi(keyWord);
311                               else
312                                  subBlock.height = atoi(keyWord);
313                            }
314                            else if(!strcmpi(keyWord, "valign"))
315                            {
316                               GetKeyWord(&string, keyWord, sizeof(keyWord));
317                               if(!strcmpi(keyWord, "middle") || !strcmpi(keyWord, "center") )
318                               {
319                                  subBlock.valign = middle;
320                               }
321                            }
322                            else if(!strcmpi(keyWord, "align"))
323                            {
324                               GetKeyWord(&string, keyWord, sizeof(keyWord));
325                               if(!strcmpi(keyWord, "middle") || !strcmpi(keyWord, "center") )
326                               {
327                                  subBlock.halign = middle;
328                               }
329                               else if(!strcmpi(keyWord, "left"))
330                               {
331                                  subBlock.halign = left;
332                               }
333                               else if(!strcmpi(keyWord, "right"))
334                               {
335                                  subBlock.halign = right;
336                               }
337                               else if(!strcmpi(keyWord, "top"))
338                               {
339                                  subBlock.valign = top;
340                               }
341                               else if(!strcmpi(keyWord, "bottom"))
342                               {
343                                  subBlock.valign = bottom;
344                               }
345                            }
346                         }
347                      }
348                      else if(!strcmpi(keyWord, "body"))
349                      {
350                         block = AddBlock(block, BODY);
351                         body = block;
352                         lastCh = ' ';
353
354                         for(;string[0];)
355                         {
356                            GetKeyWord(&string, keyWord, sizeof(keyWord));
357                            if(!strcmpi(keyWord, "bgcolor"))
358                            {
359                               GetKeyWord(&string, keyWord, sizeof(keyWord));
360                               background = strtol((keyWord[0] == '#') ? (keyWord+1) : keyWord, null, 16);
361                               if(keyWord[0] != '#' || strlen(keyWord) <= 7)
362                                  background |= 0xFF000000;
363                            }
364                            else if(!strcmpi(keyWord, "text"))
365                            {
366                               GetKeyWord(&string, keyWord, sizeof(keyWord));
367                               fontBlock.textColor = 
368                                  block.textColor = 0xFF000000 | strtol((keyWord[0] == '#') ? (keyWord+1) : keyWord, null, 16);
369                            }
370                         }
371                      }
372                      else if(!strcmpi(keyWord, "br"))
373                      {
374                         subBlock = AddBlock(block, BR);
375                         lastCh = ' ';
376                      }
377                      else if(!strcmpi(keyWord, "code"))
378                      {
379                         code = true;
380                      }
381                      else if(!strcmpi(keyWord, "/code"))
382                      {
383                         code = false;
384                      }
385                      else if(!strcmpi(keyWord, "p"))
386                      {
387                      }
388                      else if(!strcmpi(keyWord, "font") || !strcmpi(keyWord, "b") || !strcmpi(keyWord, "i") 
389                         || !strcmpi(keyWord, "strong") || !strcmpi(keyWord, "em") || 
390                         !strcmpi(keyWord, "h1") || !strcmpi(keyWord, "h2") || !strcmpi(keyWord, "h3"))
391                      {
392                         subBlock = AddBlock(block, FONT);
393                         subBlock.attribs = fontBlock.attribs;
394                         if(!strcmpi(keyWord, "font"))
395                         {
396                            for(;string[0];)
397                            {
398                               GetKeyWord(&string, keyWord, sizeof(keyWord));
399
400                               if(!strcmpi(keyWord, "face"))
401                               {
402                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
403                                  delete subBlock.face;
404                                  subBlock.face = CopyString(keyWord);
405                               }
406                               else if(!strcmpi(keyWord, "size"))
407                               {
408                                  #define NUM_FONT_SIZES  7
409                                  static float fontSizes[NUM_FONT_SIZES] = { 7.5f, 10, 12, 13.5f, 18, 24, 36 };
410                                  int size;
411                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
412                                  size = atoi(keyWord);
413                                  if(keyWord[0] == '+')
414                                  {
415                                     int c;
416                                     int numTimes = atoi(keyWord+1);
417                                     for(c = 0; c<NUM_FONT_SIZES-1; c++)
418                                        if(fontSizes[c] > fontBlock.size)
419                                        {
420                                           numTimes--;
421                                           if(!numTimes) break;
422                                        }
423                                     subBlock.size = fontSizes[c];
424                                  }
425                                  else if(keyWord[0] == '-')
426                                  {
427                                     int c;
428                                     int numTimes = atoi(keyWord+1);
429                                     for(c = NUM_FONT_SIZES-1; c>0; c--)
430                                        if(fontSizes[c] < fontBlock.size)
431                                        {
432                                           numTimes--;
433                                           if(!numTimes) break;
434                                        }
435                                     subBlock.size = fontSizes[c];
436                                  }
437                                  else if(size && size < NUM_FONT_SIZES)
438                                  {
439                                     subBlock.size = fontSizes[size-1];
440                                  }
441
442                               }
443                               else if(!strcmpi(keyWord, "color"))
444                               {
445                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
446                                  subBlock.textColor = 
447                                     0xFF000000 | strtol((keyWord[0] == '#') ? (keyWord+1) : keyWord, null, 16);
448                               }
449                            }
450                         }
451                         else
452                         {
453                            if(!strcmpi(keyWord, "b") || !strcmpi(keyWord, "strong"))
454                               subBlock.attribs |= FONT_BOLD;
455                            else if(!strcmpi(keyWord, "i") || !strcmpi(keyWord, "em"))
456                               subBlock.attribs |= FONT_ITALIC;
457                            else if(!strcmpi(keyWord, "h1"))
458                               subBlock.size = 20;
459                            else if(!strcmpi(keyWord, "h2"))
460                               subBlock.size = 16;
461                            else if(!strcmpi(keyWord, "h3"))
462                               subBlock.size = 14;
463                         }
464
465                         if(!subBlock.face) subBlock.face = CopyString(fontBlock.face);
466                         if(!subBlock.size) subBlock.size = fontBlock.size;
467                         if(!subBlock.textColor) subBlock.textColor = fontBlock.textColor;
468
469                         subBlock.prevFont = fontBlock;
470                         fontBlock = subBlock;
471                         block = subBlock;
472                      }
473                      else if(!strcmpi(keyWord, "/font") ||
474                              !strcmpi(keyWord, "/b") ||
475                              !strcmpi(keyWord, "/strong") ||
476                              !strcmpi(keyWord, "/em") ||
477                              !strcmpi(keyWord, "/i") ||
478                              !strcmpi(keyWord, "/h1") ||
479                              !strcmpi(keyWord, "/h2") ||
480                              !strcmpi(keyWord, "/h3"))
481                      {
482                         if(block.type == FONT)
483                         {
484                            fontBlock = block.prevFont;
485                            block = block.parent;
486                         }
487                      }
488                      else if(!strcmpi(keyWord, "a"))
489                      {
490                         int textDecoration = 0;
491                         subBlock = AddBlock(block, ANCHOR);
492                         subBlock.attribs = fontBlock.attribs;
493
494                         for(;string[0];)
495                         {
496                            GetKeyWord(&string, keyWord, sizeof(keyWord));
497
498                            if(!strcmpi(keyWord, "name"))
499                            {
500                               GetKeyWordEx(&string, keyWord, sizeof(keyWord), false);
501                               delete subBlock.anchor;
502                               subBlock.anchor = CopyString(keyWord);
503                            }
504                            else if(!strcmpi(keyWord, "href"))
505                            {
506                               GetKeyWordEx(&string, keyWord, sizeof(keyWord), false);
507                               delete subBlock.href;
508                               subBlock.href = CopyString(keyWord);
509                               if(!textDecoration)
510                                  textDecoration = 1;
511                            }
512                            else if(!strcmpi(keyWord, "style"))
513                            {
514                               //for(;string[0];)
515                               {
516                                  GetKeyWordEx(&string, keyWord, sizeof(keyWord), false);
517                                  if(strstr(keyWord, "text-decoration:") && strstr(keyWord, "none;"))
518                                     textDecoration = 2;
519                               }
520                            }
521                         }
522                         subBlock.attribs |= FONT_BOLD;
523                         if(textDecoration == 1) subBlock.attribs |= FONT_UNDERLINE;
524                         delete subBlock.face;
525                         subBlock.face = CopyString(fontBlock.face);
526                         subBlock.size = fontBlock.size;
527                         subBlock.textColor = Color { 85,85,255 };
528                         subBlock.prevFont = fontBlock;
529                         fontBlock = subBlock;
530                         block = subBlock;
531                      }
532                      else if(!strcmpi(keyWord, "/a"))
533                      {
534                         if(block.type == ANCHOR)
535                         {
536                            fontBlock = block.prevFont;
537                            block = block.parent;
538                         }
539                      }
540                      else if(!strcmpi(keyWord, "script"))
541                      {
542                         insideScript++;
543                      }
544                      else if(!strcmpi(keyWord, "/script"))
545                      {
546                         if(insideScript)
547                            insideScript--;
548                      }
549                      else if(!strcmpi(keyWord, "style"))
550                      {
551                         insideStyle++;
552                      }
553                      else if(!strcmpi(keyWord, "/style"))
554                      {
555                         if(insideStyle)
556                            insideStyle--;
557                      }
558                      else if(!strcmpi(keyWord, "input"))
559                      {
560                         subBlock = AddBlock(block, INPUT);
561                         for(;string[0];)
562                         {
563                            GetKeyWord(&string, keyWord, sizeof(keyWord));
564
565                            if(!strcmpi(keyWord, "type"))
566                            {
567                               GetKeyWord(&string, keyWord, sizeof(keyWord));
568                               if(!strcmpi(keyWord, "text"))
569                               {
570                                  subBlock.inputType = InputType::text;
571                               }
572                               else if(!strcmpi(keyWord, "submit"))
573                               {
574                                  subBlock.inputType = submit;
575                               }
576                               else if(!strcmpi(keyWord, "radio"))
577                               {
578                                  subBlock.inputType = radio;
579                               }
580                               else if(!strcmpi(keyWord, "hidden"))
581                               {
582                                  subBlock.inputType = hidden;
583                               }
584                               delete subBlock.href;
585                               subBlock.href = CopyString(keyWord);
586                            }
587                            else if(!strcmpi(keyWord, "size"))
588                            {
589                               int size;
590                               GetKeyWord(&string, keyWord, sizeof(keyWord));
591                               size = atoi(keyWord);
592                            }
593                            else if(!strcmpi(keyWord, "value"))
594                            {
595                               GetKeyWordEx(&string, keyWord, sizeof(keyWord), false);
596                               delete subBlock.value;
597                               subBlock.value = CopyString(keyWord);
598                            }
599                            else if(!strcmpi(keyWord, "name"))
600                            {
601                               GetKeyWord(&string, keyWord, sizeof(keyWord));
602                               delete subBlock.name;
603                               subBlock.name = CopyString(keyWord);
604                            }
605                         }
606                      }
607                      else if(!strcmpi(keyWord, "form"))
608                      {
609                         subBlock = AddBlock(block, FORM);
610                         for(;string[0];)
611                         {
612                            GetKeyWord(&string, keyWord, sizeof(keyWord));
613
614                            if(!strcmpi(keyWord, "action"))
615                            {
616                               GetKeyWordEx(&string, keyWord, sizeof(keyWord), false);
617                               delete subBlock.action;
618                               subBlock.action = CopyString(keyWord);
619                            }
620                         }
621                         block = subBlock;
622                      }
623                      else if(!strcmpi(keyWord, "/form"))
624                      {
625                         if(block.type == FORM)
626                         {
627                            block = block.parent;
628                         }
629                      }
630                      else if(!strcmpi(keyWord, "center"))
631                      {
632                         subBlock = AddBlock(block, CENTER);
633                         block = subBlock;
634                      }
635                      else if(!strcmpi(keyWord, "/center"))
636                      {
637                         if(block.type == CENTER)
638                         {
639                            block = block.parent;
640                         }
641                      }
642                      else if(!strcmpi(keyWord, "table"))
643                      {
644                         lastCh = ' ';
645                         subBlock = AddBlock(block, TABLE);
646                         subBlock.cellPadding = 4;
647                         for(;string[0];)
648                         {
649                            GetKeyWord(&string, keyWord, sizeof(keyWord));
650
651                            if(!strcmpi(keyWord, "width"))
652                            {
653                               GetKeyWord(&string, keyWord, sizeof(keyWord));
654                               if(strstr(keyWord, "%"))
655                                  subBlock.pWidth = atoi(keyWord);
656                               else
657                                  subBlock.width = atoi(keyWord);
658                            }
659                            else if(!strcmpi(keyWord, "height"))
660                            {
661                               GetKeyWord(&string, keyWord, sizeof(keyWord));
662                               if(strstr(keyWord, "%"))
663                                  subBlock.pHeight = atoi(keyWord);
664                               else
665                                  subBlock.height = atoi(keyWord);
666                            }
667                            else if(!strcmpi(keyWord, "cellpadding"))
668                            {
669                               GetKeyWord(&string, keyWord, sizeof(keyWord));
670                               subBlock.cellPadding = atoi(keyWord);
671                            }
672                            else if(!strcmpi(keyWord, "background"))
673                            {
674                               GetKeyWord(&string, keyWord, sizeof(keyWord));
675                               delete subBlock.src;
676                               subBlock.src = CopyString(keyWord);
677                            }
678                            else if(!strcmpi(keyWord, "bgcolor"))
679                            {
680                               GetKeyWord(&string, keyWord, sizeof(keyWord));
681                               subBlock.bgColor = 0xFF000000 | strtol((keyWord[0] == '#') ? (keyWord+1) : keyWord, null, 16);
682                            }
683                            else if(!strcmpi(keyWord, "align"))
684                            {
685                               GetKeyWord(&string, keyWord, sizeof(keyWord));
686                               if(!strcmpi(keyWord, "middle") || !strcmpi(keyWord, "center") )
687                               {
688                                  subBlock.halign = middle;
689                               }
690                               else if(!strcmpi(keyWord, "left"))
691                               {
692                                  subBlock.halign = left;
693                               }
694                               else if(!strcmpi(keyWord, "right"))
695                               {
696                                  subBlock.halign = right;
697                               }
698                            }
699                         }
700
701                         block = subBlock;
702                      }
703                      else if(!strcmpi(keyWord, "/table"))
704                      {
705                         if(block.type == TD)
706                         {
707                            block = block.parent;
708                            lastCh = ' ';
709                         }
710                         if(block.type == TR)
711                         {
712                            block = block.parent;
713                            lastCh = ' ';
714                         }
715                         if(block.type == TABLE)
716                         {
717                            block = block.parent;
718                            lastCh = ' ';
719                         }
720                      }
721                      else if(!strcmpi(keyWord, "tr"))
722                      {
723                         if(block.type == TD)
724                         {
725                            block = block.parent;
726                            lastCh = ' ';
727                         }
728                         if(block.type == TR)
729                         {
730                            block = block.parent;
731                            lastCh = ' ';
732                         }
733
734                         subBlock = AddBlock(block, TR);
735
736                         for(;string[0];)
737                         {
738                            GetKeyWord(&string, keyWord, sizeof(keyWord));
739                            /*
740                            if(!strcmpi(keyWord, "height"))
741                            {
742                               GetKeyWord(&string, keyWord, sizeof(keyWord));
743                               subBlock.height = atoi(keyWord);
744                            }
745                            */
746                            if(!strcmpi(keyWord, "align"))
747                            {
748                               GetKeyWord(&string, keyWord, sizeof(keyWord));
749                               if(!strcmpi(keyWord, "middle") || !strcmpi(keyWord, "center") )
750                               {
751                                  subBlock.valign = middle;
752                               }
753                               else if(!strcmpi(keyWord, "top"))
754                               {
755                                  subBlock.valign = top;
756                               }
757                               else if(!strcmpi(keyWord, "bottom"))
758                               {
759                                  subBlock.valign = bottom;
760                               }
761                            }
762                         }
763                         block = subBlock;
764                      }
765                      else if(!strcmpi(keyWord, "/tr"))
766                      {
767                         if(block.type == TD)
768                         {
769                            block = block.parent;
770                            lastCh = ' ';
771                         }
772                         if(block.type == TR)
773                         {
774                            block = block.parent;
775                            lastCh = ' ';
776                         }
777                      }
778                      else if(!strcmpi(keyWord, "td"))
779                      {
780                         while(block && block.type != TR && block.type != TABLE)
781                         {
782                            if(block == fontBlock)
783                               fontBlock = block.prevFont;
784                            block = block.parent;
785                            lastCh = ' ';
786                         }
787                         if(block)
788                         {
789                            if(block.type == TABLE)
790                            {
791                               block = AddBlock(block, TR);
792                            }
793                            subBlock = AddBlock(block, TD);
794                            subBlock.span = subBlock.rowSpan = 1;
795                            subBlock.valign = block.valign;
796                            subBlock.halign = block.halign;
797
798                            for(;string[0];)
799                            {
800                               GetKeyWord(&string, keyWord, sizeof(keyWord));
801
802                               if(!strcmpi(keyWord, "width"))
803                               {
804                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
805                                  if(strstr(keyWord, "%"))
806                                     subBlock.pWidth = atoi(keyWord);
807                                  else
808                                     subBlock.width = atoi(keyWord);
809                               }
810                               else if(!strcmpi(keyWord, "height"))
811                               {
812                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
813                                  if(strstr(keyWord, "%"))
814                                     subBlock.pHeight = atoi(keyWord);
815                                  else
816                                     subBlock.height = atoi(keyWord);
817                               }
818                               else if(!strcmpi(keyWord, "colspan"))
819                               {
820                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
821                                  subBlock.span = atoi(keyWord);
822                               }
823                               else if(!strcmpi(keyWord, "cellpadding"))
824                               {
825                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
826                                  subBlock.cellPadding = atoi(keyWord);
827                               }
828                               else if(!strcmpi(keyWord, "rowspan"))
829                               {
830                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
831                                  subBlock.rowSpan = atoi(keyWord);
832                               }
833                               else if(!strcmpi(keyWord, "nowrap"))
834                               {
835                                  subBlock.noWrap = true;
836                               }
837                               else if(!strcmpi(keyWord, "background"))
838                               {
839                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
840                                  delete subBlock.src;
841                                  subBlock.src = CopyString(keyWord);
842                               }
843                               else if(!strcmpi(keyWord, "bgcolor"))
844                               {
845                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
846                                  subBlock.bgColor = 0xFF000000 | strtol((keyWord[0] == '#') ? (keyWord+1) : keyWord, null, 16);
847                               }
848                               else if(!strcmpi(keyWord, "valign"))
849                               {
850                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
851                                  if(!strcmpi(keyWord, "middle") || !strcmpi(keyWord, "center") )
852                                  {
853                                     subBlock.valign = middle;
854                                  }
855                                  else if(!strcmpi(keyWord, "top"))
856                                  {
857                                     subBlock.valign = top;
858                                  }
859                                  else if(!strcmpi(keyWord, "bottom"))
860                                  {
861                                     subBlock.valign = bottom;
862                                  }
863                               }
864                               else if(!strcmpi(keyWord, "align"))
865                               {
866                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
867                                  if(!strcmpi(keyWord, "middle") || !strcmpi(keyWord, "center") )
868                                  {
869                                     subBlock.halign = middle;
870                                  }
871                                  else if(!strcmpi(keyWord, "left"))
872                                  {
873                                     subBlock.halign = left;
874                                  }
875                                  else if(!strcmpi(keyWord, "right"))
876                                  {
877                                     subBlock.halign = right;
878                                  }
879                               }
880                            }
881                            block = subBlock;
882                         }
883                      }
884                      else if(!strcmpi(keyWord, "/td"))
885                      {
886                         if(block.type == TD)
887                         {
888                            block = block.parent;
889                            lastCh = ' ';
890                         }
891                      }
892                      else if(!strcmpi(keyWord, "/html"))
893                         break;
894                   }
895                }
896                else
897                {
898                   tag[tagLen++] = ch;
899                   tag[tagLen] = '\0';
900                }
901             }
902             else
903             {
904                tag[tagLen++] = ch;
905                tag[tagLen] = '\0';
906             }
907             if(!strcmp(tag, "!--"))
908             {
909                commented = true;
910                insideTag = false;
911                tagLen = 0;
912                tag[tagLen] = '\0';
913             }
914          }
915          else
916          {
917             if(ch == '<')
918             {
919                if(textLen)
920                {
921                   if(block.type == TABLE)
922                   {
923                      subBlock = AddBlock(block, TR);
924                      block = subBlock;
925                   }
926                   if(block.type == TR)
927                   {
928                      subBlock = AddBlock(block, TD);
929                      subBlock.span = subBlock.rowSpan = 1;
930                      subBlock.valign = block.valign;
931                      subBlock.halign = block.halign;
932                      block = subBlock;
933                   }
934
935                   subBlock = AddBlock(block, TEXT);
936                   delete subBlock.text;
937                   subBlock.text = CopyString(text);
938                   subBlock.textLen = textLen;
939
940
941                   textLen = 0;
942                   text[0] = '\0';
943
944                }
945
946                insideTag = true;
947                tagLen = 0;
948             }
949             else if(ch != '\n' && ch != '\r' && ch != '\t' && !insideScript && !insideStyle)
950             {
951                if(insideSymbol)
952                {
953                   if(ch == ';')
954                   {
955                      unichar unicode = 0;
956                      char utf8[5];
957                      if(!strcmpi(symbol, "nbsp")) 
958                         unicode = ' ';
959                      else if(!strcmpi(symbol, "copy")) 
960                         unicode ='©';
961                      else if(!strcmpi(symbol, "raquo")) 
962                         unicode = '»';
963                      else if(!strcmpi(symbol, "eacute"))
964                         unicode = 'é';
965                      else if(!strcmpi(symbol, "egrave"))
966                         unicode = 'è';
967                      else if(!strcmpi(symbol, "ecirc"))
968                         unicode = 'ê';
969                      else if(!strcmpi(symbol, "agrave"))
970                         unicode = 'à';
971                      else if(!strcmpi(symbol, "acirc"))
972                         unicode = 'â';
973                      else if(!strcmpi(symbol, "ocirc"))
974                         unicode = 'ô';                     
975                      if(unicode)
976                      {
977                         int len = UTF32toUTF8Len(&unicode, 1, utf8, 5);
978                         int c;
979                         for(c = 0; c<len; c++)
980                            text[textLen++] = utf8[c];
981                         lastCh = 0;
982                      }
983                      text[textLen] = '\0';
984                      insideSymbol = false;
985                   }
986                   else if(ch == ' ')
987                   {
988                      // Give up... Treat it as text
989                      text[textLen++] = '&';
990                      CopyBytes(text + textLen, symbol, symbolLen);
991                      textLen += symbolLen;
992                      lastCh = text[textLen++] = ' ';
993                      text[textLen] = '\0';
994                      insideSymbol = false;
995                   }
996                   else
997                   {
998                      symbol[symbolLen++] = ch;
999                      symbol[symbolLen] = '\0';
1000                   }
1001                }
1002                else
1003                {
1004                   if(ch == '&')
1005                   {
1006                      insideSymbol = true;
1007                      symbol[0] = '\0';
1008                      symbolLen = 0;
1009                   }
1010                   else
1011                   {
1012                      if(ch != ' ' || lastCh != ' ' || code)
1013                      // if(ch != ' ' || (textLen && text[textLen-1] !=' '))
1014                      {
1015                         lastCh = ch;
1016                         text[textLen++] = ch;
1017                         text[textLen] = '\0';
1018                      }
1019                   }
1020                }
1021             }
1022             else if(ch == '\n' && code)
1023             {
1024                //printf("%d\n", textLen);
1025                //puts(text);
1026
1027                subBlock = AddBlock(block, TEXT);
1028                delete subBlock.text;
1029                subBlock.text = CopyString(text);
1030                subBlock.textLen = textLen;
1031                textLen = 0;
1032                text[0] = '\0';
1033                subBlock = AddBlock(block, BR);               
1034             }
1035          }
1036       }
1037       delete text;
1038       /*while(!f.Eof())
1039       {
1040          byte ch = 0;
1041          f.Getc(&ch);
1042       }*/
1043       return true;
1044    }
1045 }