cleaned all trailing white space from source files.
[sdk] / extras / html / htmlParser.ec
1 import "HTMLView"
2
3 #define MAX_TAG_LEN     256
4 #define MAX_SYMBOL_LEN  256
5
6 #define WORD_NONE    0
7 #define WORD_NORMAL  1
8 #define WORD_QUOTED  2
9
10 #define FONT_BOLD       0x00000001
11 #define FONT_ITALIC     0x00000002
12 #define FONT_UNDERLINE  0x00000004
13
// One node of the parsed HTML document tree. A single class serves every kind of
// node (text run, font change, image, anchor, form input, table, ...); 'type'
// tells which of the members below are meaningful for a given node.
class Block : struct
{
   // Sibling links used while the block sits in its parent's subBlocks list.
   // NOTE(review): presumably these must stay the first members so OldList can
   // link Blocks directly -- confirm before reordering anything here.
   Block prev;
   Block next;
   BlockType type;
   char * text;            // Characters of a TEXT block (owned)
   char * face;            // Font face name (owned)
   float size;             // Font size; INPUT blocks store their "size" attribute here
   FontEntry font;
   int textLen;            // Length of 'text'
   OldList subBlocks;      // Child blocks (owned, destroyed with this block)
   Block parent;
   char * src;             // "src" attribute (owned)
   char * href;            // "href" attribute; INPUT blocks keep their raw "type" text here (owned)
   char * value;           // "value" attribute (owned)
   char * action;          // FORM "action" attribute (owned)
   char * name;            // "name" attribute of an input (owned)
   char * anchor;          // "name" attribute of an <a> tag (owned)

   // Picture
   ImageEntry imageEntry;
   OldLink entryPtr;       // This block's link inside imageEntry.bitmapPtrs, if any
   Bitmap bitmap;

   ColorAlpha bgColor;
   ColorAlpha textColor;
   Block prevFont;         // Font block that was in effect before this one; restored by the closing tag
   int w;
   int h;
   uint attribs;           // Combination of the FONT_* flags
   InputType inputType;
   Window window;          // Destroyed together with the block

   // Tables
   // Cells:
   int span, rowSpan;
   int width, height;      // Size given as a plain number
   int pWidth, pHeight;    // Size given as a percentage
   int minW;      // Minimum column width
   int lineW;     // Minimum column width for lines to fit on one row
   HorizontalAlignment halign;
   VerticalAlignment valign;
   OldList columns;
   bool noWrap;
   int cellPadding;

   int startY, startX;

   // Frees everything the block owns, then destroys the whole subtree below it.
   ~Block()
   {
      Block child;

      // Owned strings
      delete text;
      delete face;
      delete src;
      delete href;
      delete anchor;
      delete name;
      delete value;
      delete action;

      columns.Free(null);

      if(window)
      {
         window.Destroy(0);
         delete window;
      }

      // Detach our link from the image entry's bitmapPtrs list before freeing it.
      // ClearEntries() nulls entryPtr beforehand when this step must be skipped.
      if(entryPtr)
      {
         imageEntry.bitmapPtrs.Remove(entryPtr);
         delete entryPtr;
      }

      // Always take the current head: each pass unlinks and destroys one child.
      for(child = subBlocks.first; child; child = subBlocks.first)
      {
         subBlocks.Remove(child);
         delete child;
      }
   }

   // Forgets the image entry link of this block and of every descendant,
   // without freeing it, so that the destructor leaves imageEntry alone.
   void ClearEntries()
   {
      Block child = subBlocks.first;

      entryPtr = null;
      while(child)
      {
         child.ClearEntries();
         child = child.next;
      }
   }
};
103
// Returns a newly allocated copy of 'input' in which every "%XX" escape (XX being
// two hexadecimal digits) is replaced by the byte it encodes.
//
// Sequences that are not two valid hex digits are copied through unchanged, and so
// is "%00": decoding it would cut the resulting C string short.
// The caller owns the returned string. 'input' must not be null.
String ParseURL(String input)
{
   int c;
   char ch;
   int len = strlen(input);
   // Decoding never grows the text, so the input length (+ terminator) is enough
   String output = new char[len+1];
   len = 0;
   for(c = 0; (ch = input[c]); c++)
   {
      // The && chain stops at the terminator, so c+1 / c+2 never read past the string.
      // The byte casts matter: passing a negative plain char (any byte >= 0x80) to
      // isalnum() is undefined behavior.
      if(ch == '%' && isalnum((byte)input[c+1]) && isalnum((byte)input[c+2]))
      {
         char hex[3] = { input[c+1], input[c+2], 0 };
         char * end;
         int v = (int)strtoul(hex, &end, 16);
         // Both characters must have been consumed as hex digits, and the value must not be NUL
         if(v && end == hex + 2)
         {
            output[len++] = (char)v;
            c += 2;
            continue;
         }
      }
      output[len++] = ch;
   }
   output[len++] = 0;
   // Shrink the allocation to what was actually written
   return renew output char[len];
}
130
// Extracts the next word from *input into keyWord and advances *input past it.
//
//   input             in/out cursor into the text being scanned
//   keyWord           destination buffer, always null-terminated when maxSize > 0
//   maxSize           total size of keyWord in bytes, terminator included;
//                     longer words are truncated to maxSize-1 characters
//   treatEqual        when true an unquoted '=' ends the word
//   acceptSingleQuote when true ' may open a quoted word as well as "
//
// Leading spaces and tabs are skipped. Outside quotes a word ends at a space, tab,
// ',', '>' (and '=' if treatEqual). Inside quotes everything up to the matching
// quote is kept, except CR and LF which are always dropped. After the word, any run
// of '>' ',' ' ' '=' tab CR LF is consumed so *input lands on the next word.
//
// Returns WORD_NONE if no character was collected (this includes an empty quoted
// string), WORD_QUOTED if the word was quoted, WORD_NORMAL otherwise. The declared
// return type is bool; the callers visible in this file only test it for non-zero.
/*static */bool GetKeyWordEx(char ** input, char * keyWord, int maxSize, bool treatEqual, bool acceptSingleQuote)
{
   char * string = *input;
   char ch;
   int c = 0;
   bool quoted = false, start = true, wasQuoted = false;
   char quoteChar = 0;

   for(; (ch = *string); string++)
   {
      // A quoted word ends right after its closing quote
      if(!quoted && wasQuoted)
         break;

      if((ch == ' ' || ch == '\t') && !quoted)
      {
         // Blanks before the word are skipped, a blank after it ends it
         if(!start) break;
      }
      else
      {
         if(!quoted && ((ch == ',' || (treatEqual && ch == '=')) || ch == '>') )
            break;
         else if((ch == '\"' || (acceptSingleQuote && ch == '\'')) && (!quoteChar || quoteChar == ch))
         {
            // Remember which quote opened the word so only the same one closes it
            if(!wasQuoted)
               quoteChar = ch;
            quoted ^= true;
            wasQuoted = true;
            start = false;
         }
         else if(ch != '\r' && ch != '\n')
         {
            // Keep one byte free for the terminator: callers pass sizeof(buffer), so
            // filling all maxSize bytes would put the '\0' one past the end of it.
            if(c < maxSize - 1)
               keyWord[c++] = ch;
            start = false;
         }
      }
   }
   // Swallow the separators following the word
   for(;*string == '>' || *string == ',' || *string == ' ' || *string == '=' || *string == '\t' || *string == '\r' || *string == '\n'; string++);
   if(maxSize > 0)
      keyWord[c] = '\0';
   *input = string;
   return (c > 0) ? (wasQuoted ? WORD_QUOTED : WORD_NORMAL) : WORD_NONE;
}
173
// Shorthand for the common GetKeyWordEx case: '=' ends a word and only double
// quotes delimit quoted values. Parameters and result are those of GetKeyWordEx.
static bool GetKeyWord(char ** input, char * keyWord, int maxSize)
{
   bool wordKind = GetKeyWordEx(input, keyWord, maxSize, true, false);
   return wordKind;
}
178
// Checks whether 'string', after any leading spaces and double quotes, begins with
// 'what' (compared without regard to case).
//
//   string  text to examine
//   what    null-terminated word to look for
//   count   number of characters of 'string' that may be examined,
//           or 0 to examine up to its terminator
//
// Returns a pointer just past the matched word inside 'string', or null when the
// word does not match or does not fit within 'count' characters.
static char * GetString(char * string, char * what, int count)
{
   int wc, sc;

   // Skip leading blanks and quotes
   for(sc = 0; (!count && string[sc]) || sc<count; sc++)
   {
      if(string[sc] != ' ' && string[sc] != '\"')
         break;
   }

   for(wc = 0; what[wc]; wc++, sc++)
   {
      // tolower() is only defined for values representable as unsigned char, hence the
      // byte casts: a plain char holding a byte >= 0x80 would be passed as a negative int.
      // A terminator in 'string' can never equal what[wc] here, so it also ends the scan.
      if((count && sc >= count) || (string[sc] != what[wc] && tolower((byte)string[sc]) != tolower((byte)what[wc])))
         return null;
   }
   return string + sc;
}
196
197 #include <stdio.h>
198
// Returns a newly allocated UTF-8 version of 'input'.
//
// Text that already validates as UTF-8 is copied as is; anything else is taken to
// be ISO 8859-1 and converted.
//
//   input   null-terminated text, must not be null
//   lenPtr  optional; receives the length in bytes of the returned string
//           (terminator not counted) on both paths
//
// The caller owns the returned string.
String EncodeString(String input, int * lenPtr)
{
   if(UTF8Validate(input))
   {
      // Report the length here too, so callers never read back a stale or
      // uninitialized value when no conversion was needed
      if(lenPtr) *lenPtr = strlen(input);
      return CopyString(input);
   }
   else
   {
      int len = strlen(input);
      // Generous worst case of 4 output bytes per input byte, plus the terminator
      String s = new char[len*4+1];
      len = ISO8859_1toUTF8(input, s, len*4);
      if(lenPtr) *lenPtr = len;
      // Shrink to the converted length.
      // NOTE(review): relies on ISO8859_1toUTF8 null-terminating its output -- confirm
      return renew s char[len+1];
   }
}
214
215 class HTMLFile
216 {
217    Block block {};
218    Block defaultFont { };
219
220    Block body;
221    Block titleBlock;
222    ColorAlpha background { 255, white };
223    String baseHRef;
224    //Button defaultButton;
225
226    Block ::AddBlock(Block parent, BlockType type)
227    {
228       Block block = Block { parent = parent, type = type };
229       parent.subBlocks.Add(block);
230       return block;
231    }
232
233    ~HTMLFile()
234    {
235       delete baseHRef;
236    }
237
238    bool Parse(File f)
239    {
240       bool result = true;
241       bool insideTag = false;
242       char tag[MAX_TAG_LEN];
243       char symbol[MAX_SYMBOL_LEN];
244       int tagLen = 0;
245       Block block = this.block, subBlock;
246       char * text;
247       int textLen = 0;
248       bool insideSymbol = false;
249       int symbolLen = 0;
250       int insideScript = 0, insideStyle = 0;
251       bool commented = false;
252       byte lastCh = ' ';
253       bool code = false;
254       bool quoted = false;
255       bool lastBR = true;
256
257       Block fontBlock = defaultFont;
258       fontBlock.type = FONT;
259       fontBlock.face = CopyString("Times New Roman");
260
261       /*
262       fontBlock.attribs = FONT_UNDERLINE|FONT_BOLD;
263       fontBlock.size = 12;
264       fontBlock.textColor = LT_BLUE|0xFF000000;
265       */
266
267       fontBlock.textColor = black;
268       fontBlock.size = 10;
269
270       /*fontBlock.font = FontEntry { size = fontBlock.size, attribs = fontBlock.attribs, face = CopyString(fontBlock.face) };
271       fontCache.Add(fontBlock.font);*/
272
273       background = white;
274
275       text = new char[32768*4];
276
277       block.font = fontBlock.font;
278       body = block;
279
280       // Parse entire file
281       while(!f.Eof() && block)
282       {
283          byte ch = 0;
284
285          f.Getc(&ch);
286 #ifdef _DEBUG
287          //fwrite(&ch, 1, 1, stdout);
288 #endif
289          if(commented)
290          {
291             if((ch == '-' && tagLen < 2) || (ch == '>' && tagLen == 2))
292             {
293                tag[tagLen++] = ch;
294                tag[tagLen] = '\0';
295                if(!strcmp(tag,  "-->"))
296                {
297                   commented = false;
298                }
299             }
300             else
301                tagLen = 0;
302          }
303          else if(insideTag)
304          {
305             if(ch == '\"')
306                quoted ^= true;
307             if(ch == '<' && !quoted && !insideScript && !insideStyle)
308             {
309                insideTag++;
310             }
311             /*else */if(ch == '>' && !quoted)
312             {
313                insideTag--;
314                if(!insideTag)
315                {
316                   char keyWord[1024];
317                   char * string;
318
319                   tag[tagLen] = '\0';
320                   insideTag = false;
321
322                   string = tag;
323
324                   if(GetKeyWord(&string, keyWord, sizeof(keyWord)))
325                   {
326                      if(!strcmpi(keyWord, "object"))
327                      {
328                         subBlock = AddBlock(block, IMAGE);
329                         for(;string[0];)
330                         {
331                            GetKeyWord(&string, keyWord, sizeof(keyWord));
332                            if(!strcmpi(keyWord, "width"))
333                            {
334                               GetKeyWord(&string, keyWord, sizeof(keyWord));
335                               subBlock.w = atoi(keyWord);
336                            }
337                            else if(!strcmpi(keyWord, "height"))
338                            {
339                               GetKeyWord(&string, keyWord, sizeof(keyWord));
340                               subBlock.h = atoi(keyWord);
341                            }
342                         }
343                      }
344                      else if(!strcmpi(keyWord, "img"))
345                      {
346                         lastBR = false;
347                         subBlock = AddBlock(block, IMAGE);
348                         subBlock.valign = bottom;
349                         subBlock.halign = middle;
350                         for(;string[0];)
351                         {
352                            GetKeyWord(&string, keyWord, sizeof(keyWord));
353                            if(!strcmpi(keyWord, "src"))
354                            {
355                               GetKeyWordEx(&string, keyWord, sizeof(keyWord), false, false);
356                               delete subBlock.src;
357                               subBlock.src = keyWord[0] ? CopyString(keyWord) : null;
358                            }
359                            else if(!strcmpi(keyWord, "width"))
360                            {
361                               GetKeyWord(&string, keyWord, sizeof(keyWord));
362                               if(strstr(keyWord, "%"))
363                                  subBlock.pWidth = atoi(keyWord);
364                               else
365                                  subBlock.width = atoi(keyWord);
366                            }
367                            else if(!strcmpi(keyWord, "height"))
368                            {
369                               GetKeyWord(&string, keyWord, sizeof(keyWord));
370                               if(strstr(keyWord, "%"))
371                                  subBlock.pHeight = atoi(keyWord);
372                               else
373                                  subBlock.height = atoi(keyWord);
374                            }
375                            else if(!strcmpi(keyWord, "valign"))
376                            {
377                               GetKeyWord(&string, keyWord, sizeof(keyWord));
378                               if(!strcmpi(keyWord, "middle") || !strcmpi(keyWord, "center") )
379                               {
380                                  subBlock.valign = middle;
381                               }
382                            }
383                            else if(!strcmpi(keyWord, "align"))
384                            {
385                               GetKeyWord(&string, keyWord, sizeof(keyWord));
386                               if(!strcmpi(keyWord, "middle") || !strcmpi(keyWord, "center") )
387                               {
388                                  subBlock.halign = middle;
389                               }
390                               else if(!strcmpi(keyWord, "left"))
391                               {
392                                  subBlock.halign = left;
393                               }
394                               else if(!strcmpi(keyWord, "right"))
395                               {
396                                  subBlock.halign = right;
397                               }
398                               else if(!strcmpi(keyWord, "top"))
399                               {
400                                  subBlock.valign = top;
401                               }
402                               else if(!strcmpi(keyWord, "bottom"))
403                               {
404                                  subBlock.valign = bottom;
405                               }
406                            }
407                         }
408                      }
409                      else if(!strcmpi(keyWord, "title"))
410                      {
411                         block = AddBlock(block, TITLE);
412                         titleBlock = block;
413                      }
414                      else if(!strcmpi(keyWord, "body"))
415                      {
416                         block = AddBlock(block, BODY);
417                         body = block;
418                         lastCh = ' ';
419
420                         for(;string[0];)
421                         {
422                            GetKeyWord(&string, keyWord, sizeof(keyWord));
423                            if(!strcmpi(keyWord, "bgcolor"))
424                            {
425                               GetKeyWord(&string, keyWord, sizeof(keyWord));
426                               background = !strcmpi(keyWord, "#fff") ?  white : strtol((keyWord[0] == '#') ? (keyWord+1) : keyWord, null, 16);
427                               if(keyWord[0] != '#' || strlen(keyWord) <= 7)
428                                  background |= 0xFF000000;
429                            }
430                            else if(!strcmpi(keyWord, "text"))
431                            {
432                               GetKeyWord(&string, keyWord, sizeof(keyWord));
433                               fontBlock.textColor =
434                                  block.textColor = 0xFF000000 | strtol((keyWord[0] == '#') ? (keyWord+1) : keyWord, null, 16);
435                            }
436                         }
437                      }
438                      else if(!strcmpi(keyWord, "br") || (!lastBR && (!strcmpi(keyWord, "div") || !strcmpi(keyWord, "li"))))
439                      {
440                         if(!lastBR || (lastCh && lastCh != ' '))
441                         {
442                            subBlock = AddBlock(block, BR);
443                            lastCh = ' ';
444                            lastBR = true;
445                         }
446                      }
447                      else if(!strcmpi(keyWord, "/ul"))
448                      {
449                         lastBR = false;
450                      }
451                      else if(!strcmpi(keyWord, "/ul"))
452                      {
453                         lastBR = false;
454                      }
455                      else if(!strcmpi(keyWord, "/div"))
456                      {
457                         if(!lastBR)
458                         {
459                            subBlock = AddBlock(block, BR);
460                            lastBR = true;
461                         }
462                         else
463                            lastBR = false;
464                      }
465                      else if(!strcmpi(keyWord, "code"))
466                      {
467                         code = true;
468                      }
469                      else if(!strcmpi(keyWord, "/code"))
470                      {
471                         code = false;
472                      }
473                      else if(!strcmpi(keyWord, "p"))
474                      {
475                      }
476                      else if(!strcmpi(keyWord, "font") || !strcmpi(keyWord, "b") || !strcmpi(keyWord, "i")
477                         || !strcmpi(keyWord, "strong") || !strcmpi(keyWord, "em") ||
478                         !strcmpi(keyWord, "h1") || !strcmpi(keyWord, "h2") || !strcmpi(keyWord, "h3"))
479                      {
480                         if((!strcmpi(keyWord, "h1") || !strcmpi(keyWord, "h2") || !strcmpi(keyWord, "h3")))
481                         {
482                            if(!lastBR || (lastCh && lastCh != ' '))
483                            {
484                               if(!lastBR)
485                                  subBlock = AddBlock(block, BR);
486                               subBlock = AddBlock(block, BR);
487                               lastBR = true;
488                            }
489                            lastCh = ' ';
490                         }
491                         subBlock = AddBlock(block, FONT);
492                         subBlock.attribs = fontBlock.attribs;
493                         if(!strcmpi(keyWord, "font"))
494                         {
495                            for(;string[0];)
496                            {
497                               GetKeyWord(&string, keyWord, sizeof(keyWord));
498
499                               if(!strcmpi(keyWord, "face"))
500                               {
501                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
502                                  delete subBlock.face;
503                                  subBlock.face = CopyString(keyWord);
504                               }
505                               else if(!strcmpi(keyWord, "size"))
506                               {
507                                  #define NUM_FONT_SIZES  7
508                                  static float fontSizes[NUM_FONT_SIZES] = { 7.5f, 10, 12, 13.5f, 18, 24, 36 };
509                                  int size;
510                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
511                                  size = atoi(keyWord);
512                                  if(keyWord[0] == '+')
513                                  {
514                                     int c;
515                                     int numTimes = atoi(keyWord+1);
516                                     for(c = 0; c<NUM_FONT_SIZES-1; c++)
517                                        if(fontSizes[c] > fontBlock.size)
518                                        {
519                                           numTimes--;
520                                           if(!numTimes) break;
521                                        }
522                                     subBlock.size = fontSizes[c];
523                                  }
524                                  else if(keyWord[0] == '-')
525                                  {
526                                     int c;
527                                     int numTimes = atoi(keyWord+1);
528                                     for(c = NUM_FONT_SIZES-1; c>0; c--)
529                                        if(fontSizes[c] < fontBlock.size)
530                                        {
531                                           numTimes--;
532                                           if(!numTimes) break;
533                                        }
534                                     subBlock.size = fontSizes[c];
535                                  }
536                                  else if(size && size < NUM_FONT_SIZES)
537                                  {
538                                     subBlock.size = fontSizes[size-1];
539                                  }
540
541                               }
542                               else if(!strcmpi(keyWord, "color"))
543                               {
544                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
545                                  subBlock.textColor =
546                                     0xFF000000 | strtol((keyWord[0] == '#') ? (keyWord+1) : keyWord, null, 16);
547                               }
548                            }
549                         }
550                         else
551                         {
552                            if(!strcmpi(keyWord, "b") || !strcmpi(keyWord, "strong"))
553                               subBlock.attribs |= FONT_BOLD;
554                            else if(!strcmpi(keyWord, "i") || !strcmpi(keyWord, "em"))
555                               subBlock.attribs |= FONT_ITALIC;
556                            else if(!strcmpi(keyWord, "h1"))
557                               subBlock.size = 20;
558                            else if(!strcmpi(keyWord, "h2"))
559                               subBlock.size = 16;
560                            else if(!strcmpi(keyWord, "h3"))
561                               subBlock.size = 14;
562                         }
563
564                         if(!subBlock.face) subBlock.face = CopyString(fontBlock.face);
565                         if(!subBlock.size) subBlock.size = fontBlock.size;
566                         if(!subBlock.textColor) subBlock.textColor = fontBlock.textColor;
567
568                         subBlock.prevFont = fontBlock;
569                         fontBlock = subBlock;
570                         block = subBlock;
571                      }
572                      else if(!strcmpi(keyWord, "/font") ||
573                              !strcmpi(keyWord, "/b") ||
574                              !strcmpi(keyWord, "/strong") ||
575                              !strcmpi(keyWord, "/em") ||
576                              !strcmpi(keyWord, "/i") ||
577                              !strcmpi(keyWord, "/h1") ||
578                              !strcmpi(keyWord, "/h2") ||
579                              !strcmpi(keyWord, "/h3"))
580                      {
581                         /*while(block.type != FONT && block.parent && block.parent.type != BODY)
582                            block = block.parent;*/
583                         if(block.type == FONT || block.type == ANCHOR)
584                         {
585                            fontBlock = block.prevFont;
586                            block = block.parent;
587                         }
588                         if(!lastBR && (!strcmpi(keyWord, "/h1") || !strcmpi(keyWord, "/h2") || !strcmpi(keyWord, "/h3")))
589                         {
590                            subBlock = AddBlock(block, BR);
591                            lastBR = true;
592                         }
593                      }
594                      else if(!strcmpi(keyWord, "a"))
595                      {
596                         int textDecoration = 0;
597                         Block anchor { type = ANCHOR, parent = block };
598
599                         for(;string[0];)
600                         {
601                            GetKeyWord(&string, keyWord, sizeof(keyWord));
602
603                            if(!strcmpi(keyWord, "name"))
604                            {
605                               GetKeyWordEx(&string, keyWord, sizeof(keyWord), false, false);
606                               delete anchor.anchor;
607                               anchor.anchor = CopyString(keyWord);
608                            }
609                            else if(!strcmpi(keyWord, "href"))
610                            {
611                               GetKeyWordEx(&string, keyWord, sizeof(keyWord), false, true);
612                               delete anchor.href;
613                               anchor.href = CopyString(keyWord);
614                               if(!textDecoration)
615                                  textDecoration = 1;
616                            }
617                            else if(!strcmpi(keyWord, "style"))
618                            {
619                               //for(;string[0];)
620                               {
621                                  GetKeyWordEx(&string, keyWord, sizeof(keyWord), false, false);
622                                  if(strstr(keyWord, "text-decoration:") && strstr(keyWord, "none;"))
623                                     textDecoration = 2;
624                               }
625                            }
626                         }
627
628                         if(anchor.href && (/*lastBR || */isalnum(lastCh)))
629                         {
630                            subBlock = AddBlock(block, TEXT);
631                            subBlock.text = CopyString("  ");
632                            subBlock.textLen = 2;
633                            subBlock.prevFont = fontBlock;
634                         }
635                         subBlock = anchor;
636                         block.subBlocks.Add(subBlock);
637
638                         subBlock.attribs = fontBlock.attribs | FONT_BOLD;
639                         delete subBlock.face;
640                         subBlock.face = CopyString(fontBlock.face);
641                         subBlock.size = fontBlock.size;
642                         subBlock.textColor = Color { 85,85,255 };
643                         subBlock.prevFont = fontBlock;
644                         if(textDecoration == 1) subBlock.attribs |= FONT_UNDERLINE;
645                         fontBlock = subBlock;
646                         block = subBlock;
647
648                         lastCh = 0;
649                      }
650                      /*else if(!strcmpi(keyWord, "/span"))
651                      {
652                         if(isalnum(lastCh))
653                         {
654                            subBlock = AddBlock(block, TEXT);
655                            subBlock.text = CopyString("  ");
656                            subBlock.textLen = 2;
657                            subBlock.prevFont = block.parent.prevFont;
658                         }
659                         lastCh = 0;
660                      }*/
661                      else if(!strcmpi(keyWord, "/a"))
662                      {
663                         if(block.type == ANCHOR)
664                         {
665                            fontBlock = block.prevFont;
666                            block = block.parent;
667                         }
668                      }
669                      else if(!strcmpi(keyWord, "script"))
670                      {
671                         insideScript++;
672                      }
673                      else if(!strcmpi(keyWord, "/script"))
674                      {
675                         if(insideScript)
676                            insideScript--;
677                      }
678                      else if(!strcmpi(keyWord, "style"))
679                      {
680                         insideStyle++;
681                      }
682                      else if(!strcmpi(keyWord, "/style"))
683                      {
684                         if(insideStyle)
685                            insideStyle--;
686                      }
687                      else if(!strcmpi(keyWord, "input") || !strcmpi(keyWord, "button"))
688                      {
689                         subBlock = AddBlock(block, INPUT);
690                         for(;string[0];)
691                         {
692                            GetKeyWord(&string, keyWord, sizeof(keyWord));
693
694                            if(!strcmpi(keyWord, "type"))
695                            {
696                               GetKeyWordEx(&string, keyWord, sizeof(keyWord), true, true);
697                               if(!strcmpi(keyWord, "text"))
698                               {
699                                  subBlock.inputType = InputType::text;
700                               }
701                               else if(!strcmpi(keyWord, "submit") || !strcmpi(keyWord, "image"))
702                               {
703                                  subBlock.inputType = submit;
704                               }
705                               else if(!strcmpi(keyWord, "radio"))
706                               {
707                                  subBlock.inputType = radio;
708                               }
709                               else if(!strcmpi(keyWord, "checkbox"))
710                               {
711                                  subBlock.inputType = checkbox;
712                               }
713                               else if(!strcmpi(keyWord, "hidden"))
714                               {
715                                  subBlock.inputType = hidden;
716                               }
717                               delete subBlock.href;
718                               subBlock.href = CopyString(keyWord);
719                            }
720                            else if(!strcmpi(keyWord, "size"))
721                            {
722                               GetKeyWord(&string, keyWord, sizeof(keyWord));
723                               subBlock.size = atoi(keyWord);
724                            }
725                            else if(!strcmpi(keyWord, "maxlength"))
726                            {
727                               int maxlength;
728                               GetKeyWord(&string, keyWord, sizeof(keyWord));
729                               maxlength = atoi(keyWord);
730                            }
731                            else if(!strcmpi(keyWord, "value"))
732                            {
733                               GetKeyWordEx(&string, keyWord, sizeof(keyWord), false, true);
734                               delete subBlock.value;
735                               subBlock.value = EncodeString(keyWord, null);
736                            }
737                            else if(!strcmpi(keyWord, "name"))
738                            {
739                               GetKeyWord(&string, keyWord, sizeof(keyWord));
740                               delete subBlock.name;
741                               subBlock.name = CopyString(keyWord);
742                            }
743                            else if(!strcmpi(keyWord, "src"))
744                            {
745                               GetKeyWordEx(&string, keyWord, sizeof(keyWord), false, false);
746                               delete subBlock.src;
747                               subBlock.src = keyWord[0] ? CopyString(keyWord) : null;
748                            }
749                         }
750                      }
751                      else if(!strcmpi(keyWord, "form"))
752                      {
753                         subBlock = AddBlock(block, FORM);
754                         for(;string[0];)
755                         {
756                            GetKeyWord(&string, keyWord, sizeof(keyWord));
757
758                            if(!strcmpi(keyWord, "action"))
759                            {
760                               GetKeyWordEx(&string, keyWord, sizeof(keyWord), false, false);
761                               delete subBlock.action;
762                               subBlock.action = CopyString(keyWord);
763                            }
764                         }
765                         block = subBlock;
766                      }
767                      else if(!strcmpi(keyWord, "/form"))
768                      {
769                         if(block.type == FORM)
770                         {
771                            block = block.parent;
772                         }
773                      }
774                      else if(!strcmpi(keyWord, "center"))
775                      {
776                         subBlock = AddBlock(block, CENTER);
777                         block = subBlock;
778                      }
779                      else if(!strcmpi(keyWord, "/center"))
780                      {
781                         if(block.type == CENTER)
782                         {
783                            block = block.parent;
784                         }
785                      }
786                      else if(!strcmpi(keyWord, "base"))
787                      {
788                         while(string[0])
789                         {
790                            GetKeyWord(&string, keyWord, sizeof(keyWord));
791
792                            if(!strcmpi(keyWord, "href"))
793                            {
794                               GetKeyWordEx(&string, keyWord, sizeof(keyWord), false, true);
795                               delete baseHRef;
796                               baseHRef = ParseURL(keyWord);
797                            }
798                         }
799                      }
800                      else if(!strcmpi(keyWord, "table"))
801                      {
802                         lastCh = ' ';
803                         subBlock = AddBlock(block, TABLE);
804                         subBlock.cellPadding = 4;
805                         for(;string[0];)
806                         {
807                            GetKeyWord(&string, keyWord, sizeof(keyWord));
808
809                            if(!strcmpi(keyWord, "width"))
810                            {
811                               GetKeyWord(&string, keyWord, sizeof(keyWord));
812                               if(strstr(keyWord, "%"))
813                                  subBlock.pWidth = atoi(keyWord);
814                               else
815                                  subBlock.width = atoi(keyWord);
816                            }
817                            else if(!strcmpi(keyWord, "height"))
818                            {
819                               GetKeyWord(&string, keyWord, sizeof(keyWord));
820                               if(strstr(keyWord, "%"))
821                                  subBlock.pHeight = atoi(keyWord);
822                               else
823                                  subBlock.height = atoi(keyWord);
824                            }
825                            else if(!strcmpi(keyWord, "cellpadding"))
826                            {
827                               GetKeyWord(&string, keyWord, sizeof(keyWord));
828                               subBlock.cellPadding = atoi(keyWord);
829                            }
830                            else if(!strcmpi(keyWord, "background"))
831                            {
832                               GetKeyWord(&string, keyWord, sizeof(keyWord));
833                               delete subBlock.src;
834                               subBlock.src = CopyString(keyWord);
835                            }
836                            else if(!strcmpi(keyWord, "bgcolor"))
837                            {
838                               GetKeyWord(&string, keyWord, sizeof(keyWord));
839                               subBlock.bgColor = !strcmpi(keyWord, "#fff") ? white : (0xFF000000 | strtol((keyWord[0] == '#') ? (keyWord+1) : keyWord, null, 16));
840                            }
841                            else if(!strcmpi(keyWord, "align"))
842                            {
843                               GetKeyWord(&string, keyWord, sizeof(keyWord));
844                               if(!strcmpi(keyWord, "middle") || !strcmpi(keyWord, "center") )
845                               {
846                                  subBlock.halign = middle;
847                               }
848                               else if(!strcmpi(keyWord, "left"))
849                               {
850                                  subBlock.halign = left;
851                               }
852                               else if(!strcmpi(keyWord, "right"))
853                               {
854                                  subBlock.halign = right;
855                               }
856                            }
857                         }
858
859                         block = subBlock;
860                      }
861                      else if(!strcmpi(keyWord, "/table"))
862                      {
863                         if(block.type == TD)
864                         {
865                            block = block.parent;
866                            lastCh = 0;//' ';
867                         }
868                         if(block.type == TR)
869                         {
870                            block = block.parent;
871                            lastCh = 0;//' ';
872                         }
873                         if(block.type == TABLE)
874                         {
875                            block = block.parent;
876                            lastCh = 0;//' ';
877                         }
878                      }
879                      else if(!strcmpi(keyWord, "tr"))
880                      {
881                         if(block.type == TD)
882                         {
883                            block = block.parent;
884                            lastCh = ' ';
885                         }
886                         if(block.type == TR)
887                         {
888                            block = block.parent;
889                            lastCh = ' ';
890                         }
891
892                         subBlock = AddBlock(block, TR);
893
894                         for(;string[0];)
895                         {
896                            GetKeyWord(&string, keyWord, sizeof(keyWord));
897                            /*
898                            if(!strcmpi(keyWord, "height"))
899                            {
900                               GetKeyWord(&string, keyWord, sizeof(keyWord));
901                               subBlock.height = atoi(keyWord);
902                            }
903                            */
904                            if(!strcmpi(keyWord, "align"))
905                            {
906                               GetKeyWord(&string, keyWord, sizeof(keyWord));
907                               if(!strcmpi(keyWord, "middle") || !strcmpi(keyWord, "center") )
908                               {
909                                  subBlock.valign = middle;
910                               }
911                               else if(!strcmpi(keyWord, "top"))
912                               {
913                                  subBlock.valign = top;
914                               }
915                               else if(!strcmpi(keyWord, "bottom"))
916                               {
917                                  subBlock.valign = bottom;
918                               }
919                            }
920                         }
921                         block = subBlock;
922                      }
923                      else if(!strcmpi(keyWord, "/tr"))
924                      {
925                         if(block.type == TD)
926                         {
927                            block = block.parent;
928                            lastCh = ' ';
929                         }
930                         if(block.type == TR)
931                         {
932                            block = block.parent;
933                            lastCh = ' ';
934                         }
935                      }
936                      else if(!strcmpi(keyWord, "td"))
937                      {
938                         while(block && block.type != TR && block.type != TABLE)
939                         {
940                            if(block == fontBlock)
941                               fontBlock = block.prevFont;
942                            block = block.parent;
943                            lastCh = ' ';
944                         }
945                         if(block)
946                         {
947                            if(block.type == TABLE)
948                            {
949                               block = AddBlock(block, TR);
950                            }
951                            subBlock = AddBlock(block, TD);
952                            subBlock.span = subBlock.rowSpan = 1;
953                            subBlock.valign = block.valign;
954                            subBlock.halign = block.halign;
955
956                            for(;string[0];)
957                            {
958                               GetKeyWord(&string, keyWord, sizeof(keyWord));
959
960                               if(!strcmpi(keyWord, "width"))
961                               {
962                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
963                                  if(strstr(keyWord, "%"))
964                                     subBlock.pWidth = atoi(keyWord);
965                                  else
966                                     subBlock.width = atoi(keyWord);
967                               }
968                               else if(!strcmpi(keyWord, "height"))
969                               {
970                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
971                                  if(strstr(keyWord, "%"))
972                                     subBlock.pHeight = atoi(keyWord);
973                                  else
974                                     subBlock.height = atoi(keyWord);
975                               }
976                               else if(!strcmpi(keyWord, "colspan"))
977                               {
978                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
979                                  subBlock.span = atoi(keyWord);
980                               }
981                               else if(!strcmpi(keyWord, "cellpadding"))
982                               {
983                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
984                                  subBlock.cellPadding = atoi(keyWord);
985                               }
986                               else if(!strcmpi(keyWord, "rowspan"))
987                               {
988                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
989                                  subBlock.rowSpan = atoi(keyWord);
990                               }
991                               else if(!strcmpi(keyWord, "nowrap"))
992                               {
993                                  subBlock.noWrap = true;
994                               }
995                               else if(!strcmpi(keyWord, "background"))
996                               {
997                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
998                                  delete subBlock.src;
999                                  subBlock.src = CopyString(keyWord);
1000                               }
1001                               else if(!strcmpi(keyWord, "bgcolor"))
1002                               {
1003                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
1004                                  subBlock.bgColor = !strcmpi(keyWord, "#fff") ? white : (0xFF000000 |strtol((keyWord[0] == '#') ? (keyWord+1) : keyWord, null, 16));
1005                               }
1006                               else if(!strcmpi(keyWord, "valign"))
1007                               {
1008                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
1009                                  if(!strcmpi(keyWord, "middle") || !strcmpi(keyWord, "center") )
1010                                  {
1011                                     subBlock.valign = middle;
1012                                  }
1013                                  else if(!strcmpi(keyWord, "top"))
1014                                  {
1015                                     subBlock.valign = top;
1016                                  }
1017                                  else if(!strcmpi(keyWord, "bottom"))
1018                                  {
1019                                     subBlock.valign = bottom;
1020                                  }
1021                               }
1022                               else if(!strcmpi(keyWord, "align"))
1023                               {
1024                                  GetKeyWord(&string, keyWord, sizeof(keyWord));
1025                                  if(!strcmpi(keyWord, "middle") || !strcmpi(keyWord, "center") )
1026                                  {
1027                                     subBlock.halign = middle;
1028                                  }
1029                                  else if(!strcmpi(keyWord, "left"))
1030                                  {
1031                                     subBlock.halign = left;
1032                                  }
1033                                  else if(!strcmpi(keyWord, "right"))
1034                                  {
1035                                     subBlock.halign = right;
1036                                  }
1037                               }
1038                            }
1039                            block = subBlock;
1040                         }
1041                      }
1042                      else if(!strcmpi(keyWord, "/td"))
1043                      {
1044                         if(block.type == TD)
1045                         {
1046                            block = block.parent;
1047                            lastCh = 0;//' ';
1048                         }
1049                      }
1050                      else if(!strcmpi(keyWord, "/html"))
1051                         break;
1052                   }
1053                }
1054                else
1055                {
1056                   if(tagLen < MAX_TAG_LEN-1)
1057                   {
1058                      tag[tagLen++] = ch;
1059                      tag[tagLen] = '\0';
1060                   }
1061                }
1062             }
1063             else
1064             {
1065                if((insideScript || insideStyle) && !tagLen && ch != '/')
1066                {
1067                   insideTag = false;
1068                }
1069                else
1070                {
1071                   if(tagLen < MAX_TAG_LEN-1)
1072                   {
1073                      tag[tagLen++] = ch;
1074                      tag[tagLen] = '\0';
1075                   }
1076                }
1077             }
1078             if(!strcmp(tag, "!--"))
1079             {
1080                commented = true;
1081                insideTag = false;
1082                tagLen = 0;
1083                tag[tagLen] = '\0';
1084             }
1085          }
1086          else
1087          {
1088             if(ch == '<')
1089             {
1090                if(!insideScript && !insideStyle)
1091                {
1092                   if(textLen)
1093                   {
1094                      if(block.type == TABLE)
1095                      {
1096                         subBlock = AddBlock(block, TR);
1097                         block = subBlock;
1098                      }
1099                      if(block.type == TR)
1100                      {
1101                         subBlock = AddBlock(block, TD);
1102                         subBlock.span = subBlock.rowSpan = 1;
1103                         subBlock.valign = block.valign;
1104                         subBlock.halign = block.halign;
1105                         block = subBlock;
1106                      }
1107
1108                      subBlock = AddBlock(block, TEXT);
1109                      delete subBlock.text;
1110                      subBlock.text = EncodeString(text, &textLen);
1111                      subBlock.textLen = textLen;
1112                      if(block.type != TITLE)
1113                         lastBR = false;
1114
1115                      textLen = 0;
1116                      text[0] = '\0';
1117                   }
1118                }
1119
1120                insideTag = true;
1121                tagLen = 0;
1122             }
1123             else if(ch != '\n' && ch != '\r' && ch != '\t' && !insideScript && !insideStyle)
1124             {
1125                if(insideSymbol)
1126                {
1127                   if(ch == ';')
1128                   {
1129                      unichar unicode = 0;
1130                      char utf8[5];
1131                      if(symbol[0] == '#' && symbol[1] == 'x')
1132                         unicode = strtol(symbol+2, null, 16);
1133                      else if(!strcmpi(symbol, "nbsp"))
1134                         unicode = ' ';
1135                      else if(!strcmpi(symbol, "copy"))
1136                         unicode ='©';
1137                      else if(!strcmpi(symbol, "raquo"))
1138                         unicode = '»';
1139                      else if(!strcmpi(symbol, "eacute"))
1140                         unicode = 'é';
1141                      else if(!strcmpi(symbol, "egrave"))
1142                         unicode = 'è';
1143                      else if(!strcmpi(symbol, "ecirc"))
1144                         unicode = 'ê';
1145                      else if(!strcmpi(symbol, "agrave"))
1146                         unicode = 'à';
1147                      else if(!strcmpi(symbol, "acirc"))
1148                         unicode = 'â';
1149                      else if(!strcmpi(symbol, "ocirc"))
1150                         unicode = 'ô';
1151                      if(unicode)
1152                      {
1153                         int len = UTF32toUTF8Len(&unicode, 1, utf8, 5);
1154                         int c;
1155                         for(c = 0; c<len; c++)
1156                            text[textLen++] = utf8[c];
1157                         lastCh = 0;
1158                      }
1159                      text[textLen] = '\0';
1160                      insideSymbol = false;
1161                   }
1162                   else if(ch == ' ')
1163                   {
1164                      // Give up... Treat it as text
1165                      text[textLen++] = '&';
1166                      CopyBytes(text + textLen, symbol, symbolLen);
1167                      textLen += symbolLen;
1168                      lastCh = text[textLen++] = ' ';
1169                      text[textLen] = '\0';
1170                      insideSymbol = false;
1171                   }
1172                   else
1173                   {
1174                      if(symbolLen < MAX_SYMBOL_LEN-1)
1175                      {
1176                         symbol[symbolLen++] = ch;
1177                         symbol[symbolLen] = '\0';
1178                      }
1179                   }
1180                }
1181                else
1182                {
1183                   if(ch == '&')
1184                   {
1185                      insideSymbol = true;
1186                      symbol[0] = '\0';
1187                      symbolLen = 0;
1188                   }
1189                   else
1190                   {
1191                      if(ch != ' ' || lastCh != ' ' || code)
1192                      // if(ch != ' ' || (textLen && text[textLen-1] !=' '))
1193                      {
1194                         lastCh = ch;
1195                         text[textLen++] = ch;
1196                         text[textLen] = '\0';
1197                      }
1198                   }
1199                }
1200             }
1201             else if(ch == '\n' && code)
1202             {
1203                //printf("%d\n", textLen);
1204                //puts(text);
1205
1206                subBlock = AddBlock(block, TEXT);
1207                delete subBlock.text;
1208                subBlock.text = EncodeString(text, &textLen);
1209                subBlock.textLen = textLen;
1210                textLen = 0;
1211                text[0] = '\0';
1212                subBlock = AddBlock(block, BR);
1213             }
1214          }
1215       }
1216       delete text;
1217       /*while(!f.Eof())
1218       {
1219          byte ch = 0;
1220          f.Getc(&ch);
1221       }*/
1222       return result;
1223    }
1224
1225    property String title
1226    {
1227       // Read-only: text of titleBlock's first sub-block when that sub-block is a TEXT block, null otherwise.
1228       get
1229       {
1230          Block head = titleBlock ? (Block)titleBlock.subBlocks.first : null;
1231          // Covers all three misses: no titleBlock, no sub-blocks, or a non-TEXT first sub-block.
1232          if(!head || head.type != TEXT)
1233             return null;
1234          return head.text;
1235       }
1236    }
1237 }