unit StringFunctions;

{
 [StringFunctions] [6.1]
Delphi 2005
January 2007

LICENSE

The contents of this file are subject to the Mozilla Public License Version
1.1 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
"http://www.mozilla.org/MPL/"

Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
the specific language governing rights and limitations under the License.

The Original Code is "[StringFunctions.pas]".

The Initial Developer of the Original Code is Martin Holmes (Victoria,
BC, Canada, "http://www.mholmes.com/"). Copyright (C) 2006 Martin Holmes
and the University of Victoria Computing and Media Centre. The code was 
co-developed for university and personal projects, and rights are shared
by Martin Holmes and the University of Victoria. All Rights Reserved.
}

{
This is a huge mass of various string and widestring functions created over
the years and through various projects. Caveat user -- many are obsolete and
only those used in current applications are regularly tested!

Dependencies:

TntUnicodeControls (Troy Wolbrink)
}

{$R Charcode.res}

interface

uses Windows, Forms, SysUtils, Classes, JclUnicode, TntClasses, TntSysUtils, WideStrUtils;

const
	PuncMarks: set of Char = ['!', '"', '(', ')', #91, #93, '{', '}', ',', '.', ';', ':', '<', '>', '?', '‹', '›', '¡', '`', '‚', '„', '…', '‘', '’', '“', '”', '«', '»', '¿'];
  XMLWhitespace = [WideChar(#32), WideChar(#13), WideChar(#10), WideChar(#9)];
  WebExts = WideString('htm||html||asp||aspx||shtml||gif||jpg||png||pdf||cgi||pl||cfm||cfml||shtm||php||php3||xml||xhtml||svg');

function PosEx(const SubStr: string; const S: string; StartIndex: integer): integer;
function Webble(InString: string): string;
function Webble2(InString: string): string;
function GetDecUnicodeNumFromWinNum(WinNum: integer): integer;
function UnWebble(InString: string): string;
function MacCharToPC(InString: string): string;
function PCCharToMac(InString: string): string;
function PCCharToHex(InString: string): string;
function HexToPCChar(InString: string): string;
function PCCharToMacHex(InString: string): string;
function PCCharToUnderscoreHex(InString: string): string;
function AllCharsToHTMLHex(InString: string): string;
function MakeEscapeLookup(InString: string): string;
function TrimString(InString: string): string;
function RemoveReturns(InString: string): string;
function HTMLParas(InString: string): string;
function ReturnsToBR(InString: string): string;
function StripPunctuation(InString: string): string;
function HideReturns(InString: string): string;
function RestoreReturns(InString: string): string;
function ReplaceStuff(Token, Replacement, InString: string): string;
function WReplaceStuff(Token, Replacement, InString: WideString): WideString;
function EscapeSingleQuotes(InString: string): string;
function EscapeDoubleQuotes(InString: string): string;
function EscapeAllQuotes(InString: string): string;
function QuotesToPercentEscapes(InString: string): string;
function DQuotesToEntity(InString: string): string;
function EscapeAngleBrackets(InString: string): string;
function MakeNonBreaking(InString: string): string;
function GetLastHeadCloseTagLocation(InString: string): integer;
function GetLastBodyOpenTagLocation(InString: string): integer;
function GetLastBodyCloseTagLocation(InString: string): integer;
function IsGoodEMailAddress(InString: string): Boolean;

function RemoveDoubleSpaces(InString: string): string;
function WRemoveDoubleSpaces(InString: WideString): WideString;
function WRemoveReturns(InString: WideString): WideString;
function GetTextBetween(InString, StartMarker, EndMarker: string): string;
function WGetTextBetween(InString, StartMarker, EndMarker: WideString): WideString;
function GetTextBetweenExtra(var OriginalString: string; StartMarker, EndMarker, Replacement: string; IncludeMarkers, Replace: Boolean): string;
function ReplaceTextFromTo(InString: string; StartMarker, EndMarker, Replacement: string): string;
procedure SplitWords(InString: string; var OutList: TStringList);
procedure WideSplitWords(InString: WideString; var OutList: TWideStringList);

function RemoveFileExtension(InString: string): string;

function SplitString(CharsPerString: integer; InString: string; var OutList: TStringList): Boolean;
function SplitStringToLines(LineEndToken: string; InString: string; var OutList: TStringList): Boolean;
function SplitStringToLinesEx(LineEndToken: string; InString: string; var OutList: TStringList): Boolean;
function MakeJavaScriptString(StringName, InString: string): string;
function MakeHexJavaScriptString(StringName, InString: string): string;
function MakeJavaScriptArray(ArrayName: string; ArrayItems: TStringList; StartFrom: integer): string;

function StripChars(CharsToStrip: string; InString: string): string;

function IncludeStuff(InString, Tag: string; IncludeIt: Boolean): string;
function InsertMetaTag(WebPage, ProgName, UserName: string): string;
function RemoveJSComments(InString: string): string;
function RemoveJSComments2(InString: string): string;

function SeparateJavaScript(var InString: string; HTMLFileName: string): Boolean;
function SeparateFirstJavaScript(var InString: string; HTMLFileName: string): Boolean;

function CleanupFileName(InName: string): string;//Removes iffy chars and spaces from file names

function TextToAudioFileName(Text: WideString; Extension: WideString): WideString;
function AudioFileNameToText(AudioFileName: WideString; Extension: WideString): WideString;

function SLIndexOf(SList: TStringList; S: string; CaseSensitive: Boolean): integer;

//Unicode/WideString functions
function WPosEx(const SubStr: WideString; const S: WideString; StartIndex: integer): integer; //My code
function WidePosEx(const SubStr, S: WideString; Offset: Integer = 1): Integer; //Adapted Delphi code
function WideStringToANSI(WS: WideString): string;
function WEscapeSingleQuotes(InString: Widestring): Widestring;
function WEscapeDoubleQuotes(InString: Widestring): Widestring;
function WEscapeAllQuotes(InString: Widestring): Widestring;
function WMakeCompliantXMLAttribute(InString: WideString; QuoteChar: WideChar): WideString;
function WUriAsCompliantXMLAttribute(InString: WideString; QuoteChar: WideChar): WideString;
function WUnescapeXMLAttribute(InString: WideString): WideString;
function WEscapeAngleBrackets(InString: Widestring): Widestring;
function WNormalizeReturns(InString: WideString): WideString;

function WideStringToHTMLNumeric(InString: WideString): string;
function WideStringToHTMLNumericAbove255(InString: WideString): string;
function WideStringToHTMLHex(InString: WideString): string;
function WideStringToHTMLOutput(InString: WideString; ProcessRTL: Boolean): string; //This escapes angle brackets that aren't part of tags before conversion
function WideStringToHTMLOutputCentredRTL(InString: WideString; ProcessRTL: Boolean): string; //This escapes angle brackets that aren't part of tags before conversion
function WideStringToJSUnicode(InString: WideString): string;
function WideStringToWideJSUnicode(InString: WideString): WideString;
function WideStringToExplanation(InString: WideString): string;
function WReturnsToBR(InString: WideString): WideString;
function WideStringToJSUnicodeEx(InString: WideString): string;
function WideStringToJSUnicodeExNoTags(InString: WideString): string;
function WideStringFromHTMLNumeric(InString: string): WideString;
function WideStringFromWSHTMLNumeric(InString: WideString): WideString;
function HDecimalUCodeToJHexUCode(InString: string): string;
function JHexUCodeToHDecimalUCode(InString: string): string;
function JHexUCodeToWideString(InString: WideString): WideString;
function PCCharToJSUnicode(InString: string): string;

function PCCharToJSUnicodeEx(InString: string): string; //Includes all chars, not just those over 127 -- used for simple encryption
function PCCharToHTMLNumeric(InString: string): string; //Includes all chars, not just those over 127 -- used for simple encryption

function WAllCharsToJSUnicode(InString: WideString): WideString;
function WAllCharsToHTMLHex(InString: WideString): WideString;
function WTagsToLowerCase(InString: WideString): WideString;
function MakeRTLRightAlignedPara(InString: string): string;
function FirstNonPuncCharIsRTL(InString: WideString): Boolean;
function LastNonPuncCharIsRTL(InString: WideString): Boolean;
function HasMoreRTLCharsThanLTRChars(InString: WideString): Boolean;
function TextIsMostLikelyRTL(InString: WideString): Boolean;
function InsertRTLSpans(InString: WideString): WideString;
function ProcessForRTLBoundaries(InString: WideString; Centred: Boolean): WideString;
function TextIsMostLikelyCJK(InString: WideString): Boolean;

function ParseWideStringToPerceivedChars(var CharList: TWideStringList; WInString: WideString;
 RemoveAscii: Boolean): Boolean;

function ShowReturns(InString: string): string;
function UniqueIDFromTime: string;

procedure CopyUnicodeTextToClipboard(InText: WideString);
function WGetUnicodePunctuation(InString: WideString): string;
function WGetUnicodeOpenPunctuation(InString: WideString): string;
function WUnicodeIsSpacePunc(InChar: WideChar): Boolean;
function WUnicodeIsReturn(InChar: WideChar): Boolean;
function WUnicodeIsCJK(InChar: WideChar): Boolean;
function WUnicodeIsCombining(InChar: WideChar): Boolean;
function WUnicodeChangesDirectionality(RTL: Boolean; InChar: WideChar): Boolean;

function BuildWebKeypad(InString: WideString; ProcessForRTL: Boolean): string;
function WConvertUnmatchedAngleBrackets(InString: WideString): WideString;
function WParseTagOrAttributes(InString: WideString; uslPairs: TTntStringList): Boolean;
function WStrIsLikelyURL(InString: WideString): Boolean;

//This function checks whether a substring inside a widestring is a whole
//word (delimited by punctuation or spaces).
function IsWholeWordHit(wsText: WideString; StartPos, EndPos: integer): Boolean;

function ParseAppVersionFromWideString(wsInput: WideString;
                                        var verMajor: integer;
                                        var verMinor: integer;
                                        var verBuild: integer;
                                        var verRelease: integer;
                                        var wsOutput: WideString): Boolean;

function FileHasUTF8Header(const FileName: WideString): Boolean;
function HasUTF8Header(InString: string): Boolean;
function FileMayBeUTF8(FileName: WideString): Boolean;
function LoadFileCarefullyToWideString(FileName: WideString): WideString;
function GetFileCharSet(FileName: WideString): TTntStreamCharSet;
function WTruncWSToMaxLen(InString: WideString; MaxLen: integer): WideString;

implementation

type
 StrRec = record
  allocSiz: Longint;
  refCnt: Longint;
  length: Longint;
 end;

const
 skew = sizeof(StrRec);
 rOff = sizeof(StrRec)-sizeof(Longint);
 overHead = sizeof(StrRec)+1;

//Pos function with StartIndex parameter added. Uses record and constants above.
//Found somewhere on the Web many years ago; now not needed because Delphi
//includes one.
function PosEx(const SubStr: AnsiString; const S: AnsiString; StartIndex: integer): integer; assembler;
asm
{->EAX     Pointer to substr               }
{  EDX     Pointer to string               }
{<-EAX     Position of substr in s or 0    }
 TEST    EAX,EAX
 JE      @@noWork
 TEST    EDX,EDX
 JE      @@stringEmpty
 OR      ECX,ECX                         {StartIndex = 0 ? return 0      }
 JZ      @@stringEmpty
 CMP     ECX,80000000h
 JAE     @@stringEmpty                   {StartIndex < 0 ? return 0      }
 PUSH    EBX
 PUSH    ESI
 PUSH    EDI
 MOV     ESI,EAX                         { Point ESI to substr           }
 MOV     EDI,EDX                         { Point EDI to s                }
 MOV     EBX,ECX
 MOV     ECX,[EDI-skew].StrRec.length    { ECX = Length(s)               }
 CMP     ECX,EBX
 JB      @@badindex                      { StartIndex > Length(s) ? return 0             }
 SUB     ECX,EBX
 INC     ECX                             { adjust Length(s)              }
 PUSH    EDI                             { remember s position to calculate index        }
 ADD     EDI,EBX
 DEC     EDI                             { Point EDI to start of s + StartIndex - 1      }
 MOV     EDX,[ESI-skew].StrRec.length    { EDX = Length(substr)          }
 DEC     EDX                             { EDX = Length(substr) - 1              }
 JS      @@fail                          { < 0 ? return 0                        }
 MOV     AL,[ESI]                        { AL = first char of substr             }
 INC     ESI                             { Point ESI to 2'nd char of substr      }
 SUB     ECX,EDX                         { #positions in s to look at    }
 JLE     @@fail
@@loop:
 REPNE   SCASB
 JNE     @@fail
 MOV     EBX,ECX                         { save outer loop counter               }
 PUSH    ESI                             { save outer loop substr pointer        }
 PUSH    EDI                             { save outer loop s pointer             }
 MOV     ECX,EDX
 REPE    CMPSB
 POP     EDI                             { restore outer loop s pointer  }
 POP     ESI                             { restore outer loop substr pointer     }
 JE      @@found
 MOV     ECX,EBX                         { restore outer loop counter    }
 JMP     @@loop
@@fail:
 POP     EDX                             { get rid of saved s pointer    }
 XOR     EAX,EAX
 JMP     @@exit
@@stringEmpty:
 XOR     EAX,EAX
 JMP     @@noWork
@@badindex:
 XOR     EAX,EAX
 JMP     @@exit
@@found:
 POP     EDX                             { restore pointer to first char of s    }
 MOV     EAX,EDI                         { EDI points of char after match        }
 SUB     EAX,EDX                         { the difference is the correct index   }
@@exit:
 POP     EDI
 POP     ESI
 POP     EBX
@@noWork:
end;


//changes upper-ascii chars to html escape codes in a string
function Webble(InString: string): string;

var
i: LongInt;
OutString: string;
begin
	OutString := '';
	for i := 1 to Length(InString) do
  	begin
     	if Ord(InString[i]) > 127 then
        	OutString := OutString + LoadStr(Ord(InString[i]))
        else
        	OutString := OutString + InString[i];
     end;

     Result := OutString;
end;

//changes upper-ascii chars to unicode numeric html escape codes in a string
function Webble2(InString: string): string;

var
i: LongInt;
OutString: string;
begin
	OutString := '';
  if Length(InString) < 1 then
  	Exit;
	for i := 1 to Length(InString) do
  	begin
     	if Ord(InString[i]) > 159 then
        	OutString := OutString + '&#' + IntToStr(Ord(InString[i])) + ';'
        else
        	if Ord(InString[i]) > 127 then
           	OutString := OutString + '&#' + IntToStr(GetDecUnicodeNumFromWinNum(Ord(InString[i]))) + ';'
           else
        		OutString := OutString + InString[i];
     end;

     Result := OutString;
end;

function GetDecUnicodeNumFromWinNum(WinNum: integer): integer;
begin
	Case WinNum of
     128: Result := 8364;
     130: Result := 8218;
     131: Result := 402;
     132: Result := 8222;
     133: Result := 8230;
     134: Result := 8224;
     135: Result := 8225;
     136: Result := 710;
     137: Result := 8240;
     138: Result := 352;
     139: Result := 8249;
     140: Result := 338;
     142: Result := 381;
     145: Result := 8216;
     146: Result := 8217;
     147: Result := 8220;
     148: Result := 8221;
     149: Result := 8226;
     150: Result := 8211;
     151: Result := 8212;
     152: Result := 732;
     153: Result := 8482;
     154: Result := 353;
     155: Result := 8250;
     156: Result := 339;
     158: Result := 382;
     159: Result := 376;
  else
  	Result := WinNum;
  end;
end;

//Converts HTML escape codes back to winchars
function UnWebble(InString: string): string;
var
i: integer;
ECode: string;

begin
	for i := 128 to 255 do
  	begin
     	ECode := LoadStr(i);
        if Length(ECode) > 0 then
        	InString := ReplaceStuff(ECode, Chr(i), InString);
     end;
  Result := InString;
end;

//changes upper-ascii Mac chars to PC equivalents in a string
function MacCharToPC(InString: string): string;

var
i: LongInt;
OutString: string;
begin
	OutString := '';
	for i := 1 to Length(InString) do
  	begin
     	if Ord(InString[i]) > 127 then
        	OutString := OutString + LoadStr(Ord(InString[i]) + 1000)
        else
        	OutString := OutString + InString[i];
     end;

     Result := OutString;
end;

//changes upper-ascii PC chars to Mac equivalents in a string
function PCCharToMac(InString: string): string;

var
i: LongInt;
OutString: string;
begin
	OutString := '';
	for i := 1 to Length(InString) do
  	begin
     	if Ord(InString[i]) > 127 then
        	OutString := OutString + LoadStr(Ord(InString[i]) + 2000)
        else
        	OutString := OutString + InString[i];
     end;

     Result := OutString;
end;

//changes upper-ascii PC (ISO) char numbers to '%' + hexadecimal numbers (for JavaScript escape codes) in a string
function PCCharToHex(InString: string): string;

var
i: LongInt;
OutString: string;
begin
	OutString := '';
	for i := 1 to Length(InString) do
  	begin
     	if Ord(InString[i]) > 127 then
        	OutString := OutString + LoadStr(Ord(InString[i]) + 3000)
        else
           begin
           	if Ord(InString[i]) = 37 then
              	OutString := OutString + '%25'
              else
        			OutString := OutString + InString[i];
           end;
     end;

     Result := OutString;
end;

//Changes hexed numbers to PCChars
function HexToPCChar(InString: string): string;
var
i: integer;
ECode: string;

begin
	for i := 128 to 255 do
  	begin
     	ECode := LoadStr(3000+i);
        if Length(ECode) > 0 then
        	InString := ReplaceStuff(ECode, Chr(i), InString);
     end;
  Result := InString;
end;

//changes upper-ascii PC (ISO) char numbers to '%' + hexadecimal
//numbers of Mac equivalents (for JavaScript escape codes) in a string
function PCCharToMacHex(InString: string): string;

var
i: LongInt;
OutString: string;
begin
	OutString := '';
	for i := 1 to Length(InString) do
  	begin
     	if Ord(InString[i]) > 127 then
        	OutString := OutString + LoadStr(Ord(InString[i]) + 4000)
        else
        	begin
           	if Ord(InString[i]) = 37 then
              	OutString := OutString + '%25'
              else
        			OutString := OutString + InString[i];
           end;
     end;

     Result := OutString;
end;

function PCCharToUnderscoreHex(InString: string): string;
var
i: LongInt;
OutString: string;

begin
	OutString := '';
  for i := 1 to Length(InString) do
  	begin
     	if Ord(InString[i]) > 127 then
        	OutString := OutString + '_' + UpperCase(IntToHex(Ord(InString[i]), 2))
        else
        	OutString := OutString + InString[i];
     end;
  Result := OutString;
end;

function AllCharsToHTMLHex(InString: string): string;
var
i: LongInt;
OutString: string;
CharNum: integer;

begin
	OutString := '';
  for i := 1 to Length(InString) do
  	begin
			CharNum := Ord(InString[i]);
        if (CharNum < 160) and (CharNum > 127) then
        	CharNum := GetDecUnicodeNumFromWinNum(CharNum);
     	OutString := OutString + '&#x' + UpperCase(IntToHex(CharNum, 4)) + ';';
     end;

  Result := OutString;
end;

function MakeEscapeLookup(InString: string): string;
var
i: integer;
CharArray: string;
EscapeArray: string;
UsedList: string;
Counter: integer;

begin
	Result := '';
  CharArray := 'CharList = new Array();' + #13#10;
  EscapeArray := 'EscapeList = new Array();' +#13#10;

	if Length(InString) < 1 then
  	begin
        Result := CharArray + EscapeArray;
  		Exit;
     end;

  Counter := 0;

	for i := 1 to Length(InString) do
  	begin
     	if (Ord(InString[i]) > 127) and (Pos(InString[i], UsedList) < 1) then
        	begin
           	UsedList := UsedList + InString[i];
           	CharArray := CharArray + 'CharList['
              				+ IntToStr(Counter)
									+ ']=''' + LoadStr(Ord(InString[i]) + 3000)
                          + ''';' + #13#10;
              EscapeArray := EscapeArray + 'EscapeList['
              				+ IntToStr(Counter)
									+ ']=''' + LoadStr(Ord(InString[i]))
                          + ''';' + #13#10;
              inc(Counter);
              if AnsiUpperCase(InString[i]) <> InString[i] then
              	begin
                    CharArray := CharArray + 'CharList['
                             + IntToStr(Counter)
                             + ']=''' + LoadStr(Ord(AnsiUpperCase(InString)[i]) + 3000)
                             + ''';' + #13#10;
                 	EscapeArray := EscapeArray + 'EscapeList['
                             + IntToStr(Counter)
                             + ']=''' + LoadStr(Ord(AnsiUpperCase(InString)[i]))
                             + ''';' + #13#10;
                 	inc(Counter);
                 end;

              if AnsiLowerCase(InString[i]) <> InString[i] then
              	begin
                    CharArray := CharArray + 'CharList['
                             + IntToStr(Counter)
                             + ']=''' + LoadStr(Ord(AnsiLowerCase(InString)[i]) + 3000)
                             + ''';' + #13#10;
                 	EscapeArray := EscapeArray + 'EscapeList['
                             + IntToStr(Counter)
                             + ']=''' + LoadStr(Ord(AnsiLowerCase(InString)[i]))
                             + ''';' + #13#10;
                 	inc(Counter);
                 end;
           end;
     end;
  if Counter > 0 then
  	begin
     	CharArray := CharArray + #13#10 + 'for (var i=0; i<CharList.length; i++){CharList[i] = unescape(CharList[i])}';
     end;
  Result := CharArray + #13#10 + EscapeArray;
end;

function TrimString(InString: string): string;

var
Position: LongInt;

begin

  while Pos(#9, InString) > 0 do
  	begin
     	Position := Pos(#9, InString);
     	InString := Copy(InString, 0, Position - 1) + ' ' + Copy(InString, Position + 1, Length(InString) - Position);
     end;


  while Pos('  ', InString) > 0 do
  	begin
     	Position := Pos('  ', InString);
     	InString := Copy(InString, 0, Position - 1) + Copy(InString, Position + 1, Length(InString) - Position);
     end;

  while (InString[1] = ' ') or (InString[1] = #13) or (InString[1] = #10) do
  	InString := Copy(InString, 2, Length(InString) - 1);

  while (InString[Length(InString)] = ' ') or (InString[Length(InString)] = #13) or (InString[Length(InString)] = #10) do
  	InString := Copy(InString, 1, Length(InString) - 1);

  Result := InString;
end;

function RemoveReturns(InString: string): string;
var
Position: LongInt;

begin
	while Pos(#13, InString) > 0 do
	  	begin
     	Position := Pos(#13, InString);
     	InString := Copy(InString, 0, Position - 1) + Copy(InString, Position + 1, Length(InString) - Position);
     end;
  while Pos(#10, InString) > 0 do
	  	begin
     	Position := Pos(#10, InString);
     	InString := Copy(InString, 0, Position - 1) + Copy(InString, Position + 1, Length(InString) - Position);
     end;

  Result := InString;
end;

//Adds HTML <br> tags for each return and 5 X &nbsp; for tabs
function HTMLParas(InString: string): string;

var
i: LongInt;
OutString: string;
begin
	OutString := '';
	for i := 1 to Length(InString) do
  	begin
     	if InString[i] = #13 then
        	OutString := OutString + '<br />' + InString[i]
        else
        	begin
           	if InString[i] = #9 then
              	OutString := OutString + '&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'
              else
        			OutString := OutString + InString[i];
           end;
     end;

     Result := OutString;
end;

//Replaces all returns with <BR> tags, and removes unwanted
//BRs in table code
function ReturnsToBR(InString: string): string;
begin
//Line added for 5.5.0.12 -- indentation in table code produced by other authoring
//tools was causing problems
  Result := RemoveDoubleSpaces(InString);
	Result := ReplaceStuff(#13#10, '<br />', Result);
//New section -- doesn't screw up table code
	Result := ReplaceStuff('<br /><tr', '<tr', Result);
  Result := ReplaceStuff('<br /><td', '<td', Result);
  Result := ReplaceStuff('<br /></td>', '</td>', Result);
  Result := ReplaceStuff('<br /></tr>', '</tr>', Result);
  Result := ReplaceStuff('<tr><br />', '<tr>', Result);
  Result := ReplaceStuff('<td><br />', '<td>', Result);
  Result := ReplaceStuff('</td><br />', '</td>', Result);
  Result := ReplaceStuff('</tr><br />', '</tr>', Result);
  Result := ReplaceStuff('<br /></table>', '</table>', Result);
	Result := ReplaceStuff('<br /> <tr', '<tr', Result);
  Result := ReplaceStuff('<br /> <td', '<td', Result);
  Result := ReplaceStuff('<br /> </td>', '</td>', Result);
  Result := ReplaceStuff('<br /> </tr>', '</tr>', Result);
  Result := ReplaceStuff('<tr> <br />', '<tr>', Result);
  Result := ReplaceStuff('<td> <br />', '<td>', Result);
  Result := ReplaceStuff('</td> <br />', '</td>', Result);
  Result := ReplaceStuff('</tr> <br />', '</tr>', Result);
//New lines for table head for HotPot 6 and Quandary 2.1
  Result := ReplaceStuff('<br /><thead', '<thead', Result);
  Result := ReplaceStuff('<br /></thead>', '</thead>', Result);
  Result := ReplaceStuff('<br /> </table>', '</table>', Result);
//Another new section -- don't screw up lists!
	Result := ReplaceStuff('<br /><li', '<li', Result);
  Result := ReplaceStuff('<br /></li', '</li', Result);
  Result := ReplaceStuff('<br /><ul', '<ul', Result);
  Result := ReplaceStuff('<br /></ul', '</ul', Result);
  Result := ReplaceStuff('</li><br />', '</li>', Result);
  Result := ReplaceStuff('</ul><br />', '</ul>', Result);
end;

//Removes all punctuation except single quotes
function StripPunctuation(InString: string): string;

var
i: LongInt;
OutString: string;

begin
	OutString := '';
	for i := 1 To Length(InString) do
  	begin
     	if not (InString[i] in PuncMarks) then
        	OutString := OutString + InString[i];
     end;

  Result := OutString;
end;

//changes returns to Char #26s in a string (for saving string lists)
function HideReturns(InString: string): string;

var
i: LongInt;

begin
  if InString <> '' then
  	while Pos(#13#10, InString) > 0 do
     	begin
        	i := Pos(#13#10, InString);
           InString := Copy(InString, 0, i-1) + #26 + Copy(InString, i+2, (Length(InString) - (i+1)));
        end;

     Result := InString;
end;

//changes Char #26s to returns in a string (for loading string lists)
function RestoreReturns(InString: string): string;

var
i: LongInt;

begin
	if InString <> '' then
		while Pos(#26, InString) > 0 do
  		begin
     		i := Pos(#26, InString);
     		InString := Copy(InString, 0, i-1) + #13#10 + Copy(InString, i+1, (Length(InString) - (i)));
     	end;

     Result := InString;
end;

//replaces a substring in a string with another string
function ReplaceStuff(Token, Replacement, InString: string): string;

var
i: LongInt;
SoFar: LongInt;

begin
	if InString <> '' then
  	begin
			SoFar := 0;
			while Pos(Token, InString) > SoFar do
  			begin
     			i := Pos(Token, InString);
        		SoFar := (i-1) + Length(Replacement);
     			InString := Copy(InString, 0, i-1) + Replacement + Copy(InString, i+Length(Token), (Length(InString) - (i+Length(Token)-1)));
     		end;
     end;

     Result := InString;
end;

function WReplaceStuff(Token, Replacement, InString: WideString): WideString;
var
i: integer;
SoFar: integer;

begin
  if InString <> '' then
     begin
        SoFar := 0;
        while Pos(Token, InString) > SoFar do
           begin
              i := Pos(Token, InString);
              SoFar := (i-1) + Length(Replacement);
              InString := Copy(InString, 0, i-1) + Replacement + Copy(InString, i+Length(Token), (Length(InString) - (i+Length(Token)-1)));
           end;
     end;
  Result := InString;
end;

//Places a backslash before each single quote
function EscapeSingleQuotes(InString: string): string;

var
i: LongInt;
OutString: string;

begin
	OutString := '';
	for i := 1 To Length(InString) do
  	begin
     	if (InString[i] = '''') or (InString[i] = '\') then
        	OutString := OutString + '\';
        OutString := OutString + InString[i];
     end;

  Result := OutString;
end;

//Places a backslash before each double quote
function EscapeDoubleQuotes(InString: string): string;

var
i: LongInt;
OutString: string;

begin
	OutString := '';
	for i := 1 To Length(InString) do
  	begin
     	if InString[i] = '"' then
        	OutString := OutString + '\';
        OutString := OutString + InString[i];
     end;

  Result := OutString;
end;

//Places a backslash before each single or double quote
function EscapeAllQuotes(InString: string): string;

var
i: LongInt;
OutString: string;

begin
	OutString := '';
	for i := 1 To Length(InString) do
  	begin
     	if (InString[i] = '"') or (InString[i] = '''') then
        	OutString := OutString + '\';
        OutString := OutString + InString[i];
     end;

  Result := OutString;
end;

function QuotesToPercentEscapes(InString: string): string;
var
i: LongInt;
OutString: string;

begin
	OutString := '';
	for i := 1 To Length(InString) do
  	begin
     	if (InString[i] = '"') then
        	OutString := OutString + '%22'
        else
           if (InString[i] = '''') then
              OutString := OutString + '%27'
           else
              OutString := OutString + InString[i];
     end;
  Result := OutString;
end;

//Replaces all double quotes with &quot; -- used for button captions
function DQuotesToEntity(InString: string): string;
var
i: LongInt;
OutString: string;
begin
	OutString := '';
	for i := 1 To Length(InString) do
  	begin
     	if (InString[i] = '"') then
        	OutString := OutString + '&quot;'
        else
        	OutString := OutString + InString[i];
     end;
  Result := OutString;

end;

//Changes angle brackets to HTML escapes
function EscapeAngleBrackets(InString: string): string;
begin
	InString := ReplaceStuff('<', '&lt;', InString);
  InString := ReplaceStuff('>', '&gt;', InString);
  Result := InString;
end;

function MakeNonBreaking(InString: string): string;
begin
	InString := ReplaceStuff(' ', '&nbsp;', InString);
  InString := ReplaceStuff(#9, '&nbsp;', InString);
  InString := ReplaceStuff(#13#10, '&nbsp;', InString);
  InString := ReplaceStuff(#13, '&nbsp;', InString);
  InString := ReplaceStuff(#10, '&nbsp;', InString);
  InString := Trim(InString);
  Result := InString;
end;

//finds the position of the last instance of an open body tag, if there is one,
//in a Web page; returns the position of the first char after the end of the tag.
function GetLastHeadCloseTagLocation(InString: string): integer;
var
i, j: integer;

begin
//Initialize default
	Result := -1;
  InString := ANSIUpperCase(InString);
  i := 0;
  j := Pos('</HEAD>', InString);
  while j > 0 do
  	begin
     	i := j;
        j := PosEx('</HEAD>', InString, j+1);
     end;
  Result := i;
end;

function GetLastBodyOpenTagLocation(InString: string): integer;
var
i, j: integer;
Index1: integer;
Index2: integer;

begin
//Initialize default
	Result := -1;
  InString := ANSIUpperCase(InString);
  i := 0;
  j := Pos('<BODY ', InString);
  while j > 0 do
  	begin
     	i := j;
        j := PosEx('<BODY ', InString, j+1);
     end;
  Index1 := i;

  i := 0;
  j := Pos('<BODY>', InString);
  while j > 0 do
  	begin
     	i := j;
        j := PosEx('<BODY>', InString, j+1);
     end;
  Index2 := i;
  if Index1 > Index2 then
  	i := Index1
  else
  	i := Index2;

  if i > 0 then
  	begin
  		while (InString[i] <> '>') and (i < Length(InString)) do
     		inc(i);
        inc(i);
     end;

  Result := i;
end;

//finds the position of the last instance of a close body tag, if there is one,
//in a Web page; returns the position of the first char in the tag.
function GetLastBodyCloseTagLocation(InString: string): integer;
var
i, j: integer;

begin
//Initialize default
	Result := -1;
  InString := ANSIUpperCase(InString);
  i := 0;
  j := Pos('</BODY>', InString);
  while j > 0 do
  	begin
     	i := j;
        j := PosEx('</BODY>', InString, j+1);
     end;

  if i > 0 then
  	Result := i;
end;

function IsGoodEMailAddress(InString: string): Boolean;
var
i: integer;
GoodChars: set of Char;

begin
  Result := True;
  if Length(InString) < 5 then
     begin
        Result := False;
        Exit;
     end;
  GoodChars := ['A'..'Z', 'a'..'z', '0'..'9','_','-','.','@'];
  for i := 1 to Length(InString) do
     if not (InString[i] in GoodChars) then
        Result := False;
end;

//Replaces double spaces with single spaces until there are no more doubles
function RemoveDoubleSpaces(InString: string): string;
var
Found: LongInt;

begin
	while Pos('  ', InString) > 0 do
  	begin
        Found := Pos('  ', InString);
        InString := Copy(InString, 1, Found) + Copy(InString, Found + 2, Length(InString) - (Found + 1));
     end;
  Result := InString;
end;

function WRemoveDoubleSpaces(InString: WideString): WideString;
var
Found: integer;

begin
  while Pos('  ', InString) > 0 do
     begin
        Found := Pos('  ', InString);
        InString := Copy(InString, 1, Found) + Copy(InString, Found + 2, Length(InString) - (Found + 1));
     end;
  Result := InString;
end;

function WRemoveReturns(InString: WideString): WideString;
begin
  InString := WideStringReplace(InString, #13#10, '', [rfReplaceAll]);
  InString := WideStringReplace(InString, #13, '', [rfReplaceAll]);
  InString := WideStringReplace(InString, #10, '', [rfReplaceAll]);
  Result := InString;
end;

procedure SplitWords(InString: string; var OutList: TStringList);
var
BeginWord: LongInt;
i: LongInt;
InWord: Boolean;

begin
//Clear the list
	OutList.Clear;

//iterate through the string
  BeginWord := 1;
  InWord := False;

	for i := 1 to Length(InString) do
  	begin
     	if (InString[i] in PuncMarks) or (InString[i] = ' ') then
        	begin
//Ignore apostrophes inside words
					if not ((InString[i] = '''') and not (InString[i-1] in PuncMarks) and not (InString[i+1] in PuncMarks)) then
              	begin
           			if InWord then
              			OutList.Append(Copy(InString, BeginWord, i-BeginWord));
           			if (InString[i] <> ' ') then
                    	begin
                       	if InString[i+1] = ' ' then
              					OutList.Append(InString[i] + ' ')
                          else
                          	if InString[i-1] = ' ' then
                             	OutList.Append(' ' + InString[i])
                                	else
                                   	OutList.Append(InString[i]);
                       end;
              		BeginWord := i + 1;
              		InWord := False;
                 end;
           end
        else
        	begin
        		InWord := True;
              if i = Length(InString) then
              	OutList.Append(Copy(InString, BeginWord, (i-BeginWord) + 1));
           end;
     end;
end;

procedure WideSplitWords(InString: WideString; var OutList: TWideStringList);
var
BeginWord: integer;
i: integer;
InWord: Boolean;

begin
//Clear the list
	OutList.Clear;

//Bail if nothing useful coming in
  if Length(InString) < 1 then
     Exit;

//Add initial and final space to avoid range check error!
  if InString[Length(InString)] <> ' ' then
     InString := InString + WideString(' ');
  if InString[1] <> ' ' then
     InString := WideString(' ') + InString;

//iterate through the string
  BeginWord := 1;
  InWord := False;

	for i := 1 to Length(InString) do
  	begin
     	if (UnicodeIsWhitespace(Cardinal(InString[i]))) or (UnicodeIsPunctuation(Cardinal(InString[i]))) then
        	begin
//Ignore apostrophes inside words
					if not ((InString[i] = '''') and not (UnicodeIsPunctuation(Cardinal(InString[i-1]))) and not (UnicodeIsPunctuation(Cardinal(InString[i+1])))) then
              	begin
           			if InWord then
              			OutList.Append(Copy(InString, BeginWord, i-BeginWord));
           			if (InString[i] <> ' ') then
                    	begin
                       	if InString[i+1] = ' ' then
              					OutList.Append(InString[i] + WideString(' '))
                          else
                             if InString[i-1] = ' ' then
                                OutList.Append(WideString(' ') + InString[i])
                             else
                                OutList.Append(InString[i]);
                       end;
              		BeginWord := i + 1;
              		InWord := False;
                 end;
           end
        else
        	begin
        		InWord := True;
              if i = Length(InString) then
              	OutList.Append(Copy(InString, BeginWord, (i-BeginWord) + 1));
           end;
     end;
end;


//Gets a piece of text between two markers and returns it
function GetTextBetween(InString, StartMarker, EndMarker: string): string;
var
i: LongInt;
j: LongInt;
OutString: string;

begin
  OutString := '';

//Make sure both markers exist
	i := Pos(StartMarker, InString);
  j := PosEx(EndMarker, InString, i+Length(StartMarker));

  if (i < 1) or (j < 1) then
  	begin
     	Result := OutString;
        Exit;
     end;

 	i := i + Length(StartMarker);
  OutString := Copy(InString, i, j - i);

	Result := OutString;
end;

function WGetTextBetween(InString, StartMarker, EndMarker: WideString): WideString;
var
StartPos, EndPos: integer;
wsTemp: WideString;

begin
  Result := ''; //default;
  StartPos := Pos(StartMarker, InString);
  if StartPos > 0 then
    begin
      wsTemp := Copy(InString, StartPos + Length(StartMarker), Length(InString) - (StartPos + Length(StartMarker) - 1));
      EndPos := Pos(EndMarker, wsTemp);
      if EndPos > 0 then
        Result := Copy(wsTemp, 1, EndPos-1);
    end;
end;

//Gets a piece of text between two markers and returns it with or without markers;
//can also replace the string in the original text
function GetTextBetweenExtra(var OriginalString: string; StartMarker, EndMarker, Replacement: string; IncludeMarkers, Replace: Boolean): string;
var
i: LongInt;
j: LongInt;
OutString: string;

begin
  OutString := '';

//Make sure both markers exist
	i := Pos(StartMarker, OriginalString);
  j := PosEx(EndMarker, OriginalString, i);

  if (i < 1) or (j < 1) then
  	begin
     	Result := OutString;
        Exit;
     end;

  J := j + Length(EndMarker);
  OutString := Copy(OriginalString, i, j - i);

  if Replace then
  	OriginalString := ReplaceStuff(OutString, Replacement, OriginalString);
     
  if not IncludeMarkers then
		begin
     	OutString := ReplaceStuff(StartMarker, '', OutString);
        OutString := ReplaceStuff(EndMarker, '', OutString);
     end;

	Result := OutString;
end;

//Replaces text between two markers, including the markers;
//does only the first instance
function ReplaceTextFromTo(InString: string; StartMarker, EndMarker, Replacement: string): string;
var
i: LongInt;
j: LongInt;
OutString: string;

begin
  OutString := '';

//Make sure both markers exist
	i := Pos(StartMarker, InString);
  j := PosEx(EndMarker, InString, i);

  if (i < 1) or (j < 1) then
  	begin
     	Result := OutString;
        Exit;
     end;

  j := j + Length(EndMarker);
  OutString := Copy(InString, 1, i-1) +
  				 Replacement +
               Copy(InString, j, Length(InString) - (j-1));
	Result := OutString;
end;

function RemoveFileExtension(InString: string): string;
//Takes the dot and extension off the file path/name and returns it.
//Returns the whole thing if there's no dot found.
var
i: integer;

begin
	i := Length(InString);
  while (InString[i] <> '.') and (i > 0) do
  	Dec(i);

  if i > 0 then
  	Result := Copy(InString, 1, i-1)
  else
  	Result := InString;
end;


//Splits a string into sections, and fills the
//string list with those sections
function SplitString(CharsPerString: integer; InString: string; var OutList: TStringList): Boolean;
var
BreakPoint: integer;

begin
//Set the result
	Result := False;

//clear the string list
  OutList.Clear;

//Add all but the final bit
  while Length(InString) > CharsPerString do
  	begin
     	BreakPoint := CharsPerString;
        while (InString[BreakPoint] = '\') and (BreakPoint < Length(InString)) do
        	inc(BreakPoint);
     	OutList.Add(Copy(InString, 1, BreakPoint));
        InString := Copy(InString, BreakPoint + 1, Length(InString) - BreakPoint);
     end;

//Add the final bit
	OutList.Add(InString);
  Result := True;
end;

//Splits a string into items in a string list based on the line end token
function SplitStringToLines(LineEndToken: string; InString: string; var OutList: TStringList): Boolean;
var
TokenPosition: integer;
StartPoint: integer;

begin
//Clear the string list
	OutList.Clear;

//Check you have a valid token
	if Length(LineEndToken) < 1 then
  	begin
     	OutList.Add(InString);
        Result := False;
        Exit;
     end;

//Work through the string
	TokenPosition := Pos(LineEndToken, InString);
	while TokenPosition > 0 do
  	begin

//Get the string
			OutList.Add(Copy(InString, 1, TokenPosition - 1));

//Remove it from InString
        StartPoint := TokenPosition + Length(LineEndToken);
        InString := Copy(InString, StartPoint, Length(InString) - (StartPoint - 1));

     	TokenPosition := Pos(LineEndToken, InString);
     end;

//Add the last bit
	OutList.Add(InString);
  Result := True;

end;

//Splits a string into items in a string list based on the line end token;
//Includes the token
function SplitStringToLinesEx(LineEndToken: string; InString: string; var OutList: TStringList): Boolean;
var
TokenPosition: integer;
StartPoint: integer;

begin
//Clear the string list
	OutList.Clear;

//Check you have a valid token
	if Length(LineEndToken) < 1 then
  	begin
     	OutList.Add(InString);
        Result := False;
        Exit;
     end;

//Work through the string
	TokenPosition := Pos(LineEndToken, InString);
	while TokenPosition > 0 do
  	begin

//Get the string
			OutList.Add(Copy(InString, 1, TokenPosition - 1) + LineEndToken);

//Remove it from InString
        StartPoint := TokenPosition + Length(LineEndToken);
        InString := Copy(InString, StartPoint, Length(InString) - (StartPoint - 1));

     	TokenPosition := Pos(LineEndToken, InString);
     end;

//Add the last bit if there's anything left
	if Length(InString) > 0 then
		OutList.Add(InString);
  Result := True;

end;

//Assigns a string to a JavaScript identifier.
function MakeJavaScriptString(StringName, InString: string): string;
begin
	Result := StringName + '=''' + HDecimalUCodeToJHexUCode(EscapeSingleQuotes(InString)) + ''';';
end;

function MakeHexJavaScriptString(StringName, InString: string): string;
begin
	Result := MakeJavaScriptString(StringName, PCCharToHex(InString));
end;

function MakeJavaScriptArray(ArrayName: string; ArrayItems: TStringList; StartFrom: integer): string;
var
i: integer;
OutString: string;
ItemName: string;

begin
	Outstring := '';
  if ArrayItems.Count > 0 then
  	begin
     	for i := 0 to ArrayItems.Count - 1 do
        	begin
           	ItemName := ArrayName + '[' + IntToStr(StartFrom + i) + ']';
              OutString := OutString + MakeJavaScriptString(ItemName, ArrayItems[i]) + #13#10;
           end;
     end;
  Result := OutString;

end;

function StripChars(CharsToStrip: string; InString: string): string;
//this removes each of the chars in CharsToStrip from the string
var
FoundPos: integer;
i: integer;
CharString: string;

begin
	Result := InString;
	for i := 1 to Length(CharsToStrip) do
  	begin
     	CharString := Copy(CharsToStrip, i, 1);
     	FoundPos := Pos(CharString, InString);
        while FoundPos > 0 do
        	begin
           	Delete(InString, FoundPos, 1);
              FoundPos := Pos(CharsToStrip[i], InString);
           end;
     end;
	Result := InString;
end;

function IncludeStuff(InString, Tag: string; IncludeIt: Boolean): string;
//This deletes an element bracketed by tags, or if IncludeIt is True,
//it deletes the tags only. It operates on all instances of the tag in
//InString.
var
OutString: string;
OpenTag: string;
CloseTag: string;
StartPos: LongInt;
EndPos: LongInt;

begin
	OpenTag := '[' + Tag + ']';
  CloseTag := '[/' + Tag + ']';
  OutString := InString;
	if IncludeIt = True then
  	begin
     	OutString := ReplaceStuff(OpenTag, '', OutString);
        Result := ReplaceStuff(CloseTag, '', OutString);
        Exit;
     end
  else
  	begin
     	while Pos(OpenTag, OutString) > 0 do
        	begin
           	StartPos := Pos(OpenTag, OutString);
              EndPos := PosEx(CloseTag, OutString, StartPos);
              if EndPos > 0 then
              	begin
                 	EndPos := EndPos + Length(CloseTag);
                 	OutString := Copy(OutString, 1, StartPos-1) + Copy(OutString, EndPos, Length(OutString) - (EndPos-1));
                 end
              else
              	begin
 //No end tag -- delete the open tag only
                 	OutString := Copy(OutString, 1, StartPos-1) + Copy(OutString, StartPos + Length(OpenTag), Length(OutString) - ((StartPos + Length(OpenTag))-1));
                 end;
				end;
        Result := OutString;
     end;

end;

function InsertMetaTag(WebPage, ProgName, UserName: string): string;
//This inserts a meta tag into the page identifying the app
//and user who created it.
var
MetaTag: string;
HeadPos: LongInt;
begin

//Find the position to insert at
	HeadPos := Pos('<HEAD>', UpperCase(WebPage));

//Insert the string
  if HeadPos > 0 then
  	begin
     	MetaTag := '<meta name="author" content="Created with ' + ProgName + ' by Half-Baked Software, ';
        if UserName <> '' then
        	MetaTag := MetaTag + 'registered to ' + UserName + '."></meta>'
        else
        	MetaTag := MetaTag + 'UNREGISTERED.">';
        MetaTag := MetaTag + '<meta name="keywords" content="' + ProgName + ', Hot Potatoes, Half-Baked Software, Windows, University of Victoria"></meta>';
			Insert(MetaTag, WebPage, HeadPos + 6);
     end;
     
  Result := WebPage;
end;

function RemoveJSComments(InString: string): string;
var
StringList: TStringList;
i: integer;
InJavaScript: Boolean;

begin
	StringList := TStringList.Create;

//Set the result to the input in case of cockups
  Result := InString;
  try
     StringList.Text := InString;
     if StringList.Count < 1 then
     	Exit;

     InJavaScript := False;
     i := StringList.Count - 1;
     while (i > 0) do
     	begin
        	if InJavaScript then
           	begin
        			if Copy(StringList[i], 1, 2) = '//' then
                    if Pos(Copy(StringList[i], 1, 4), '//<!<!--//--//]]') < 1 then
                 	   StringList.Delete(i);
              end;
           if ANSIUpperCase(Copy(StringList[i], 1, 7)) = '<SCRIPT' then
           	InJavaScript := False;
           if ANSIUpperCase(Copy(StringList[i], 1, 9)) = '</SCRIPT>' then
           	InJavaScript := True;

           dec(i);
        end;
  	Result := StringList.Text;
  finally
  	StringList.Free;
  end;
end;

function RemoveJSComments2(InString: string): string;
//This function assumes that we're only processing a block of pure JavaScript, and
//doesn't allow for any surrounding HTML tags
var
SL: TStringList;
i: integer;
j: integer;
CommentPos: integer;
QuoteCount: integer;

begin
//Set the result to the input in case of cockups
  Result := InString;

//Remove block-level comments in one go
  GetTextBetweenExtra(InString, '/*', '*/', '', True, True);

	SL := TStringList.Create;

  try
     SL.Text := InString;
     if SL.Count < 1 then
     	Exit;

     i := SL.Count - 1;
     while (i > 0) do
     	begin
           CommentPos := Pos('//', SL[i]);
           if (CommentPos > 0) then
              begin
                 if (CommentPos = 1) then
                    SL[i] := Copy(SL[i], 1, CommentPos - 1)
                 else
//Check it's not part of a string literal -- this only takes account of
//strings with single quotes because we're working on our own files
                    begin
                       QuoteCount := 0;
                       for j := Length(SL[i]) downto CommentPos do
                          if (SL[i][j]  = '''') then
                             inc(QuoteCount);
                       if QuoteCount mod 2 = 0 then
//it shouldn't be inside a string literal
                          SL[i] := Copy(SL[i], 1, CommentPos - 1);
                    end;
              end;
           if Length(SL[i]) < 1 then
              SL.Delete(i);
           dec(i);
        end;
  	Result := SL.Text;
  finally
  	SL.Free;
  end;
end;

function SeparateJavaScript(var InString: string; HTMLFileName: string): Boolean;
var
JSCode: TStringList;
JSFileName: string;
JSLinkTag: string;
StartOpenTag, EndOpenTag, StartCloseTag, EndCloseTag: integer;

begin
//Default return
	Result := False;

//Is there any JavaScript?
  StartOpenTag := Pos('<script', InString);
	if StartOpenTag < 1 then
  	Exit;

	JSCode := TStringList.Create;
  try

//Make the javascript file name
     JSFileName := ChangeFileExt(HTMLFileName, '.js');

//Make the link tag to go in the HTML file
     JSLinkTag := '<script src="' + ExtractFileName(JSFileName) + '" type="text/javascript" language="javascript"></script>';

//Find each piece of script and extract it
     while StartOpenTag > 0 do
        begin
//Find the relevant starts and ends for the script and its tags
           EndOpenTag := StartOpenTag;
           while (InString[EndOpenTag] <> '>') and (EndOpenTag < Length(InString)) do
              inc(EndOpenTag);

           StartCloseTag := PosEx('</script>', InString, EndOpenTag);
           if StartCloseTag < 1 then
              break;

           EndCloseTag := StartCloseTag + 9;

//Grab the script
           JSCode.Add(Copy(InString, EndOpenTag+1, (StartCloseTag-(EndOpenTag+1))));

//Take the whole script tag out of the page
           InString := Copy(InString, 1, StartOpenTag-1) +
                       Copy(InString, EndCloseTag+1, Length(InString)-EndCloseTag);

//Get the next bit of script
           StartOpenTag := Pos('<script', InString);
        end;

//If anything was found, save it
		if JSCode.Count > 0 then
     	begin
     		JSCode.SaveToFile(JSFileName);
//Insert the link in the file
           Insert(JSLinkTag, InString, Pos('</head>',InString));
           Result := True;
        end;

   finally
   	JSCode.Free;
   end;
end;

function SeparateFirstJavaScript(var InString: string; HTMLFileName: string): Boolean;
var
JSCode: TStringList;
JSFileName: string;
JSLinkTag: string;
StartOpenTag, EndOpenTag, StartCloseTag, EndCloseTag: integer;

begin
//Default return
	Result := False;

//Is there any JavaScript?
  StartOpenTag := Pos('<script', InString);
	if StartOpenTag < 1 then
  	Exit;

	JSCode := TStringList.Create;
  try

//Make the javascript file name
     JSFileName := ChangeFileExt(HTMLFileName, '.js');
//Anonymize the filename using this version:
//		JSFileName := FormatDateTime('yymmddhhnnsszzz', Now) + '.js';

//Make the link tag to go in the HTML file
     JSLinkTag := '<script src="' + ExtractFileName(JSFileName) + '" type="text/javascript"></script>';

//Find each piece of script and extract it
     if StartOpenTag > 0 then
        begin
//Find the relevant starts and ends for the script and its tags
           EndOpenTag := StartOpenTag;
           while (InString[EndOpenTag] <> '>') and (EndOpenTag < Length(InString)) do
              inc(EndOpenTag);

           StartCloseTag := PosEx('</script>', InString, EndOpenTag);

//If no close tag, then it's ill-formed and we should leave
           if StartCloseTag < 1 then
              begin
                 Exit;
              end;

           EndCloseTag := StartCloseTag + 9;

//Grab the script
           JSCode.Add(Copy(InString, EndOpenTag+1, (StartCloseTag-(EndOpenTag+1))));

//Take the whole script tag out of the page
           InString := Copy(InString, 1, StartOpenTag-1) +
                       Copy(InString, EndCloseTag+1, Length(InString)-EndCloseTag);
        end;

//If anything was found, save it
		if JSCode.Count > 0 then
     	begin
     		JSCode.SaveToFile(JSFileName);
//Insert the link in the file
           Insert(JSLinkTag, InString, Pos('</head>',InString));
           Result := True;
        end;

   finally
   	JSCode.Free;
   end;
end;

//Removes iffy chars and spaces from file names
function CleanupFileName(InName: string): string;
const
BadChars: set of Char = ['`','''','"',' ','(',')',':',';','{','}','[',']',',','+'];
var
i: integer;

begin
	Result := '';
  for i := 1 to Length(InName) do
  	if not (InName[i] in BadChars) then
     	Result := Result + InName[i];
end;

function TextToAudioFileName(Text: WideString; Extension: WideString): WideString;
var
i: integer;

begin
  Result := WideString('');
  if Length(Text) < 1 then
     Exit;
  for i := 1 to Length(Text) do
     begin
        if Ord(Text[i]) in [48..57,97..122] then
           Result := Result + Text[i]
        else
           if Ord(Text[i]) in [65..90] then
              Result := Result + WideLowerCase(Text[i])
           else
              if Text[i] = WideChar(' ') then
                 Result := Result + WideChar('_')
              else
                 if Text[i] = WideChar('''') then
                    Result := Result + WideChar('7')
                 else
                    Result := Result + WideChar('-');
     end;
  if Length(Extension) > 0 then
     begin
        if Extension[1] <> WideChar('.') then
           Extension := WideChar('.') + Extension;
        Result := Result + Extension;
  end;
end;

function AudioFileNameToText(AudioFileName: WideString; Extension: WideString): WideString;
var
i: integer;

begin
  Result := WideString('');
  if Length(AudioFileName) < 1 then
     Exit;
  if Length(Extension) > 0 then
     begin
        if Pos(Extension, AudioFileName) = (Length(AudioFileName) - Length(Extension))+1 then
           AudioFileName := Copy(AudioFileName, 1, Length(AudioFileName)-Length(Extension));
     end;
  for i := 1 to Length(AudioFileName) do
     begin
        if Ord(AudioFileName[i]) in [48..54,56..57,65..90,97..122] then
           Result := Result + AudioFileName[i]
        else
           if AudioFileName[i] = WideChar('_') then
              Result := Result + WideChar(' ')
           else
              if AudioFileName[i] = WideChar('7') then
                 Result := Result + WideChar('''')
              else
                 Result := Result + WideChar('?');
     end;
end;

function SLIndexOf(SList: TStringList; S: string; CaseSensitive: Boolean): integer;
var
i: integer;
begin
   if CaseSensitive = False then
      begin
         Result := SList.IndexOf(S);
         Exit;
      end;
   Result := -1; //default return
   if SList.Count < 1 then
      Exit;
   for i := 0 to SList.Count-1 do
      if SList[i] = S then
         Result := i;
end;

//WideString functions
function WPosEx(const SubStr: WideString; const S: WideString; StartIndex: integer): integer;
var
Temp: WideString;

begin
//Make a copy of the string starting from StartIndex
  Temp := Copy(S, StartIndex, Length(S) - (StartIndex-1));
  Result := Pos(SubStr, Temp);
  if Result > 0 then
     Result := Result + (StartIndex-1);
end;

function WidePosEx(const SubStr, S: WideString; Offset: Integer = 1): Integer; //Adapted Delphi code
var
  I,X: Integer;
  Len, LenSubStr: Integer;
begin
  if Offset = 1 then
    Result := Pos(SubStr, S)
  else
  begin
    if Offset < 0 then
    begin
      Result := 0;
      exit;
    end;
    I := Offset;
    LenSubStr := Length(SubStr);
    Len := Length(S) - LenSubStr + 1;
    while I <= Len do
    begin
      if S[I] = SubStr[1] then
      begin
        X := 1;
        while (X < LenSubStr) and (S[I + X] = SubStr[X + 1]) do
          Inc(X);
        if (X = LenSubStr) then
        begin
          Result := I;
          exit;
        end;
      end;
      Inc(I);
    end;
    Result := 0;
  end;
end;

function WideStringToANSI(WS: WideString): string;
var
  l: integer;
begin
  if WS = '' then
    Result := ''
  else
     begin
        l := WideCharToMultiByte(CP_ACP,
           0, @WS[1], -1, nil, 0, nil, nil);
        SetLength(Result, l-1);
        if l > 1 then
           WideCharToMultiByte(CP_ACP,
              0, @WS[1], -1, @Result[1], l-1, nil, nil);
     end;
end;

//Places a backslash before each single quote
function WEscapeSingleQuotes(InString: WideString): WideString;

var
i: LongInt;
OutString: WideString;

begin
	OutString := '';
	for i := 1 To Length(InString) do
  	begin
     	if (InString[i] = '''') or (InString[i] = '\') then
        	OutString := OutString + '\';
        OutString := OutString + InString[i];
     end;

  Result := OutString;
end;

//Places a backslash before each double quote
function WEscapeDoubleQuotes(InString: WideString): WideString;
var
i: LongInt;
OutString: WideString;

begin
	OutString := '';
	for i := 1 To Length(InString) do
  	begin
        Application.ProcessMessages;
     	if InString[i] = '"' then
        	OutString := OutString + '\';
        OutString := OutString + InString[i];
     end;

  Result := OutString;
end;

function WEscapeAllQuotes(InString: WideString): WideString;
var
i: LongInt;
OutString: WideString;

begin
	OutString := '';
	for i := 1 To Length(InString) do
  	begin
      Application.ProcessMessages;
     	if InString[i] = WideChar('"') then
        	OutString := OutString + '\u0022'
      else
        if InString[i] = WideChar('''') then
          OutString := OutString + '\u0027'
        else
          OutString := OutString + InString[i];
     end;

  Result := OutString;
end;

function WMakeCompliantXMLAttribute(InString: WideString; QuoteChar: WideChar): WideString;
begin
  Result := WideTrim(InString);
//Do ampersands first, so other entities aren't double-escaped
  Result := StringReplace(Result, '&', '&#38;', [rfReplaceAll]);
  if QuoteChar = '"' then
    Result := StringReplace(Result, '"', '&#34;', [rfReplaceAll]);
  if QuoteChar = '''' then
    Result := StringReplace(Result, '''', '&#39;', [rfReplaceAll]);
  Result := StringReplace(Result, #13, ' ', [rfReplaceAll]);
  Result := StringReplace(Result, #10, '', [rfReplaceAll]);
  Result := StringReplace(Result, '<', '&#60;', [rfReplaceAll]);
  Result := StringReplace(Result, '>', '&#62;', [rfReplaceAll]);
end;

function WUriAsCompliantXMLAttribute(InString: WideString; QuoteChar: WideChar): WideString;
begin
  Result := WideTrim(InString);
//Do ampersands first, so other entities aren't double-escaped.
//First unescape any, so that we know where we stand.
  Result := StringReplace(Result, '&amp;', '&', [rfReplaceAll]);
//Now escape them all.
  Result := StringReplace(Result, '&', '&amp;', [rfReplaceAll]);
  if QuoteChar = '"' then
    Result := StringReplace(Result, '"', '&#34;', [rfReplaceAll]);
  if QuoteChar = '''' then
    Result := StringReplace(Result, '''', '&#39;', [rfReplaceAll]);
  Result := StringReplace(Result, #13, ' ', [rfReplaceAll]);
  Result := StringReplace(Result, #10, '', [rfReplaceAll]);
  Result := StringReplace(Result, '<', '&#60;', [rfReplaceAll]);
  Result := StringReplace(Result, '>', '&#62;', [rfReplaceAll]);
end;

function WUnescapeXMLAttribute(InString: WideString): WideString;
begin
  Result := WideTrim(InString);
//Do ampersands first, so other entities aren't double-escaped.
  Result := StringReplace(Result, '&amp;', '&', [rfReplaceAll]);
  Result := StringReplace(Result, '&#38;', '&', [rfReplaceAll]);
  Result := StringReplace(Result, '&#34;', '"', [rfReplaceAll]);
  Result := StringReplace(Result, '&#39;', '''', [rfReplaceAll]);
  Result := StringReplace(Result, '&#60;', '<', [rfReplaceAll]);
  Result := StringReplace(Result, '&#62;', '>', [rfReplaceAll]);
  Result := StringReplace(Result, '&lt;', '<', [rfReplaceAll]);
  Result := StringReplace(Result, '&gt;', '>', [rfReplaceAll]);
end;

function WEscapeAngleBrackets(InString: Widestring): Widestring;
var
i: LongInt;
OutString: WideString;

begin
	OutString := '';
	for i := 1 To Length(InString) do
  	begin
        Application.ProcessMessages;
     	if InString[i] = '<' then
        	OutString := OutString + WideString('&lt;')
        else
           if InString[i] = '>' then
        	   OutString := OutString + WideString('&gt;')
           else
              OutString := OutString + InString[i];
     end;

  Result := OutString;
end;

function WNormalizeReturns(InString: WideString): WideString;
var
i: integer;
wsTemp: WideString;

begin
  Result := InString;
  if Length(InString) < 1 then
    Exit;
  wsTemp := WideString('*&^%$#');
  Result := StringReplace(Result, WideString(#10#10), wsTemp, [rfReplaceAll]);
  Result := StringReplace(Result, WideString(#13#10), wsTemp, [rfReplaceAll]);
  Result := StringReplace(Result, WideString(#10), wsTemp, [rfReplaceAll]);
  Result := StringReplace(Result, WideString(#13), wsTemp, [rfReplaceAll]);
  Result := StringReplace(Result, wsTemp, WideString(#13#10), [rfReplaceAll]);
end;

function WideStringToHTMLNumeric(InString: WideString): string;
var
i: integer;
CharNum: integer;

begin
//initialize, otherwise repeated calls to the function will simply
//append to the Result variable
	Result := '';
  for i := 1 to Length(InString) do
  	begin
        Application.ProcessMessages;
     	CharNum := Ord(InString[i]);
     	if (CharNum = 13) then
        	Result := Result + #13#10
        	else
        		if (CharNum < 128) and (CharNum <> 10) then
           		Result := Result + Chr(CharNum)
              else
        			if (Ord(InString[i])<>10) then
							Result := Result + '&#' + IntToStr(Ord(InString[i])) + ';';
     end;
end;

function WideStringToHTMLNumericAbove255(InString: WideString): string;
var
i: integer;
CharNum: integer;

begin
//initialize, otherwise repeated calls to the function will simply
//append to the Result variable
	Result := '';
  for i := 1 to Length(InString) do
  	begin
        Application.ProcessMessages;
     	CharNum := Ord(InString[i]);
     	if (CharNum = 13) then
        	Result := Result + #13#10
        	else
        		if (CharNum < 256) and (CharNum <> 10) then
           		Result := Result + Chr(CharNum)
              else
        			if (Ord(InString[i])<>10) then
							Result := Result + '&#' + IntToStr(Ord(InString[i])) + ';';
     end;
end;

function WideStringToHTMLHex(InString: WideString): string;
var
i: integer;
CharNum: integer;

begin
//initialize, otherwise repeated calls to the function will simply
//append to the Result variable
	Result := '';
  for i := 1 to Length(InString) do
  	begin
        Application.ProcessMessages;
     	CharNum := Ord(InString[i]);
     	if (CharNum = 13) then
        	Result := Result + #13#10
        	else
        		if (CharNum < 128) and (not (CharNum in [10,38,60,62])) then //Need to escape angle brackets and ampersands
           		Result := Result + Chr(CharNum)
              else
        			if (Ord(InString[i])<>10) then
							Result := Result + '&#x' + IntToHex(Ord(InString[i]), 4) + ';';
     end;
end;

function WideStringToHTMLOutput(InString: WideString; ProcessRTL: Boolean): string;
//This escapes angle brackets that aren't part of tags before conversion
var
i: integer;
CharNum: integer;

begin
//initialize, otherwise repeated calls to the function will simply
//append to the Result variable
	Result := '';
  InString := WConvertUnmatchedAngleBrackets(InString);
  InString := WReturnsToBR(InString);
  if ProcessRTL then
     InString := ProcessForRTLBoundaries(InString, False);
  for i := 1 to Length(InString) do
  	begin
        Application.ProcessMessages;
     	CharNum := Ord(InString[i]);
     	if (CharNum < 128) then
           Result := Result + Chr(CharNum)
        else
           if (Ord(InString[i])<>10) then
              Result := Result + '&#x' + IntToHex(Ord(InString[i]), 4) + ';';
     end;
end;

function WideStringToHTMLOutputCentredRTL(InString: WideString; ProcessRTL: Boolean): string; //This escapes angle brackets that aren't part of tags before conversion
var
i: integer;
CharNum: integer;

begin
//initialize, otherwise repeated calls to the function will simply
//append to the Result variable
	Result := '';
  InString := WConvertUnmatchedAngleBrackets(InString);
  InString := WReturnsToBR(InString);
  if ProcessRTL then
     InString := ProcessForRTLBoundaries(InString, True);
  for i := 1 to Length(InString) do
  	begin
        Application.ProcessMessages;
     	CharNum := Ord(InString[i]);
     	if (CharNum < 128) then
           Result := Result + Chr(CharNum)
        else
           if (Ord(InString[i])<>10) then
              Result := Result + '&#x' + IntToHex(Ord(InString[i]), 4) + ';';
     end;
end;

function WideStringToJSUnicode(InString: WideString): string;
var
i: integer;
CharNum: integer;

begin
  Result := '';
  for i := 1 to Length(InString) do
  	begin
        Application.ProcessMessages;
     	CharNum := Ord(InString[i]);
     	if (CharNum = 13) then
        	Result := Result + ' '
        	else
              if ((CharNum = 92) or (CharNum = 39)) then //backslash or apostrophe need to be escaped
                 Result := Result + '\' + Chr(CharNum)
              else
                 if (CharNum < 128) and (CharNum <> 10) then
                    Result := Result + Chr(CharNum)
                 else
                    if (Ord(InString[i])<>10) then
                       Result := Result + '\u' + IntToHex(Ord(InString[i]), 4);
     end;
end;

function WideStringToWideJSUnicode(InString: WideString): WideString;
var
i: integer;
CharNum: integer;

begin
  Result := WideString('');
  for i := 1 to Length(InString) do
  	begin
        Application.ProcessMessages;
     	CharNum := Ord(WideChar(InString[i]));
     	if (CharNum = 13) then
        	Result := Result + WideString(' ')
        	else
              if ((CharNum = 92) or (CharNum = 39)) then //backslash or apostrophe need to be escaped
                 Result := Result + WideString('\') + WideChar(CharNum)
              else
                 if (CharNum <> 10) then
                    Result := Result + WideChar(CharNum);
     end;
end;

function WideStringToExplanation(InString: WideString): string;
var
i: integer;
CharNum: integer;
DecNums, HexNums: string;

begin
  DecNums  := 'Dec: ';
  HexNums := 'Hex: ';
  if Length(InString) > 0 then
     begin
        for i := 1 to Length(InString) do
           begin
              Application.ProcessMessages;
              CharNum := Ord(WideChar(InString[i]));
              DecNums := DecNums  + ' ' + IntToStr(CharNum) + ' ';
              HexNums := HexNums  + ' ' + IntToHex(CharNum, 4) + ' ';
           end;
        Result := DecNums + ' ::  ' + HexNums;
     end;
end;

function WReturnsToBR(InString: WideString): WideString;
//var
//i: integer;
//CharNum: integer;

begin
  Result := WRemoveDoubleSpaces(InString);
	Result := WideStringReplace(Result,#13#10, '<br />', [rfReplaceAll]);
//Replace remaining #13s in case #10s are missing
  Result := WideStringReplace(Result,#13, '<br />', [rfReplaceAll]);
  
//New section -- doesn't screw up table code

	Result := WideStringReplace(Result,'<br /><tr', '<tr', [rfReplaceAll]);
  Result := WideStringReplace(Result,'<br /><td', '<td', [rfReplaceAll]);
  Result := WideStringReplace(Result,'<br /></td>', '</td>', [rfReplaceAll]);
  Result := WideStringReplace(Result,'<br /></tr>', '</tr>', [rfReplaceAll]);
  Result := WideStringReplace(Result,'<tr><br />', '<tr>', [rfReplaceAll]);
  Result := WideStringReplace(Result,'<td><br />', '<td>', [rfReplaceAll]);
  Result := WideStringReplace(Result,'</td><br />', '</td>', [rfReplaceAll]);
  Result := WideStringReplace(Result,'</tr><br />', '</tr>', [rfReplaceAll]);
  Result := WideStringReplace(Result,'<br /></table>', '</table>', [rfReplaceAll]);
	Result := WideStringReplace(Result,'<br /> <tr', '<tr', [rfReplaceAll]);
  Result := WideStringReplace(Result,'<br /> <td', '<td', [rfReplaceAll]);
  Result := WideStringReplace(Result,'<br /> </td>', '</td>', [rfReplaceAll]);
  Result := WideStringReplace(Result,'<br /> </tr>', '</tr>', [rfReplaceAll]);
  Result := WideStringReplace(Result,'<tr> <br />', '<tr>', [rfReplaceAll]);
  Result := WideStringReplace(Result,'<td> <br />', '<td>', [rfReplaceAll]);
  Result := WideStringReplace(Result,'</td> <br />', '</td>', [rfReplaceAll]);
  Result := WideStringReplace(Result,'</tr> <br />', '</tr>', [rfReplaceAll]);
  Result := WideStringReplace(Result,'<br /> </table>', '</table>', [rfReplaceAll]);
//Another new section -- don't screw up lists!
	Result := WideStringReplace(Result,'<br /><li', '<li', [rfReplaceAll]);
  Result := WideStringReplace(Result,'<br /></li', '</li', [rfReplaceAll]);
  Result := WideStringReplace(Result,'<br /><ul', '<ul', [rfReplaceAll]);
  Result := WideStringReplace(Result,'<br /></ul', '</ul', [rfReplaceAll]);
  Result := WideStringReplace(Result,'</li><br />', '</li>', [rfReplaceAll]);
  Result := WideStringReplace(Result,'</ul><br />', '</ul>', [rfReplaceAll]);
//Yet another section -- object tags!
  Result := WideStringReplace(Result,'<br /><object', '<object', [rfReplaceAll]);
  Result := WideStringReplace(Result,'<br /> <object', '<object', [rfReplaceAll]);
  Result := WideStringReplace(Result,'<br /><param', '<param', [rfReplaceAll]);
  Result := WideStringReplace(Result,'<br /> <param', '<param', [rfReplaceAll]);
  Result := WideStringReplace(Result,'<br /></object>', '</object>', [rfReplaceAll]);
  Result := WideStringReplace(Result,'<br /> </object>', '</object>', [rfReplaceAll]);
{
  Result := WideString('');
  for i := 1 to Length(InString) do
  	begin
     	CharNum := Ord(WideChar(InString[i]));
     	if (CharNum = 13) then
        	Result := Result + WideString('<br />')
        	else
              if (CharNum <> 10) then
                 Result := Result + WideChar(CharNum);
     end; }
end;

function WideStringToJSUnicodeEx(InString: WideString): string;
var
i: integer;
CharNum: integer;

begin
  Result := '';
  for i := 1 to Length(InString) do
     begin
        CharNum := Ord(InString[i]);
        if (CharNum = 13) then
           Result := Result + ' '
        else
           if (Ord(InString[i])<>10) then
              Result := Result + '\u' + IntToHex(Ord(InString[i]), 4);
     end;
end;

function WideStringToJSUnicodeExNoTags(InString: WideString): string;
var
i: integer;
CharNum: integer;
InTag: Boolean;

begin
  Result := '';
  InTag := False;
  for i := 1 to Length(InString) do
     begin
        CharNum := Ord(InString[i]);
        Case CharNum of
           60:
              begin
                 InTag := True;
                 Result := Result + '<';
              end;
           62:
              begin
                 InTag := False;
                 Result := Result + '>';
              end;
           13: Result := Result + ' ';
           10:
           else
              begin
                 if InTag = False then
                    Result := Result + '\u' + IntToHex(Ord(InString[i]), 4)
                 else
                    Result := Result + Char(Ord(InString[i]));
              end;
        end; //End case
     end;
end;

function WideStringFromHTMLNumeric(InString: string): WideString;
var
NumString: string;
i: integer;
j: integer;
NewWChar: WideChar;
CharNum: integer;

begin
	Result := '';
	if Length(InString) < 1 then
  	Exit;

  i := 1;
  while i <= Length(InString) do
  	begin
			if Copy(InString, i, 2) = '&#' then
        	begin
           	if Copy(InString, i, 3) = '&#x' then //It's an HTML hex
              	begin
                 	NumString := '';
                    j := i+3;
                    while (InString[j] <> ';') and (j<=Length(InString)) do
                    	begin
                    		NumString := NumString + InString[j];
                          inc(j);
                       end;
                    if InString[j] <> ';' then
                    	begin
                    		Result := Result + WideString(NumString);
                          Exit;
                       end;
                    CharNum := StrToIntDef('$' + NumString, 63);
                    if CharNum < $FFFF then
                    	begin
                       	NewWChar := WideChar(Word(CharNum));
                    		Result := Result + NewWChar;
                       end;
                    i := j+1;
                 end
           	else //It's a decimal
              	begin
                 	NumString := '';
                    j := i+2;
                    while (InString[j] <> ';') and (j<=Length(InString)) do
                    	begin
                    		NumString := NumString + InString[j];
                          inc(j);
                       end;
                    if InString[j] <> ';' then
                    	begin
                    		Result := Result + WideString(NumString);
                          Exit;
                       end;
                    CharNum := StrToInt(NumString);
                    if CharNum < $FFFF then
                    	begin
                       	NewWChar := WideChar(Word(CharNum));
                    		Result := Result + NewWChar;
                       end;
                    i := j+1;
                 end;
           end
        else //it's a straight character
        	begin
           	Result := Result + WideChar(InString[i]);
              inc(i);
           end;
     end;
end;

function WideStringFromWSHTMLNumeric(InString: WideString): WideString;
var
NumString: WideString;
i: integer;
j: integer;
NewWChar: WideChar;
CharNum: integer;

begin
	Result := '';
	if Length(InString) < 1 then
  	Exit;

  i := 1;
  while i <= Length(InString) do
  	begin
			if Copy(InString, i, 2) = '&#' then
        	begin
           	if Copy(InString, i, 3) = '&#x' then //It's an HTML hex
              	begin
                 	NumString := '';
                    j := i+3;
                    while (InString[j] <> ';') and (j<=Length(InString)) do
                    	begin
                    		NumString := NumString + InString[j];
                          inc(j);
                       end;
                    if InString[j] <> ';' then
                    	begin
                    		Result := Result + WideString(NumString);
                          Exit;
                       end;
                    CharNum := StrToIntDef('$' + NumString, 63);
                    if CharNum < $FFFF then
                    	begin
                       	NewWChar := WideChar(Word(CharNum));
                    		Result := Result + NewWChar;
                       end;
                    i := j+1;
                 end
           	else //It's a decimal
              	begin
                 	NumString := '';
                    j := i+2;
                    while (InString[j] <> ';') and (j<=Length(InString)) do
                    	begin
                    		NumString := NumString + InString[j];
                          inc(j);
                       end;
                    if InString[j] <> ';' then
                    	begin
                    		Result := Result + WideString(NumString);
                          Exit;
                       end;
                    CharNum := StrToInt(NumString);
                    if CharNum < $FFFF then
                    	begin
                       	NewWChar := WideChar(Word(CharNum));
                    		Result := Result + NewWChar;
                       end;
                    i := j+1;
                 end;
           end
        else //it's a straight character
        	begin
           	Result := Result + WideChar(InString[i]);
              inc(i);
           end;
     end;
end;

function HDecimalUCodeToJHexUCode(InString: string): string;
//Converts &#____; decimal chars to \u____ JavaScript escapes
var
NumString: string;
i: integer;
j: integer;
CharNum: integer;

begin
	if (Pos('&#', InString) < 1) or (Length(InString) < 1) then
  	begin
     	Result := InString;
        Exit;
     end;

  i := 1;
  while i <= Length(InString) do
  	begin
			if Copy(InString, i, 2) = '&#' then
        	begin
           	if Copy(InString, i, 3) = '&#x' then //It's an HTML hex
              	begin
                 	NumString := '';
                    j := i+3;
                    while (InString[j] <> ';') and (InString[j] in ['0'..'9']) and (j<=Length(InString)) do
                    	begin
                    		NumString := NumString + InString[j];
                          inc(j);
                       end;
                    if InString[j] <> ';' then
                    	begin
                    		Result := Result + NumString;
                          Exit;
                       end;
                    CharNum := StrToIntDef('$' + NumString, 63);
                    if CharNum < $FFFF then
                    	Result := Result + '\u' + NumString;
                    i := j+1;
                 end
           	else //It's a decimal
              	begin
                 	NumString := '';
                    j := i+2;
                    while (InString[j] <> ';') and (InString[j] in ['0'..'9']) and (j<=Length(InString)) do
                    	begin
                    		NumString := NumString + InString[j];
                          inc(j);
                       end;
                    if InString[j] <> ';' then
                    	begin
                    		Result := Result + NumString;
                          Exit;
                       end;
                    CharNum := StrToInt(NumString);
                    if CharNum < $FFFF then
                    	begin
                       	NumString := IntToHex(CharNum, 4);
                       	Result := Result + '\u' + NumString;
                       end;
                    i := j+1;
                 end;
           end
        else
        	begin
           	Result := Result + InString[i];
              inc(i);
           end;
     end;
end;

function JHexUCodeToHDecimalUCode(InString: string): string;
//Converts \u____ JavaScript escapes to &#____; decimal chars
var
NumString: string;
i: integer;
CharNum: integer;

begin
	if (Pos('\u', InString) < 1) or (Length(InString) < 1) then
  	begin
     	Result := InString;
        Exit;
     end;

  i := 1;
  while i <= Length(InString) do
  	begin
			if Copy(InString, i, 2) = '\u' then
        	begin
           	NumString := Copy(InString, i+2, 4);
					CharNum := StrToIntDef('$' + NumString, 63);
              if CharNum < $FFFF then
              	Result := Result + '&#' + IntToStr(CharNum) + ';';
              i := i+6;
           end
        else
        	begin
           	Result := Result + InString[i];
              inc(i);
           end;
     end;
end;

function JHexUCodeToWideString(InString: Widestring): WideString;
//Converts \u____ JavaScript escapes to Unicode chars
var
NumString: string;
i: integer;
CharNum: integer;

begin
	if (Pos('\u', InString) < 1) or (Length(InString) < 1) then
  	begin
     	Result := InString;
        Exit;
     end;

  i := 1;
  while i <= Length(InString) do
  	begin
			if Copy(InString, i, 2) = '\u' then
        	begin
           	NumString := Copy(InString, i+2, 4);
					CharNum := StrToIntDef('$' + NumString, 63);
              if CharNum < $FFFF then
              	Result := Result + WideChar(Word(CharNum));
              i := i+6;
           end
        else
        	begin
           	Result := Result + InString[i];
              inc(i);
           end;
     end;
end;

function PCCharToJSUnicode(InString: string): string;
var
i: integer;

begin
	if Length(InString) < 1 then
  	begin
			Result := '';
        Exit;
     end;
  for i := 1 to Length(InString) do
  	if Ord(InString[i]) > 127 then
     	begin
        	if (Ord(InString[i]) < 160) then
              Result := Result + '\u' + IntToHex(GetDecUnicodeNumFromWinNum(Ord(InString[i])), 4)
           else
              Result := Result + '\u' + IntToHex(Ord(InString[i]), 4);
        end
     else
     	Result := Result + InString[i];
end;

function WAllCharsToJSUnicode(InString: WideString): WideString;
var
i: LongInt;
OutString: WideString;

begin
	OutString := '';
  for i := 1 to Length(InString) do
  	OutString := OutString + '\u' + UpperCase(IntToHex(Ord(InString[i]), 4));
  Result := OutString;
end;

function WAllCharsToHTMLHex(InString: WideString): WideString;
var
i: LongInt;
OutString: WideString;

begin
	OutString := '';
  for i := 1 to Length(InString) do
  	OutString := OutString + '&#x' + UpperCase(IntToHex(Ord(InString[i]), 4)) + ';';
  Result := OutString;
end;

function PCCharToJSUnicodeEx(InString: string): string; //Includes all chars, not just those over 127 -- used for simple encryption
var
i: integer;

begin
	Result := '';
	if Length(InString) < 1 then
		begin
        Exit;
     end;
  for i := 1 to Length(InString) do
  	begin
     	if (Ord(InString[i]) < 160) and (Ord(InString[i]) > 127) then
           	Result := Result + '\u' + IntToHex(GetDecUnicodeNumFromWinNum(Ord(InString[i])), 4)
           else
          		Result := Result + '\u' + IntToHex(Ord(InString[i]), 4);
     end;
end;

function PCCharToHTMLNumeric(InString: string): string; //Includes all chars, not just those over 127 -- used for simple encryption
var
InTag: Boolean; //If we're inside a tag, do no conversion to avoid destroying HTML
i: integer;

begin
	Result := '';
	InTag := False;
  if Length(InString) < 1 then
  	begin
        Exit;
     end;
  for i := 1 to Length(InString) do
     begin
       	if InString[i] = '<' then
         	InTag := True;
       	if not InTag then
        	begin
              if (Ord(InString[i]) < 160) and (Ord(InString[i]) > 127) then
                 Result := Result + '&#' + IntToStr(GetDecUnicodeNumFromWinNum(Ord(InString[i]))) + ';'
              else
                 Result := Result + '&#' + IntToStr(Ord(InString[i])) + ';';
           end
       else
          Result := Result + InString[i];
       if InString[i] = '>' then
          InTag := False;
     end;
end;

function WTagsToLowerCase(InString: WideString): WideString;
var
i: integer;
InTag: Boolean;
InSQuotes: Boolean;
InDQuotes: Boolean;

begin
  Result := '';
  if Length(InString) < 1 then
     Exit;
  InTag := False;
  InDQuotes := False;
  InSQuotes := False;
  for i := 1 to Length(InString) do
     begin
        if InString[i] = WideChar('>') then
           InTag := False;
        if (InTag = True) and (InString[i] = WideChar('>')) then
           InTag := False;
        if InTag = True then
           begin
              if InString[i] = WideChar('"') then
                 InDQuotes := not InDQuotes;
              if InString[i] = WideChar('''') then
                 InSQuotes := not InSQuotes;
           end;
        if InString[i] = WideChar('<') then
           InTag := True;
        if not InTag or (InDQuotes or InSQuotes) then
           Result := Result + InString[i]
        else
           Result := Result + WideLowerCase(InString[i]);
     end;
end;

function MakeRTLRightAlignedPara(InString: string): string;
var
WS: WideString;
begin
  Result := InString;
//Use the first and last char to determine RTL status
  WS := WideStringFromHTMLNumeric(InString);
  if (UnicodeIsRightToLeft(Cardinal(WS[1]))) and (UnicodeIsRightToLeft(Cardinal(WS[Length(WS)]))) then
     Result := '<p class="rtl">' + InString + '</p>'
end;

function FirstNonPuncCharIsRTL(InString: WideString): Boolean;
var
i: integer;

begin
  Result := False;
  if Length(InString) < 1 then
     Exit;
  i := 1;
  while (UnicodeIsPunctuation(Cardinal(InString[i]))) and (i < Length(InString)) do
     inc(i);
  Result := UnicodeIsRightToLeft(Cardinal(InString[i]));
end;

function LastNonPuncCharIsRTL(InString: WideString): Boolean;
var
i: integer;

begin
  Result := False;
  if Length(InString) < 1 then
     Exit;
  i := Length(InString);
  while ((UnicodeIsPunctuation(Cardinal(InString[i]))) or (UnicodeIsMark(Cardinal(InString[i])))) and (i > 1) do
     dec(i);
  Result := UnicodeIsRightToLeft(Cardinal(InString[i]));
end;

function HasMoreRTLCharsThanLTRChars(InString: WideString): Boolean;
var
RTLChars: integer;
i: integer;

begin
  Result := False;
  if Length(InString) < 1 then
     Exit;
  RTLChars := 0;
  for i := 1 to Length(InString) do
     if UnicodeIsRightToLeft(Cardinal(InString[i])) then
        inc(RTLChars);

  Result := (RTLChars > (Length(InString)/2));
end;

function TextIsMostLikelyRTL(InString: WideString): Boolean;
begin
  Result := False;
  if ((FirstNonPuncCharIsRTL(InString)) and (LastNonPuncCharIsRTL(InString))) or
     (HasMoreRTLCharsThanLTRChars(InString)) then
     Result := True;
end;

function InsertRTLSpans(InString: WideString): WideString;
var
i: integer;
InRTL: Boolean;
InTag: Boolean;

begin
  Result := '';
  if Length(InString) < 1 then
     Exit;

  InTag := False;

  InRTL := UnicodeIsRightToLeft(Cardinal(InString[1]));
  if InRTL then
     Result := '<span class="RTLText">';
  for i := 1 to Length(InString) do
     begin
        if (InString[i] = '<') and (not InTag) then
           begin
              InTag := True;
              if InRTL then
                 begin
                    Result := Result + '</span>';
                    InRTL := False;
                 end;
           end;

        if InTag then
           Result := Result + InString[i]
        else
           begin
              if InRTL then
                 begin
                    if WUnicodeChangesDirectionality(True, InString[i]) then
                       begin
                          Result := Result + '</span>';
                          InRTL := False;
                       end;
                    Result := Result + InString[i]
                 end
              else
                 begin
                    if UnicodeIsRightToLeft(Cardinal(InString[i])) then
                       begin
                          InRTL := True;
                          Result := Result + '<span class="RTLText">';
                       end;
                    Result := Result + InString[i];
                 end;
           end;
        if (InString[i] = '>') and (InTag) then
           InTag := False;
     end;
  if InRTL then
     Result := Result + '</span>';
end;

function ProcessForRTLBoundaries(InString: WideString; Centred: Boolean): WideString;
var
CSSClass: WideString;

begin
  Result := InString;
  if Centred then
     CSSClass := 'CentredRTLText'
  else
     CSSClass := 'RTLText';
  if Length(InString) < 1 then
     Exit;
  if FirstNonPuncCharIsRTL(InString) then
     begin
        if LastNonPuncCharIsRTL(InString) then
           begin
              Result := '<p class="' + CSSClass + '">' + InString + '</p>';
              Exit;
           end
        else
           begin
              if HasMoreRTLCharsThanLTRChars(InString) then
                 begin
                    Result := '<p class="' + CSSClass + '">' + InString + '</p>';
                    Exit;
                 end
              else
                 begin
                    Result := InsertRTLSpans(InString);
                    Exit;
                 end;
           end;
     end
  else
     begin
        if LastNonPuncCharIsRTL(InString) then
           begin
              if HasMoreRTLCharsThanLTRChars(InString) then
                 begin
                    Result := '<p class="RTLText">' + InString + '</p>';
                    Exit;
                 end
              else
                 begin
                    Result := InsertRTLSpans(InString);
                    Exit;
                 end;
           end
        else
           begin
              Result := InsertRTLSpans(InString);
           end;
     end;
end;

function TextIsMostLikelyCJK(InString: WideString): Boolean;
var
i: integer;
FirstChar, LastChar: WideChar;

begin
  Result := False;
  if Length(InString) < 1 then
     Exit;
  i := 1;
  while (i<=Length(InString)) and (WUnicodeIsSpacePunc(InString[i])) do
     Inc(i);
  FirstChar := InString[i];
  i := Length(InString);
  while (i>1) and (WUnicodeIsSpacePunc(InString[i])) do
     Dec(i);
  LastChar := InString[i];
  Result := (WUnicodeIsCJK(FirstChar) and WUnicodeIsCJK(LastChar));
end;

function ParseWideStringToPerceivedChars(var CharList: TWideStringList; WInString: WideString;
 RemoveAscii: Boolean): Boolean;
//This function takes a widestring text and splits it into "perceived characters", each
//assigned to one position in the list. "Perceived" means that each character will
//have its own position, unless it has following combining diacritical marks; in that
//case, the char + diacritics will be grouped together.
var
i: integer;
CharNum: Cardinal;
CodeBlock: TUnicodeBlock;
PercChar: WideString;
Len: integer;
WS: WideString;

begin
//Default return
	Result := False;

  try
//Clear the list
     CharList.Clear;

//Exit if no data
     Len := Length(WInString);
     if Len < 1 then
        Exit;

//Set up the list
//     CharList.Duplicates := dupIgnore; //Can't do this -- it ignores lowercase/uppercase

//Strip returns etc.
		WS := '';
		for i := 1 to Len do
        begin
           CharNum := Cardinal(WInString[i]);
           if not (CharNum in [13,10]) then
              if (not UnicodeIsLineSeparator(CharNum)) and (not UnicodeIsParagraphSeparator(CharNum)) then
                 WS := WS + WInString[i];
        end;
     Len := Length(WS);

//Go through the chars

     i := 1;
     while i < Len do
        begin
           PercChar := WS[i];
           inc(i);
           if (i>Len) then
              Break;
           CodeBlock := CodeBlockFromChar(UCS4(WS[i]));
           while ((CodeBlock in [ubCombiningDiacriticalMarks,ubCombiningMarksForSymbols,ubCombiningHalfMarks]) and (i<=Len)) do
              begin
                 PercChar := PercChar + WS[i];
                 inc(i);
                 CodeBlock := CodeBlockFromChar(UCS4(WS[i]));
              end;
           if not RemoveAscii then
           	CharList.Append(PercChar)
           else
           	if (Length(PercChar) > 1) or (PercChar[1] > #$007F) then
              	CharList.Append(PercChar);
           PercChar := '';
        end;
//Add the last one if necessary
     if i <= Len then
        if WS[Len] > #$007F then
           CharList.Append(WideString(WS[Len]));

//Return success if any entries added
     Result := (CharList.Count > 0);
  except
  	Result := False;
  end;
end;

//Shows returns in a string by inserting a character #172 before each one
function ShowReturns(InString: string): string;
var
i: integer;
begin
	Result := '';
  if Length(InString) < 1 then
  	Exit;
	for i := 1 to Length(InString) do
  	if InString[i] = #13 then
     	Result := Result + #172#13
     else
     	Result := Result + InString[i];

end;

function UniqueIDFromTime: string;
begin
  Result := FloatToStr(Now);
  Result := ReplaceStuff('.', '', Result);
  Result := ReplaceStuff(',', '', Result);
end;


procedure CopyUnicodeTextToClipboard(InText: WideString);
var
DataHandle :  THandle;
FromPointer:  Pointer;
ToPointer  :  Pointer;
WS         :  WideString;
begin
  WS := InText;
  SetLength(WS, Length(WS)+1);
  WS[Length(WS)] := WideChar($0000);  // Null-terminator
  DataHandle := GlobalAlloc(GMEM_DDESHARE OR GMEM_MOVEABLE,
                            Length(WS)*SizeOf(WChar));
  try

     ToPointer   := GlobalLock(DataHandle);
//NOTE: THE FOLLOWING LINE LOOKS WRONG! SHOULD IT BE ...@WS[1]?
     FromPointer := @InText[1];
     Move(FromPointer^, ToPointer^, Length(WS)*SizeOf(WChar));
  finally
     GlobalUnlock(DataHandle);
  end;

  OpenClipboard(Application.Handle);
//  EmptyClipboard;
  SetClipboardData(CF_UNICODETEXT, DataHandle);
  CloseClipboard;
end;

function WGetUnicodePunctuation(InString: WideString): string;
var
i: integer;
Temp: string;

begin
  Result := '';
  if Length(InString) < 1 then
     Exit;
  for i := 1 to Length(InString) do
     if UnicodeIsPunctuation(Cardinal(InString[i])) then
        begin
           Temp := '\u' + IntToHex(Ord(InString[i]), 4);
           if Pos(Temp, Result) < 1 then
              Result := Result + Temp;
        end;
end;

function WGetUnicodeOpenPunctuation(InString: WideString): string;
var
i: integer;
Temp: string;

begin
  Result := '';
  if Length(InString) < 1 then
     Exit;
//Include ordinary quotes as well as initial and open, because these can be initial
  for i := 1 to Length(InString) do
     if (UnicodeIsInitialPunctuation(Cardinal(InString[i]))) or
        (UnicodeIsOpenPunctuation(Cardinal(InString[i]))) or
        (UnicodeIsQuotationMark(Cardinal(InString[i]))) then
        begin
           Temp := '\u' + IntToHex(Ord(InString[i]), 4);
           if Pos(Temp, Result) < 1 then
              Result := Result + Temp;
        end;
end;

function WUnicodeIsSpacePunc(InChar: WideChar): Boolean;
begin
  Result := UnicodeIsPunctuation(Cardinal(InChar)) or UnicodeIsSpace(Cardinal(InChar)) or (InChar in [WideChar(#13), WideChar(#10)]);
end;

function WUnicodeIsReturn(InChar: WideChar): Boolean;
begin
  Result := (UnicodeIsLineSeparator(Cardinal(InChar)) or
              (UnicodeIsParagraphSeparator(Cardinal(InChar)))) or
                 (InChar in [WideChar(#13), WideChar(#10)]);
end;

function WUnicodeIsCJK(InChar: WideChar): Boolean;
begin
  Result := CodeBlockFromChar(UCS4(InChar)) in [ubCJKRadicalsSupplement..ubHangulSyllables,
          ubCJKCompatibilityIdeographs, ubCJKCompatibilityForms,
          ubCJKUnifiedIdeographsExtensionB, ubCJKCompatibilityIdeographsSupplement];
end;

function WUnicodeIsCombining(InChar: WideChar): Boolean;
begin
//Tweaked to comment out the second condition 26/04/04 -- no known
//reason for it, and it breaks other functions!
  Result := UnicodeIsMark(Cardinal(InChar)){ or (Ord(InChar) < 256)};
end;

function WUnicodeChangesDirectionality(RTL: Boolean; InChar: WideChar): Boolean;
begin
  if (UnicodeIsRightToLeft(Cardinal(InChar)) = RTL) or
     (WUnicodeIsCombining(InChar)) or
     (WUnicodeIsSpacePunc(InChar))  then
     Result := False
  else
     Result := True;
end;

function BuildWebKeypad(InString: WideString; ProcessForRTL: Boolean): string;
//This function is contains an adaptation of the StringFunctions
//ParseWideStringToPerceivedChars function -- they may be merged at some stage
var
CharList: TTntStringList;
WText: WideString;
i,k: integer;
CharNum: Cardinal;
//CodeBlock: Cardinal;
PercChar: WideString;
Len: integer;
WS: WideString;
AlreadyInList: Boolean;
IsMark: Boolean;

begin
  Result := '';
  CharList := TTntStringList.Create;
  try
     CharList.Sorted := True;
     CharList.Duplicates := dupAccept; //dupIgnore doesn't work!!!! Thinks all sorts of chars are identical!
     WText := WideTrim(InString);

//Exit if no data
     Len := Length(WText);
     if Len < 1 then
        Exit;

//Strip returns etc.
		WS := '';
		for i := 1 to Len do
        begin
           CharNum := Cardinal(WText[i]);
           if not (CharNum in [13,10]) then
              if ((not UnicodeIsLineSeparator(CharNum)) and
                 (not UnicodeIsParagraphSeparator(CharNum)) and
                 (not UnicodeIsWhiteSpace(CharNum))) then
                 WS := WS + WText[i];
        end;
     Len := Length(WS);

//Go through the chars
     i := 1;
     while i < Len do
        begin
           PercChar := WS[i];
           inc(i);
           if (i>Len) then
              Break;
{//This code modified for HotPot 6.0.2.4; Arabic combining diacritics don't test true
//using this system, so UnicodeIsMark used instead
           CodeBlock := CodeBlockFromChar(WS[i]);
           while ((CodeBlock in [6,31,62]) and (i<=Len)) do
              begin
                 PercChar := PercChar + WS[i];
                 inc(i);
                 CodeBlock := CodeBlockFromChar(WS[i]);
              end;
}
           IsMark := UnicodeIsMark(Cardinal(WS[i]));
           while ((IsMark) and (i<=Len)) do
              begin
                 PercChar := PercChar + WS[i];
                 inc(i);
                 IsMark := UnicodeIsMark(Cardinal(WS[i]));
              end;

           if (Length(PercChar) > 1) or (PercChar[1] > #$007F) then
              begin
                 if CharList.Count > 0 then
                    begin
                       AlreadyInList := False;
                       for k := 0 to CharList.Count - 1 do
                          if WideStringToHTMLHex(CharList[k]) = WideStringToHTMLHex(PercChar) then 
                             AlreadyInList := True;

                       if not AlreadyInList then
                          CharList.Add(PercChar);
                    end
                 else
                    begin
                       CharList.Add(PercChar);
                    end;
              end;
           PercChar := '';
        end;
//Add the last one if necessary
     if i <= Len then
        if WS[Len] > #$007F then
           begin
              PercChar := WideString(WS[Len]);
              if CharList.Count > 0 then
                 begin
                    AlreadyInList := False;
                    for k := 0 to CharList.Count - 1 do
                       if WideStringToHTMLHex(CharList[k]) = WideStringToHTMLHex(PercChar) then
                          AlreadyInList := True;

                    if not AlreadyInList then
                       CharList.Add(PercChar);
                 end
               else
                  begin
                     CharList.Add(PercChar);
                  end;
           end;

//Bail if empty
     if CharList.Count < 1 then
        Exit;

//Build the keypad
     for i := 0 to CharList.Count - 1 do
        begin
           Result := Result + '<button onclick="TypeChars(''' +
                                WideStringToJSUnicodeEx(CharList[i]) + '''); return false;">';
           if ProcessForRTL then
              Result := Result + WideStringToHTMLOutput(CharList[i], True)
           else
              Result := Result + WideStringToHTMLHex(CharList[i]);
           Result := Result + '</button> '; //Add a space to allow wrapping in Safari!
        end;

  finally
     CharList.Free;
  end;
end;

function WConvertUnmatchedAngleBrackets(InString: WideString): WideString;
var
i: integer;
InTag: Boolean;
OutString: WideString;
WOpen, WClose: WideChar;

begin
  Result := '';
  if Length(InString) < 1 then
     Exit;

  WOpen := '<';
  WClose := '>';
  InTag := False;

//Go from the beginning, looking for unmatched
  for i := 1 to Length(InString) do
     if InTag then
        begin
           if InString[i] = WClose then
              InTag := False;
           OutString := OutString + InString[i];
        end
     else
        begin
           if InString[i] = WClose then
              OutString := OutString + '&gt;'
           else
              begin
                 if InString[i] = WOpen then
                    InTag := True;
                 OutString := OutString + InString[i];
              end;
        end;
//Now go from the end, repeating the process
  InString := OutString;
  OutString := '';
  InTag := False;
  for i := Length(InString) downto 1 do
     if InTag then
        begin
           if InString[i] = WOpen then
              InTag := False;
           OutString := InString[i] + OutString;
        end
     else
        begin
           if InString[i] = WOpen then
              OutString := '&lt;' + OutString
           else
              begin
                 if InString[i] = WClose then
                    InTag := True;
                 OutString := InString[i] + OutString;
              end;
        end;
  Result := OutString;
end;

{This function parses out the tag name, tag content, and attribute values in a
WideString XML tag passed to it, and stores the results in a TTntStringList as
Name/Value pairs so the values can easily be looked up by the calling function.
It stores the tag name and content with special name structures so they are
not mistaken for attributes.}
function WParseTagOrAttributes(InString: WideString; uslPairs: TTntStringList): Boolean;
var
i: integer;
TagName: WideString;
TagContent: WideString;
Atts: WideString;
CloseTagPos: integer;
QuoteChar: WideChar;
NameVal: WideString;
InsideAtt: Boolean;
//TODO: THIS WILL FAIL WITH SELF-CLOSING TAGS!!!!!!!!!
  function IsTagnameTerminator(InChar: WideChar): Boolean;
  begin
    Result := (InChar in [WideChar(' '), WideChar('>'), WideChar('/')]);
  end;

  function IsAttsTerminator(InChar: WideChar): Boolean;
  begin
    Result := (InChar in [WideChar('>')]);
  end;
begin
  Result := False;
  try
    if Length(InString) < 5 then
      Exit;
    uslPairs.Clear;
    uslPairs.NameValueSeparator := '=';

//Initialize everything
    InString := WideTrim(InString);
    TagName := '';
    Atts := '';
    TagContent := '';
//First, is it a tag or just attributes?
    if InString[1] = '<' then
      begin
//The tag name is everything up to the first space or close angle bracket
        i := 2;
        while ((not IsTagnameTerminator(InString[i]))
              and (i < Length(InString))) do
          begin
            TagName := TagName + InString[i];
            inc(i);
          end;
//Add the tagname to the list
        uslPairs.Add('__tagname__=' + TagName);
//Get the attributes and tag content if there are any
        if InString[i] = WideChar(' ') then
          begin
//There are attributes and everything up to the close bracket or slash is part of them
            Atts := '';
            inc(i);
            while ((not IsAttsTerminator(InString[i]))
                and (i < Length(InString))) do
              begin
                Atts := Atts + InString[i];
                inc(i);
              end;
          end;
//Now get the content and stash it, if there is any
        if Pos('>', InString) < Length(InString) then
          begin
            i := Pos('>', InString) + 1;
            CloseTagPos := Pos('</' + TagName + '>', InString);
            if CloseTagPos > i then
              begin
                TagContent := Copy(InString, i, CloseTagPos-i);
                uslPairs.Add('__tagcontent__=' + TagContent);
              end;
          end;
      end
    else
      begin
//This whole thing is attributes
        Atts := InString;
      end;
      if Length(Atts) > 4 then
        begin
//First, find out the quotechar by checking for the first instance of " or '
          QuoteChar := '?';
          i := 1;
          while (QuoteChar = '?') and (i < Length(Atts)) do
            if Atts[i] in [WideChar(''''), WideChar('"')] then
              QuoteChar := Atts[i]
            else
              inc(i);
          if QuoteChar = '?' then
            begin
              Result := True;
              Exit; //There are no valid attribute values.
            end;

//Now split the name/value pairs by breaking on every instance of a quotechar
//preceding a whitespace
          NameVal := '';
          InsideAtt := False;
          for i := 1 to Length(Atts) do
          begin
            if Atts[i] <> QuoteChar then
              begin
                NameVal := NameVal + Atts[i];
              end
            else
              begin
                if InsideAtt then
//this is the end of a name-val pair
                  begin
                    uslPairs.Add(WideTrim(NameVal));
                    NameVal := '';
                    InsideAtt := False;
                  end
                else
//This is the first element in a name-val
                  begin
                    InsideAtt := True;
                  end;
              end;
          end;
      end;
    Result := True;
  except
    Result := False;
  end;
end;

//This function assesses the likelihood that a string is actually a Web URL, and
//returns true if it seems likely.
function WStrIsLikelyURL(InString: WideString): Boolean;
var
Likelihood: integer;

begin
  Likelihood := 0; //starting point
  InString := WideTrim(InString);
//First, check for a protocol near the beginning
  if Pos('://', InString) < 7 then
    inc(Likelihood);
//Look at the extension, if there is one
  if Pos(WideLowerCase(WideExtractFileExt(InString)), WebExts) > 0 then
    inc(Likelihood);
//See if the last char is a slash
  if InString[Length(InString)] = WideChar('/') then
    inc(Likelihood);
//Return true for any hit -- may be too permissive, but relative links can be
//very simple
  Result := (Likelihood > 0);
end;

function IsWholeWordHit(wsText: WideString; StartPos, EndPos: integer): Boolean;
begin
  Result := False; //default
  if ((StartPos = 1) or (WUnicodeIsSpacePunc(wsText[StartPos-1]))) then
    if ((EndPos = Length(wsText)) or (WUnicodeIsSpacePunc(wsText[EndPos+1]))) then
      Result := True;
end;

function ParseAppVersionFromWideString(wsInput: WideString;
                                        var verMajor: integer;
                                        var verMinor: integer;
                                        var verBuild: integer;
                                        var verRelease: integer;
                                        var wsOutput: WideString): Boolean;
var
wsTemp: WideString;
i: integer;
InDelimiter: Boolean;
NumsFound: integer;

begin
  Result := False; //default
  wsInput := WideTrim(wsInput);
//Set defaults
  verMajor := 0;
  verMinor := 0;
  verBuild := 0;
  verRelease := 0;
  wsTemp := '';
  InDelimiter := wsInput[1] in [WideChar('0')..WideChar('9')];
  NumsFound := 0;
  wsOutput := '';

//Split the string based on any delimiter which is not a number
  if Length(wsInput) > 0 then
    begin
      for i := 1 to Length(wsInput) do
        begin
          if wsInput[i] in [WideChar('0')..WideChar('9')] then
            begin
              InDelimiter := False;
              wsTemp := wsTemp + wsInput[i];
            end
          else
            begin
              if (not InDelimiter) and (Length(wsTemp) > 0) then
                begin
                  inc(NumsFound);
                  Result := True;
                  Case NumsFound of
                    1:
                      begin
                        verMajor := StrToIntDef(wsTemp, 0);
                        wsOutput := wsTemp;
                      end;

                    2:
                      begin
                        verMinor := StrToIntDef(wsTemp, 0);
                        wsOutput := wsOutput + '.' + wsTemp;
                      end;
                    3:
                      begin
                        verBuild := StrToIntDef(wsTemp, 0);
                        wsOutput := wsOutput + '.' + wsTemp;
                      end;
                    4:
                      begin
                        verRelease := StrToIntDef(wsTemp, 0);
                        wsOutput := wsOutput + '.' + wsTemp;
                      end;
                  end;
                  wsTemp := '';
                  InDelimiter := True;
                end;
            end;
        end;
      if Length(wsTemp) > 0 then
        begin
          inc(NumsFound);
          Case NumsFound of
            1:
              begin
                verMajor := StrToIntDef(wsTemp, 0);
                wsOutput := wsTemp;
              end;

            2:
              begin
                verMinor := StrToIntDef(wsTemp, 0);
                wsOutput := wsOutput + '.' + wsTemp;
              end;
            3:
              begin
                verBuild := StrToIntDef(wsTemp, 0);
                wsOutput := wsOutput + '.' + wsTemp;
              end;
            4:
              begin
                verRelease := StrToIntDef(wsTemp, 0);
                wsOutput := wsOutput + '.' + wsTemp;
              end;
          end;
        end;
    end;
end;

function FileHasUTF8Header(const FileName: WideString): Boolean;
var
sList: TStringList;

begin
  Result := False; //default
  sList := TStringList.Create;
  try
    sList.LoadFromFile(FileName);
    if HasUTF8Header(sList.Text) then
      Result := True;
  finally
    sList.Free;
  end;
end;

//This function is designed to search an 8-bit string for any
//sequences that suggest it should be treated as UTF-8.
function HasUTF8Header(InString: string): Boolean;
var
strTemp: string;

begin
  Result := False; //Default

//Trim the string
  InString := Trim(InString);
//Look for an XML header
  if Copy(InString, 1, 5) = '<?xml' then
    begin
      strTemp := Copy(InString, 1, Pos('?>', InString));
      strTemp := UpperCase(strTemp);
      Result := True; // default encoding is UTF-8 in XML
      if Pos('ENCODING', strTemp) > 0 then
        if Pos('UTF-8', strTemp) < 0 then
          Result := False; //Alternative encoding is specified
      Exit;
    end;
//Maybe it's an HTML file
  InString := UpperCase(InString);
  strTemp := GetTextBetween(InString, '<HEAD', '</HEAD>');
  if Length(strTemp) > 0 then
    if Pos('CHARSET', strTemp) > 0 then
      if Pos('UTF-8', strTemp) > Pos('CHARSET', strTemp) then
        Result := True;
end;

//This function analyses the bytes in a file looking for likely
//sequences that indicate that the file may be UTF-8. It's based
//on a suggested algorithm here:
//http://mail.nl.linux.org/linux-utf8/1999-09/msg00110.html

//It's not perfect; little-endian Unicode triggers a True result, so
//check for a BOM first before relying on this!
function FileMayBeUTF8(FileName: WideString): Boolean;
var
Stream: TMemoryStream;
BytesRead: integer;
ArrayBuff: array[0..127] of byte;
PreviousByte: byte;
i: integer;
YesSequences, NoSequences: integer;

begin
  if not WideFileExists(FileName) then
    Exit;
  YesSequences := 0;
  NoSequences := 0;
  Stream := TMemoryStream.Create;
  try
    Stream.LoadFromFile(FileName);
    repeat

    {read from the TMemoryStream}

      BytesRead := Stream.Read(ArrayBuff, High(ArrayBuff) + 1);
          {Do the work on the bytes in the buffer}
      if BytesRead > 1 then
        begin
          for i := 1 to BytesRead-1 do
            begin
              PreviousByte := ArrayBuff[i-1];
              if ((ArrayBuff[i] and $c0) = $80) then
                begin
                  if ((PreviousByte and $c0) = $c0) then
                    begin
                      inc(YesSequences)
                    end
                  else
                    begin
                      if ((PreviousByte and $80) = $0) then
                        inc(NoSequences);
                    end;
                end;
            end;
        end;
    until (BytesRead < (High(ArrayBuff) + 1));
//Below, >= makes ASCII files = UTF-8, which is no problem.
//Simple > would catch only UTF-8;
    Result := (YesSequences >= NoSequences);

  finally
    Stream.Free;
  end;
end;



//There's something wrong with this function. It throws up an abstract error
//when trying to load the file. No idea why yet, so don't use it!
function LoadFileCarefullyToWideString(FileName: WideString): WideString;
var
TntStrings: TTntStrings;
Stream: TTntFileStream;
FileCharSet: TTntStreamCharSet;

begin
  TntStrings := TTntStrings.Create;
  try

    Stream := TTntFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
    try
//First, check for charset in the normal way
      FileCharSet := AutoDetectCharacterSet(Stream);
      Stream.Position := 0;
//If it comes back ansi, we have to be careful
      if FileCharSet = csAnsi then
        begin
          if FileMayBeUTF8(FileName) then
            begin
              TntStrings.AnsiStrings.LoadFromStreamEx(Stream, CP_UTF8);
            end
          else
            begin
{Now we should try reading the file for any other clues. It's likely, though,
that if the above test failed and yet it says it's UTF-8, it's actually not; but
we have to honour the declaration, I think.  }
              if FileHasUTF8Header(FileName) then
                TntStrings.AnsiStrings.LoadFromStreamEx(Stream, CP_UTF8)
              else
                TntStrings.AnsiStrings.LoadFromStream(Stream);
            end;
        end
      else
        begin
//It's got a BOM, so we can load it in the normal way
          TntStrings.LoadFromStream(Stream);
      end;
    finally
      Stream.Free;
    end;

    Result := TntStrings.Text;
  finally
    TntStrings.Free;
  end;
end;

function GetFileCharSet(FileName: WideString): TTntStreamCharSet;
var
Stream: TTntFileStream;

begin
  Stream := TTntFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
  try
//First, check for charset in the normal way
    Result := AutoDetectCharacterSet(Stream);
  finally
    Stream.Free;
  end;
end;

//This takes a wide string, and if it's longer than MaxLen, truncates it
//and adds an ellipsis.
function WTruncWSToMaxLen(InString: WideString; MaxLen: integer): WideString;
begin
  Result := InString;
  if MaxLen < 1 then
    Exit;
  if Length(InString) <= MaxLen then
    Exit
  else
    begin
      if MaxLen >=4 then
        begin
          Result := Copy(InString, 1, MaxLen-3) + WideString('...');
        end
      else
        Result := Copy(InString, 1, MaxLen);
    end;
end;

end.
