<TITLE> értékének megszerzése

<TITLE> értékének megszerzése
2006-11-05T11:05:37+01:00
2006-11-05T20:47:53+01:00
2022-11-02T03:40:48+01:00
3delite
Hi!

Egy egyszerű Vasárnap délelőtti kérdésem van: Hogy tudom a lehető a legegyszerűbben megszerezni a <TITLE> tag értékét egy .html, .htm file-ból?

A problémám ott van, hogy a <TITLE> lehet írva kicsivel is, vagy naggyal is (én mindig naggyal írom), így már nem is olyan egyszerű megtalálni.

A kódom (félkész):

function GetHTMLTitle(FileName: String): String; var Dok: TStrings; Fext: String; i: Integer; begin Result := ''; if NOT FileExists(FileName) then Exit; Fext := AnsiUpperCase(ExtractFileExt(FileName)); try Dok := TStringList.Create; try Dok.LoadFromFile(FileName); if (Fext = '.HTM') OR (Fext = '.HTML') then begin //* <title>Title</title> //* Search twice: lower and uppercase i := Pos('<title>', Dok.Text); if i > 0 then begin end else begin i := Pos('<TITLE>', Dok.Text); end; if Result <> '' then begin Result := HTTPDecode(Result); Result := HTMLDecode(Result); end; end; if (Fext = '.MHT') then begin //* Subject: Title //* Search is limited to only the first 16 lines for i := 0 to 16 do begin if UpperCase(Copy(Dok[i], 1, 8)) = 'SUBJECT:' then begin Result := Copy(Dok[i], 10, Length(Dok[i])); end; end; if Result <> '' then begin Result := HTTPDecode(Result); Result := HTMLDecode(Result); end; end; except //* end; finally FreeAndNil(Dok); end; end;

Üdv
3delite
Mutasd a teljes hozzászólást!
i := Pos('<TITLE>', ansiuppercase(Dok.Text));
Mutasd a teljes hozzászólást!

  • Ok eddig megvolnánk, thanx, a továbbfejlesztett forrás:

    function GetHTMLTitle(FileName: String): String; var Dok: TStrings; Fext, Text: String; i: Integer; StartPos, EndPos: Int64; begin Result := ''; if NOT FileExists(FileName) then Exit; Fext := AnsiUpperCase(ExtractFileExt(FileName)); try Dok := TStringList.Create; try Dok.LoadFromFile(FileName); if (Fext = '.HTM') OR (Fext = '.HTML') then begin //* <title>Title</title> Text := AnsiUpperCase(Dok.Text); StartPos := Pos('<TITLE>', Text) + 7; EndPos := Pos('</TITLE>', Text); if i > 0 then begin Result := Copy(Dok.Text, StartPos, EndPos - StartPos); end; end; if (Fext = '.MHT') then begin //* Subject: Title //* Search is limited to only the first 16 lines for i := 0 to 15 do begin if i >= Dok.Count then Break; if UpperCase(Copy(Dok[i], 1, 8)) = 'SUBJECT:' then begin Result := Copy(Dok[i], 10, Length(Dok[i])); Break; end; end; end; if Result <> '' then begin Result := DecodeUnicode(Result); //Result := UTF8Decode(Result); Result := HTTPDecode(Result); Result := HTMLDecode(Result); end; except //* end; finally FreeAndNil(Dok); end; end;

    Következő probléma: MIME encode-olt string konvertálása

    Üdv
    Buddha arc
    Mutasd a teljes hozzászólást!
  • Hogy ne nézzen át 1GB-os file-okat teljesen:

    function GetHTMLTitle(FileName: String): String; var Dok: TStrings; Fext, Text: String; i: Integer; StartPos, EndPos: Int64; begin Result := ''; if NOT FileExists(FileName) then Exit; Fext := AnsiUpperCase(ExtractFileExt(FileName)); try Dok := TStringList.Create; try Dok.LoadFromFile(FileName); if (Fext = '.HTM') OR (Fext = '.HTML') then begin //* <title>Title</title> //* Limit to 10KB Text := AnsiUpperCase(Copy(Dok.Text, 1, 1024 * 10)); StartPos := Pos('<TITLE>', Text) + 7; EndPos := Pos('</TITLE>', Text); if i > 0 then begin Result := Copy(Dok.Text, StartPos, EndPos - StartPos); end; end; if (Fext = '.MHT') then begin //* Subject: Title //* Search is limited to only the first 16 lines for i := 0 to 15 do begin if i >= Dok.Count then Break; if UpperCase(Copy(Dok[i], 1, 8)) = 'SUBJECT:' then begin Result := Copy(Dok[i], 10, Length(Dok[i])); Break; end; end; //Result := MimeDecodeString(Result); end; if Result <> '' then begin Result := DecodeUnicode(Result); //Result := UTF8Decode(Result); Result := HTTPDecode(Result); Result := HTMLDecode(Result); end; except //* end; finally FreeAndNil(Dok); end; end;
    Mutasd a teljes hozzászólást!
  • És akkor a kész változat:

    //* MIMEDecodeStr() by 3delite //* Decode a MIME (Base64) encoded line function MIMEDecodeStr(MIMEStr: String): String; var Buffer: String; Botu: Char; i: Integer; StartPos, Offset: Int64; begin try Result := MIMEStr; StartPos := Pos('Q?', Result) + 2; if StartPos > 2 then begin Result := Copy(Result, StartPos, Length(Result)); Buffer := ''; Offset := 0; for i := 1 to Length(Result) - 2 do begin Botu := Result [i + Offset]; if Botu = '=' then begin Buffer := Buffer + Chr(HexToInt(Result [i + 1 + Offset] + Result [i + 2 + Offset])); Inc(Offset, 2); //Inc(i, 2); end else Buffer := Buffer + Result [i + Offset]; if i + Offset >= Length(Result) - 2 then Break; end; Result := ANSIReplaceStr(Buffer, '_', ' '); end; except //* end; end; //* GetHTMLTitle() by 3delite //* Get the Title of .html .htm .mht files function GetHTMLTitle(FileName: String): String; var Dok: TStrings; Fext, Text: String; i, k: Integer; StartPos, EndPos, Offset: Int64; begin Result := ''; if NOT FileExists(FileName) then Exit; Fext := AnsiUpperCase(ExtractFileExt(FileName)); try Dok := TStringList.Create; try Dok.LoadFromFile(FileName); if (Fext = '.HTM') OR (Fext = '.HTML') then begin //* <title>Title</title> //* Limit to 20KB Text := AnsiUpperCase(Copy(Dok.Text, 1, 1024 * 20)); StartPos := Pos('<TITLE>', Text) + 7; EndPos := Pos('</TITLE>', Text); if i > 0 then begin Result := Copy(Dok.Text, StartPos, EndPos - StartPos); end; end; if (Fext = '.MHT') then begin //* Subject: Title //* Search is limited to only the first 16 lines k := 1; for i := 0 to 15 do begin if i >= Dok.Count then Break; if UpperCase(Copy(Dok[i], 1, 8)) = 'SUBJECT:' then begin Result := MIMEDecodeStr(Trim(Copy(Dok[i], 10, Length(Dok[i])))); while (UpperCase(Copy(Dok[i + k], 1, 5)) <> 'DATE:') AND (i <= Dok.Count) do begin Result := Result + MIMEDecodeStr(Trim(Copy(Dok[i + k], 1, Length(Dok[i + k])))); Inc(k); end; Break; end; end; end; if Result <> '' then begin Result := DecodeUnicode(Result); //Result := UTF8Decode(Result); Result := HTTPDecode(Result); Result := HTMLDecode(Result); end; except //* end; finally FreeAndNil(Dok); end; end;

    Mutasd a teljes hozzászólást!
  • Apró javítás (sorry, nem lehet módosítani az előzőt) a sortört <TITLE>-t ís elbírja:

    //* MIMEDecodeStr() by 3delite //* Decode a MIME (Base64) encoded line function MIMEDecodeStr(MIMEStr: String): String; var Buffer: String; Botu: Char; i: Integer; StartPos, Offset: Int64; begin try Result := MIMEStr; StartPos := Pos('Q?', Result) + 2; if StartPos > 2 then begin Result := Copy(Result, StartPos, Length(Result)); Buffer := ''; Offset := 0; for i := 1 to Length(Result) - 2 do begin Botu := Result [i + Offset]; if Botu = '=' then begin Buffer := Buffer + Chr(HexToInt(Result [i + 1 + Offset] + Result [i + 2 + Offset])); Inc(Offset, 2); //Inc(i, 2); end else Buffer := Buffer + Result [i + Offset]; if i + Offset >= Length(Result) - 2 then Break; end; Result := ANSIReplaceStr(Buffer, '_', ' '); end; except //* end; end; //* GetHTMLTitle() by 3delite //* Get the Title of .html .htm .mht files function GetHTMLTitle(FileName: String): String; var Dok: TStrings; Fext, Text: String; i, k: Integer; StartPos, EndPos, Offset: Int64; begin Result := ''; if NOT FileExists(FileName) then Exit; Fext := AnsiUpperCase(ExtractFileExt(FileName)); try Dok := TStringList.Create; try Dok.LoadFromFile(FileName); if (Fext = '.HTM') OR (Fext = '.HTML') then begin //* <title>Title</title> //* Limit to 20KB Text := AnsiUpperCase(Copy(Dok.Text, 1, 1024 * 20)); StartPos := Pos('<TITLE>', Text) + 7; EndPos := Pos('</TITLE>', Text); if i > 0 then begin Result := Copy(Dok.Text, StartPos, EndPos - StartPos); Result := ANSIReplaceStr(Result, (#13#10), ' '); end; end; if (Fext = '.MHT') then begin //* Subject: Title //* Search is limited to only the first 16 lines k := 1; for i := 0 to 15 do begin if i >= Dok.Count then Break; if UpperCase(Copy(Dok[i], 1, 8)) = 'SUBJECT:' then begin Result := MIMEDecodeStr(Trim(Copy(Dok[i], 10, Length(Dok[i])))); while (UpperCase(Copy(Dok[i + k], 1, 5)) <> 'DATE:') AND (i <= Dok.Count) do begin Result := Result + MIMEDecodeStr(Trim(Copy(Dok[i + k], 1, Length(Dok[i + k])))); Inc(k); end; Break; end; end; end; if Result <> '' then begin Result := DecodeUnicode(Result); //Result := UTF8Decode(Result); Result := HTTPDecode(Result); Result := HTMLDecode(Result); end; except //* end; finally FreeAndNil(Dok); end; end;
    Mutasd a teljes hozzászólást!
Tetszett amit olvastál? Szeretnél a jövőben is értesülni a hasonló érdekességekről?
abcd