mirror of
https://github.com/VSadov/Satori.git
synced 2025-06-09 17:44:48 +09:00
Respect general-purpose bit flags when decoding ZipArchiveEntry names and comments (#103271)
If bit 11 in the general purpose bit flags is set, forces the use of UTF-8 instead of the encoding specified in the ZipArchive constructor.
This commit is contained in:
parent
0ea5ea712e
commit
6cdc448779
6 changed files with 120 additions and 61 deletions
|
@ -4,6 +4,7 @@
|
|||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
|
||||
|
@ -499,5 +500,17 @@ namespace System.IO.Compression.Tests
|
|||
yield return e;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Produces comment test cases whose bytes are written with Latin1 but read back as UTF-8.
/// </summary>
/// <remarks>
/// Each row is derived from a <see cref="Latin1Comment_Data"/> row and contains, in order:
/// the original comment, the expected comment, and the expected comment after its Latin1
/// bytes have been decoded as UTF-8.
/// </remarks>
public static IEnumerable<object[]> MismatchingEncodingComment_Data()
{
    foreach (object[] e in Latin1Comment_Data())
    {
        // Direct cast instead of `as`: a row whose second element is not a string should fail
        // here with an InvalidCastException, not later as an ArgumentNullException from GetBytes.
        string expectedComment = (string)e[1];
        byte[] expectedBytes = Encoding.Latin1.GetBytes(expectedComment);

        yield return new object[] { e[0], expectedComment, Encoding.UTF8.GetString(expectedBytes) };
    }
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -102,39 +102,39 @@ namespace System.IO.Compression
|
|||
/// If the file exists and is not a Zip file, a <code>ZipArchiveException</code> will be thrown.
|
||||
/// If the file exists and is empty or does not exist, a new Zip file will be created.
|
||||
/// Note that creating a Zip file with the <code>ZipArchiveMode.Create</code> mode is more efficient when creating a new Zip file.</param>
|
||||
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names in this ZipArchive.
|
||||
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names and comments in this ZipArchive.
|
||||
/// <para>NOTE: Specifying this parameter to values other than <c>null</c> is discouraged.
|
||||
/// However, this may be necessary for interoperability with ZIP archive tools and libraries that do not correctly support
|
||||
/// UTF-8 encoding for entry names.<br />
|
||||
/// UTF-8 encoding for entry names or comments.<br />
|
||||
/// This value is used as follows:</para>
|
||||
/// <para><strong>Reading (opening) ZIP archive files:</strong></para>
|
||||
/// <para>If <c>entryNameEncoding</c> is not specified (<c>== null</c>):</para>
|
||||
/// <list>
|
||||
/// <item>For entries where the language encoding flag (EFS) in the general purpose bit flag of the local file header is <em>not</em> set,
|
||||
/// use the current system default code page (<c>Encoding.Default</c>) in order to decode the entry name.</item>
|
||||
/// use the current system default code page (<c>Encoding.Default</c>) in order to decode the entry name and comment.</item>
|
||||
/// <item>For entries where the language encoding flag (EFS) in the general purpose bit flag of the local file header <em>is</em> set,
|
||||
/// use UTF-8 (<c>Encoding.UTF8</c>) in order to decode the entry name.</item>
|
||||
/// use UTF-8 (<c>Encoding.UTF8</c>) in order to decode the entry name and comment.</item>
|
||||
/// </list>
|
||||
/// <para>If <c>entryNameEncoding</c> is specified (<c>!= null</c>):</para>
|
||||
/// <list>
|
||||
/// <item>For entries where the language encoding flag (EFS) in the general purpose bit flag of the local file header is <em>not</em> set,
|
||||
/// use the specified <c>entryNameEncoding</c> in order to decode the entry name.</item>
|
||||
/// use the specified <c>entryNameEncoding</c> in order to decode the entry name and comment.</item>
|
||||
/// <item>For entries where the language encoding flag (EFS) in the general purpose bit flag of the local file header <em>is</em> set,
|
||||
/// use UTF-8 (<c>Encoding.UTF8</c>) in order to decode the entry name.</item>
|
||||
/// use UTF-8 (<c>Encoding.UTF8</c>) in order to decode the entry name and comment.</item>
|
||||
/// </list>
|
||||
/// <para><strong>Writing (saving) ZIP archive files:</strong></para>
|
||||
/// <para>If <c>entryNameEncoding</c> is not specified (<c>== null</c>):</para>
|
||||
/// <list>
|
||||
/// <item>For entry names that contain characters outside the ASCII range,
|
||||
/// <item>For entry names or comments that contain characters outside the ASCII range,
|
||||
/// the language encoding flag (EFS) will be set in the general purpose bit flag of the local file header,
|
||||
/// and UTF-8 (<c>Encoding.UTF8</c>) will be used in order to encode the entry name into bytes.</item>
|
||||
/// <item>For entry names that do not contain characters outside the ASCII range,
|
||||
/// and UTF-8 (<c>Encoding.UTF8</c>) will be used in order to encode the entry name and comment into bytes.</item>
|
||||
/// <item>For entry names or comments that do not contain characters outside the ASCII range,
|
||||
/// the language encoding flag (EFS) will not be set in the general purpose bit flag of the local file header,
|
||||
/// and the current system default code page (<c>Encoding.Default</c>) will be used to encode the entry names into bytes.</item>
|
||||
/// and the current system default code page (<c>Encoding.Default</c>) will be used to encode the entry names and comments into bytes.</item>
|
||||
/// </list>
|
||||
/// <para>If <c>entryNameEncoding</c> is specified (<c>!= null</c>):</para>
|
||||
/// <list>
|
||||
/// <item>The specified <c>entryNameEncoding</c> will always be used to encode the entry names into bytes.
|
||||
/// <item>The specified <c>entryNameEncoding</c> will always be used to encode the entry names and comments into bytes.
|
||||
/// The language encoding flag (EFS) in the general purpose bit flag of the local file header will be set if and only
|
||||
/// if the specified <c>entryNameEncoding</c> is a UTF-8 encoding.</item>
|
||||
/// </list>
|
||||
|
@ -322,23 +322,23 @@ namespace System.IO.Compression
|
|||
/// <param name="includeBaseDirectory"><code>true</code> to indicate that a directory named <code>sourceDirectoryName</code> should
|
||||
/// be included at the root of the archive. <code>false</code> to indicate that the files and directories in <code>sourceDirectoryName</code>
|
||||
/// should be included directly in the archive.</param>
|
||||
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names in this ZipArchive.
|
||||
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names and comments in this ZipArchive.
|
||||
/// <para>NOTE: Specifying this parameter to values other than <c>null</c> is discouraged.
|
||||
/// However, this may be necessary for interoperability with ZIP archive tools and libraries that do not correctly support
|
||||
/// UTF-8 encoding for entry names.<br />
|
||||
/// UTF-8 encoding for entry names or comments.<br />
|
||||
/// This value is used as follows while creating the archive:</para>
|
||||
/// <para>If <c>entryNameEncoding</c> is not specified (<c>== null</c>):</para>
|
||||
/// <list>
|
||||
/// <item>For file names that contain characters outside the ASCII range:<br />
|
||||
/// <item>For file names or comments that contain characters outside the ASCII range:<br />
|
||||
/// The language encoding flag (EFS) will be set in the general purpose bit flag of the local file header of the corresponding entry,
|
||||
/// and UTF-8 (<c>Encoding.UTF8</c>) will be used in order to encode the entry name into bytes.</item>
|
||||
/// <item>For file names that do not contain characters outside the ASCII range:<br />
|
||||
/// and UTF-8 (<c>Encoding.UTF8</c>) will be used in order to encode the entry name and comment into bytes.</item>
|
||||
/// <item>For file names or comments that do not contain characters outside the ASCII range:<br />
|
||||
/// the language encoding flag (EFS) will not be set in the general purpose bit flag of the local file header of the corresponding entry,
|
||||
/// and the current system default code page (<c>Encoding.Default</c>) will be used to encode the entry names into bytes.</item>
|
||||
/// and the current system default code page (<c>Encoding.Default</c>) will be used to encode the entry names and comments into bytes.</item>
|
||||
/// </list>
|
||||
/// <para>If <c>entryNameEncoding</c> is specified (<c>!= null</c>):</para>
|
||||
/// <list>
|
||||
/// <item>The specified <c>entryNameEncoding</c> will always be used to encode the entry names into bytes.
|
||||
/// <item>The specified <c>entryNameEncoding</c> will always be used to encode the entry names and comments into bytes.
|
||||
/// The language encoding flag (EFS) in the general purpose bit flag of the local file header for each entry will be set if and only
|
||||
/// if the specified <c>entryNameEncoding</c> is a UTF-8 encoding.</item>
|
||||
/// </list>
|
||||
|
@ -408,7 +408,7 @@ namespace System.IO.Compression
|
|||
/// <param name="destination">The stream where the zip archive is to be stored.</param>
|
||||
/// <param name="compressionLevel">One of the enumeration values that indicates whether to emphasize speed or compression effectiveness when creating the entry.</param>
|
||||
/// <param name="includeBaseDirectory"><see langword="true" /> to include the directory name from <paramref name="sourceDirectoryName" /> at the root of the archive; <see langword="false" /> to include only the contents of the directory.</param>
|
||||
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names in this archive. Specify a value for this parameter only when an encoding is required for interoperability with zip archive tools and libraries that do not support UTF-8 encoding for entry names.</param>
|
||||
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names and comments in this archive. Specify a value for this parameter only when an encoding is required for interoperability with zip archive tools and libraries that do not support UTF-8 encoding for entry names or comments.</param>
|
||||
/// <remarks>
|
||||
/// The directory structure from the file system is preserved in the archive. If the directory is empty, an empty archive is created.
|
||||
/// Use this method overload to specify the compression level and character encoding, and whether to include the base directory in the archive.
|
||||
|
|
|
@ -101,24 +101,24 @@ namespace System.IO.Compression
|
|||
///
|
||||
/// <param name="sourceArchiveFileName">The path to the archive on the file system that is to be extracted.</param>
|
||||
/// <param name="destinationDirectoryName">The path to the directory on the file system. The directory specified must not exist, but the directory that it is contained in must exist.</param>
|
||||
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names in this ZipArchive.
|
||||
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names and comments in this ZipArchive.
|
||||
/// <para>NOTE: Specifying this parameter to values other than <c>null</c> is discouraged.
|
||||
/// However, this may be necessary for interoperability with ZIP archive tools and libraries that do not correctly support
|
||||
/// UTF-8 encoding for entry names.<br />
|
||||
/// UTF-8 encoding for entry names or comments.<br />
|
||||
/// This value is used as follows:</para>
|
||||
/// <para>If <c>entryNameEncoding</c> is not specified (<c>== null</c>):</para>
|
||||
/// <list>
|
||||
/// <item>For entries where the language encoding flag (EFS) in the general purpose bit flag of the local file header is <em>not</em> set,
|
||||
/// use the current system default code page (<c>Encoding.Default</c>) in order to decode the entry name.</item>
|
||||
/// use the current system default code page (<c>Encoding.Default</c>) in order to decode the entry name and comment.</item>
|
||||
/// <item>For entries where the language encoding flag (EFS) in the general purpose bit flag of the local file header <em>is</em> set,
|
||||
/// use UTF-8 (<c>Encoding.UTF8</c>) in order to decode the entry name.</item>
|
||||
/// use UTF-8 (<c>Encoding.UTF8</c>) in order to decode the entry name and comment.</item>
|
||||
/// </list>
|
||||
/// <para>If <c>entryNameEncoding</c> is specified (<c>!= null</c>):</para>
|
||||
/// <list>
|
||||
/// <item>For entries where the language encoding flag (EFS) in the general purpose bit flag of the local file header is <em>not</em> set,
|
||||
/// use the specified <c>entryNameEncoding</c> in order to decode the entry name.</item>
|
||||
/// use the specified <c>entryNameEncoding</c> in order to decode the entry name and comment.</item>
|
||||
/// <item>For entries where the language encoding flag (EFS) in the general purpose bit flag of the local file header <em>is</em> set,
|
||||
/// use UTF-8 (<c>Encoding.UTF8</c>) in order to decode the entry name.</item>
|
||||
/// use UTF-8 (<c>Encoding.UTF8</c>) in order to decode the entry name and comment.</item>
|
||||
/// </list>
|
||||
/// <para>Note that Unicode encodings other than UTF-8 may not be currently used for the <c>entryNameEncoding</c>,
|
||||
/// otherwise an <see cref="ArgumentException"/> is thrown.</para>
|
||||
|
@ -156,24 +156,24 @@ namespace System.IO.Compression
|
|||
/// <param name="sourceArchiveFileName">The path to the archive on the file system that is to be extracted.</param>
|
||||
/// <param name="destinationDirectoryName">The path to the directory in which to place the extracted files, specified as a relative or absolute path. A relative path is interpreted as relative to the current working directory.</param>
|
||||
/// <param name="overwriteFiles">True to indicate overwrite.</param>
|
||||
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names in this ZipArchive.
|
||||
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names and comments in this ZipArchive.
|
||||
/// <para>NOTE: Specifying this parameter to values other than <c>null</c> is discouraged.
|
||||
/// However, this may be necessary for interoperability with ZIP archive tools and libraries that do not correctly support
|
||||
/// UTF-8 encoding for entry names.<br />
|
||||
/// UTF-8 encoding for entry names or comments.<br />
|
||||
/// This value is used as follows:</para>
|
||||
/// <para>If <c>entryNameEncoding</c> is not specified (<c>== null</c>):</para>
|
||||
/// <list>
|
||||
/// <item>For entries where the language encoding flag (EFS) in the general purpose bit flag of the local file header is <em>not</em> set,
|
||||
/// use the current system default code page (<c>Encoding.Default</c>) in order to decode the entry name.</item>
|
||||
/// use the current system default code page (<c>Encoding.Default</c>) in order to decode the entry name and comment.</item>
|
||||
/// <item>For entries where the language encoding flag (EFS) in the general purpose bit flag of the local file header <em>is</em> set,
|
||||
/// use UTF-8 (<c>Encoding.UTF8</c>) in order to decode the entry name.</item>
|
||||
/// use UTF-8 (<c>Encoding.UTF8</c>) in order to decode the entry name and comment.</item>
|
||||
/// </list>
|
||||
/// <para>If <c>entryNameEncoding</c> is specified (<c>!= null</c>):</para>
|
||||
/// <list>
|
||||
/// <item>For entries where the language encoding flag (EFS) in the general purpose bit flag of the local file header is <em>not</em> set,
|
||||
/// use the specified <c>entryNameEncoding</c> in order to decode the entry name.</item>
|
||||
/// use the specified <c>entryNameEncoding</c> in order to decode the entry name and comment.</item>
|
||||
/// <item>For entries where the language encoding flag (EFS) in the general purpose bit flag of the local file header <em>is</em> set,
|
||||
/// use UTF-8 (<c>Encoding.UTF8</c>) in order to decode the entry name.</item>
|
||||
/// use UTF-8 (<c>Encoding.UTF8</c>) in order to decode the entry name and comment.</item>
|
||||
/// </list>
|
||||
/// <para>Note that Unicode encodings other than UTF-8 may not be currently used for the <c>entryNameEncoding</c>,
|
||||
/// otherwise an <see cref="ArgumentException"/> is thrown.</para>
|
||||
|
@ -250,17 +250,17 @@ namespace System.IO.Compression
|
|||
/// </summary>
|
||||
/// <param name="source">The stream from which the zip archive is to be extracted.</param>
|
||||
/// <param name="destinationDirectoryName">The path to the directory in which to place the extracted files, specified as a relative or absolute path. A relative path is interpreted as relative to the current working directory.</param>
|
||||
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names in this archive. Specify a value for this parameter only when an encoding is required for interoperability with zip archive tools and libraries that do not support UTF-8 encoding for entry names.</param>
|
||||
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names and comments in this archive. Specify a value for this parameter only when an encoding is required for interoperability with zip archive tools and libraries that do not support UTF-8 encoding for entry names or comments.</param>
|
||||
/// <remarks> This method creates the specified directory and all subdirectories. The destination directory cannot already exist.
|
||||
/// Exceptions related to validating the paths in the <paramref name="destinationDirectoryName"/> or the files in the zip archive contained in <paramref name="source"/> parameters are thrown before extraction. Otherwise, if an error occurs during extraction, the archive remains partially extracted.
|
||||
/// Each extracted file has the same relative path to the directory specified by <paramref name="destinationDirectoryName"/> as its source entry has to the root of the archive.
|
||||
/// If a file to be archived has an invalid last modified time, the first date and time representable in the Zip timestamp format (midnight on January 1, 1980) will be used.</remarks>
|
||||
/// If <paramref name="entryNameEncoding"/> is set to a value other than <see langword="null"/>, entry names are decoded according to the following rules:
|
||||
/// - For entry names where the language encoding flag (in the general-purpose bit flag of the local file header) is not set, the entry names are decoded by using the specified encoding.
|
||||
/// - For entries where the language encoding flag is set, the entry names are decoded by using UTF-8.
|
||||
/// If <paramref name="entryNameEncoding"/> is set to <see langword="null"/>, entry names are decoded according to the following rules:
|
||||
/// - For entries where the language encoding flag (in the general-purpose bit flag of the local file header) is not set, entry names are decoded by using the current system default code page.
|
||||
/// - For entries where the language encoding flag is set, the entry names are decoded by using UTF-8.
|
||||
/// If <paramref name="entryNameEncoding"/> is set to a value other than <see langword="null"/>, entry names and comments are decoded according to the following rules:
|
||||
/// - For entry names and comments where the language encoding flag (in the general-purpose bit flag of the local file header) is not set, the entry names and comments are decoded by using the specified encoding.
|
||||
/// - For entries where the language encoding flag is set, the entry names and comments are decoded by using UTF-8.
|
||||
/// If <paramref name="entryNameEncoding"/> is set to <see langword="null"/>, entry names and comments are decoded according to the following rules:
|
||||
/// - For entries where the language encoding flag (in the general-purpose bit flag of the local file header) is not set, entry names and comments are decoded by using the current system default code page.
|
||||
/// - For entries where the language encoding flag is set, the entry names and comments are decoded by using UTF-8.
|
||||
/// <exception cref="ArgumentException"><paramref name="destinationDirectoryName" /> is <see cref="string.Empty" />, contains only white space, or contains at least one invalid character.
|
||||
/// -or-
|
||||
/// <paramref name="entryNameEncoding"/> is set to a Unicode encoding other than UTF-8.</exception>
|
||||
|
@ -287,18 +287,18 @@ namespace System.IO.Compression
|
|||
/// </summary>
|
||||
/// <param name="source">The stream from which the zip archive is to be extracted.</param>
|
||||
/// <param name="destinationDirectoryName">The path to the directory in which to place the extracted files, specified as a relative or absolute path. A relative path is interpreted as relative to the current working directory.</param>
|
||||
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names in this archive. Specify a value for this parameter only when an encoding is required for interoperability with zip archive tools and libraries that do not support UTF-8 encoding for entry names.</param>
|
||||
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names and comments in this archive. Specify a value for this parameter only when an encoding is required for interoperability with zip archive tools and libraries that do not support UTF-8 encoding for entry names or comments.</param>
|
||||
/// <param name="overwriteFiles"><see langword="true" /> to overwrite files; <see langword="false" /> otherwise.</param>
|
||||
/// <remarks> This method creates the specified directory and all subdirectories. The destination directory cannot already exist.
|
||||
/// Exceptions related to validating the paths in the <paramref name="destinationDirectoryName"/> or the files in the zip archive contained in <paramref name="source"/> parameters are thrown before extraction. Otherwise, if an error occurs during extraction, the archive remains partially extracted.
|
||||
/// Each extracted file has the same relative path to the directory specified by <paramref name="destinationDirectoryName"/> as its source entry has to the root of the archive.
|
||||
/// If a file to be archived has an invalid last modified time, the first date and time representable in the Zip timestamp format (midnight on January 1, 1980) will be used.</remarks>
|
||||
/// If <paramref name="entryNameEncoding"/> is set to a value other than <see langword="null"/>, entry names are decoded according to the following rules:
|
||||
/// - For entry names where the language encoding flag (in the general-purpose bit flag of the local file header) is not set, the entry names are decoded by using the specified encoding.
|
||||
/// - For entries where the language encoding flag is set, the entry names are decoded by using UTF-8.
|
||||
/// If <paramref name="entryNameEncoding"/> is set to <see langword="null"/>, entry names are decoded according to the following rules:
|
||||
/// If <paramref name="entryNameEncoding"/> is set to a value other than <see langword="null"/>, entry names and comments are decoded according to the following rules:
|
||||
/// - For entry names and comments where the language encoding flag (in the general-purpose bit flag of the local file header) is not set, the entry names and comments are decoded by using the specified encoding.
|
||||
/// - For entries where the language encoding flag is set, the entry names and comments are decoded by using UTF-8.
|
||||
/// If <paramref name="entryNameEncoding"/> is set to <see langword="null"/>, entry names and comments are decoded according to the following rules:
|
||||
/// - For entries where the language encoding flag (in the general-purpose bit flag of the local file header) is not set, entry names and comments are decoded by using the current system default code page.
|
||||
/// - For entries where the language encoding flag is set, the entry names are decoded by using UTF-8.
|
||||
/// - For entries where the language encoding flag is set, the entry names and comments are decoded by using UTF-8.
|
||||
/// <exception cref="ArgumentException"><paramref name="destinationDirectoryName" /> is <see cref="string.Empty" />, contains only white space, or contains at least one invalid character.
|
||||
/// -or-
|
||||
/// <paramref name="entryNameEncoding"/> is set to a Unicode encoding other than UTF-8.</exception>
|
||||
|
|
|
@ -77,7 +77,7 @@ namespace System.IO.Compression
|
|||
/// <param name="stream">The input or output stream.</param>
|
||||
/// <param name="mode">See the description of the ZipArchiveMode enum. Read requires the stream to support reading, Create requires the stream to support writing, and Update requires the stream to support reading, writing, and seeking.</param>
|
||||
/// <param name="leaveOpen">true to leave the stream open upon disposing the ZipArchive, otherwise false.</param>
|
||||
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names in this ZipArchive.
|
||||
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names and comments in this ZipArchive.
|
||||
/// <para>NOTE: Specifying this parameter to values other than <c>null</c> is discouraged.
|
||||
/// However, this may be necessary for interoperability with ZIP archive tools and libraries that do not correctly support
|
||||
/// UTF-8 encoding for entry names or comments.<br />
|
||||
|
@ -86,30 +86,30 @@ namespace System.IO.Compression
|
|||
/// <para>If <c>entryNameEncoding</c> is not specified (<c>== null</c>):</para>
|
||||
/// <list>
|
||||
/// <item>For entries where the language encoding flag (EFS) in the general purpose bit flag of the local file header is <em>not</em> set,
|
||||
/// use the current system default code page (<c>Encoding.Default</c>) in order to decode the entry name.</item>
|
||||
/// use the current system default code page (<c>Encoding.Default</c>) in order to decode the entry name and comment.</item>
|
||||
/// <item>For entries where the language encoding flag (EFS) in the general purpose bit flag of the local file header <em>is</em> set,
|
||||
/// use UTF-8 (<c>Encoding.UTF8</c>) in order to decode the entry name.</item>
|
||||
/// use UTF-8 (<c>Encoding.UTF8</c>) in order to decode the entry name and comment.</item>
|
||||
/// </list>
|
||||
/// <para>If <c>entryNameEncoding</c> is specified (<c>!= null</c>):</para>
|
||||
/// <list>
|
||||
/// <item>For entries where the language encoding flag (EFS) in the general purpose bit flag of the local file header is <em>not</em> set,
|
||||
/// use the specified <c>entryNameEncoding</c> in order to decode the entry name.</item>
|
||||
/// use the specified <c>entryNameEncoding</c> in order to decode the entry name and comment.</item>
|
||||
/// <item>For entries where the language encoding flag (EFS) in the general purpose bit flag of the local file header <em>is</em> set,
|
||||
/// use UTF-8 (<c>Encoding.UTF8</c>) in order to decode the entry name.</item>
|
||||
/// use UTF-8 (<c>Encoding.UTF8</c>) in order to decode the entry name and comment.</item>
|
||||
/// </list>
|
||||
/// <para><strong>Writing (saving) ZIP archive files:</strong></para>
|
||||
/// <para>If <c>entryNameEncoding</c> is not specified (<c>== null</c>):</para>
|
||||
/// <list>
|
||||
/// <item>For entry names that contain characters outside the ASCII range,
|
||||
/// <item>For entry names and comments that contain characters outside the ASCII range,
|
||||
/// the language encoding flag (EFS) will be set in the general purpose bit flag of the local file header,
|
||||
/// and UTF-8 (<c>Encoding.UTF8</c>) will be used in order to encode the entry name into bytes.</item>
|
||||
/// <item>For entry names that do not contain characters outside the ASCII range,
|
||||
/// and UTF-8 (<c>Encoding.UTF8</c>) will be used in order to encode the entry name and comment into bytes.</item>
|
||||
/// <item>For entry names and comments that do not contain characters outside the ASCII range,
|
||||
/// the language encoding flag (EFS) will not be set in the general purpose bit flag of the local file header,
|
||||
/// and the current system default code page (<c>Encoding.Default</c>) will be used to encode the entry names into bytes.</item>
|
||||
/// and the current system default code page (<c>Encoding.Default</c>) will be used to encode the entry names and comments into bytes.</item>
|
||||
/// </list>
|
||||
/// <para>If <c>entryNameEncoding</c> is specified (<c>!= null</c>):</para>
|
||||
/// <list>
|
||||
/// <item>The specified <c>entryNameEncoding</c> will always be used to encode the entry names into bytes.
|
||||
/// <item>The specified <c>entryNameEncoding</c> will always be used to encode the entry names and comments into bytes.
|
||||
/// The language encoding flag (EFS) in the general purpose bit flag of the local file header will be set if and only
|
||||
/// if the specified <c>entryNameEncoding</c> is a UTF-8 encoding.</item>
|
||||
/// </list>
|
||||
|
|
|
@ -77,7 +77,7 @@ namespace System.IO.Compression
|
|||
_outstandingWriteStream = null;
|
||||
|
||||
_storedEntryNameBytes = cd.Filename;
|
||||
_storedEntryName = (_archive.EntryNameAndCommentEncoding ?? Encoding.UTF8).GetString(_storedEntryNameBytes);
|
||||
_storedEntryName = DecodeEntryString(_storedEntryNameBytes);
|
||||
DetectEntryNameVersion();
|
||||
|
||||
_lhUnknownExtraFields = null;
|
||||
|
@ -200,7 +200,7 @@ namespace System.IO.Compression
|
|||
[AllowNull]
|
||||
public string Comment
|
||||
{
|
||||
get => (_archive.EntryNameAndCommentEncoding ?? Encoding.UTF8).GetString(_fileComment);
|
||||
get => DecodeEntryString(_fileComment);
|
||||
set
|
||||
{
|
||||
_fileComment = ZipHelper.GetEncodedTruncatedBytesFromString(value, _archive.EntryNameAndCommentEncoding, ushort.MaxValue, out bool isUTF8);
|
||||
|
@ -352,6 +352,18 @@ namespace System.IO.Compression
|
|||
return FullName;
|
||||
}
|
||||
|
||||
/// <summary>
/// Decodes the raw bytes of an entry name or comment into a string.
/// </summary>
/// <param name="entryStringBytes">The stored bytes to decode. Must not be <see langword="null"/>.</param>
/// <returns>
/// The text decoded as UTF-8 when the entry's general purpose bit flag has
/// <c>UnicodeFileNameAndComment</c> set; otherwise the text decoded with the archive's
/// <c>EntryNameAndCommentEncoding</c>, falling back to UTF-8 when no archive or encoding is available.
/// </returns>
private string DecodeEntryString(byte[] entryStringBytes)
{
    Debug.Assert(entryStringBytes != null);

    bool isFlaggedAsUnicode =
        (_generalPurposeBitFlag & BitFlagValues.UnicodeFileNameAndComment) == BitFlagValues.UnicodeFileNameAndComment;

    // The per-entry flag takes priority over whatever encoding the archive was opened with.
    if (isFlaggedAsUnicode)
    {
        return Encoding.UTF8.GetString(entryStringBytes);
    }

    Encoding fallbackEncoding = _archive?.EntryNameAndCommentEncoding ?? Encoding.UTF8;
    return fallbackEncoding.GetString(entryStringBytes);
}
|
||||
|
||||
// Only allow opening ZipArchives with large ZipArchiveEntries in update mode when running in a 64-bit process.
|
||||
// This is for compatibility with old behavior that threw an exception for all process bitnesses, because this
|
||||
// will not work in a 32-bit process.
|
||||
|
|
|
@ -33,31 +33,65 @@ namespace System.IO.Compression.Tests
|
|||
public static void Create_Comment_Utf8EntryName_Utf8Encoding(string originalComment, string expectedComment) =>
|
||||
Create_Comment_EntryName_Encoding_Internal(Utf8FileName, originalComment, expectedComment, Encoding.UTF8);
|
||||
|
||||
[Theory]
|
||||
[MemberData(nameof(Utf8Comment_Data))]
|
||||
public static void Create_Comment_Utf8EntryName_Utf8Encoding_Default(string originalComment, string expectedComment) =>
|
||||
// The archive is created with UTF8 and read back without specifying an encoding (null); the comment is expected to round-trip unchanged.
Create_Comment_EntryName_Encoding_Internal(Utf8FileName, originalComment, expectedComment, expectedComment, Encoding.UTF8, null);
|
||||
|
||||
[Theory]
|
||||
[MemberData(nameof(Latin1Comment_Data))]
|
||||
public static void Create_Comment_Utf8EntryName_Latin1Encoding(string originalComment, string expectedComment) =>
|
||||
// Emoji not supported by latin1
|
||||
Create_Comment_EntryName_Encoding_Internal(Utf8AndLatin1FileName, originalComment, expectedComment, Encoding.Latin1);
|
||||
|
||||
[Theory]
|
||||
[MemberData(nameof(Utf8Comment_Data))]
|
||||
public static void Create_Comment_Utf8EntryName_Utf8Encoding_Prioritised(string originalComment, string expectedComment)
|
||||
// UTF8 encoding bit is set in the general-purpose bit flags. The verification encoding of Latin1 should be ignored
|
||||
=> Create_Comment_EntryName_Encoding_Internal(Utf8FileName, originalComment, expectedComment, expectedComment, Encoding.UTF8, Encoding.Latin1);
|
||||
|
||||
[Theory]
|
||||
[MemberData(nameof(MismatchingEncodingComment_Data))]
|
||||
public static void Create_Comment_AsciiEntryName_Utf8Decoding_Invalid(string originalComment, string expectedPreWriteComment, string expectedPostWriteComment)
|
||||
// The UTF8 encoding bit in the general-purpose bit flags should not be set and the comment should be encoded with Latin1, so
|
||||
// decoding with UTF8 should result in an incorrect comment. This is because the comment contains code points in the range
|
||||
// 0xC0..0xFF (which Latin1 encodes in one byte, and UTF8 encodes in two bytes.) The entry name is still expected to round-trip unchanged.
|
||||
=> Create_Comment_EntryName_Encoding_Internal(AsciiFileName, originalComment, expectedPreWriteComment, expectedPostWriteComment, Encoding.Latin1, Encoding.UTF8);
|
||||
|
||||
[Theory]
|
||||
[MemberData(nameof(MismatchingEncodingComment_Data))]
|
||||
public static void Create_Comment_AsciiEntryName_DefaultDecoding_Utf8(string originalComment, string expectedPreWriteComment, string expectedPostWriteComment)
|
||||
// The comment should be encoded with Latin1, resulting in the UTF8 encoding bit in the general-purpose bit flags not being set.
|
||||
// However, failing to specify an encoding (or specifying a null encoding) for the read should result in UTF8 being used anyway.
|
||||
// This should result in an incorrect comment, since the comment contains code points in the range 0xC0..0xFF (which
|
||||
// Latin1 encodes in one byte, and UTF8 encodes in two bytes.) The entry name is still expected to round-trip unchanged.
|
||||
=> Create_Comment_EntryName_Encoding_Internal(AsciiFileName, originalComment, expectedPreWriteComment, expectedPostWriteComment, Encoding.Latin1, null);
|
||||
|
||||
private static void Create_Comment_EntryName_Encoding_Internal(string entryName, string originalComment, string expectedComment, Encoding encoding)
|
||||
=> Create_Comment_EntryName_Encoding_Internal(entryName, originalComment, expectedComment, expectedComment, encoding, encoding);
|
||||
|
||||
private static void Create_Comment_EntryName_Encoding_Internal(string entryName, string originalComment,
|
||||
string expectedPreWriteComment, string expectedPostWriteComment,
|
||||
Encoding creationEncoding, Encoding verificationEncoding)
|
||||
{
|
||||
using var ms = new MemoryStream();
|
||||
|
||||
using (var zip = new ZipArchive(ms, ZipArchiveMode.Create, leaveOpen: true, encoding))
|
||||
using (var zip = new ZipArchive(ms, ZipArchiveMode.Create, leaveOpen: true, creationEncoding))
|
||||
{
|
||||
ZipArchiveEntry entry = zip.CreateEntry(entryName, CompressionLevel.NoCompression);
|
||||
entry.Comment = originalComment;
|
||||
Assert.Equal(expectedComment, entry.Comment);
|
||||
// The expected pre-write and post-write comment can be different when testing encodings which vary between operations.
|
||||
Assert.Equal(expectedPreWriteComment, entry.Comment);
|
||||
}
|
||||
|
||||
using (var zip = new ZipArchive(ms, ZipArchiveMode.Read, leaveOpen: false, encoding))
|
||||
using (var zip = new ZipArchive(ms, ZipArchiveMode.Read, leaveOpen: false, verificationEncoding))
|
||||
{
|
||||
foreach (ZipArchiveEntry entry in zip.Entries)
|
||||
{
|
||||
Assert.Equal(entryName, entry.Name);
|
||||
Assert.Equal(expectedComment, entry.Comment);
|
||||
Assert.Equal(expectedPostWriteComment, entry.Comment);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue