Skip to content

Commit

Permalink
Merge pull request ZipArchive#443 from palmin/legacy-filename-encoding
Browse files Browse the repository at this point in the history
don't assume entries made from dos have utf-8 filename unless flag says so
  • Loading branch information
Coeur authored Apr 3, 2018
2 parents e6fcf48 + 4410d42 commit 890ddbf
Showing 1 changed file with 35 additions and 3 deletions.
38 changes: 35 additions & 3 deletions SSZipArchive/SSZipArchive.m
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,10 @@ + (BOOL)unzipFileAtPath:(NSString *)path

BOOL fileIsSymbolicLink = _fileIsSymbolicLink(&fileInfo);

NSString * strPath = [SSZipArchive _filenameStringWithCString:filename size:fileInfo.size_filename];
NSString * strPath = [SSZipArchive _filenameStringWithCString:filename
version_made_by:fileInfo.version
general_purpose_flag:fileInfo.flag
size:fileInfo.size_filename];
if ([strPath hasPrefix:@"__MACOSX/"]) {
// ignoring resource forks: https://superuser.com/questions/104500/what-is-macosx-folder
unzCloseCurrentFile(zip);
Expand Down Expand Up @@ -856,8 +859,37 @@ - (BOOL)close

#pragma mark - Private

+ (NSString *)_filenameStringWithCString:(const char *)filename size:(uint16_t)size_filename
{
+ (NSString *)_filenameStringWithCString:(const char *)filename
version_made_by:(uint16_t)version_made_by
general_purpose_flag:(uint16_t)flag
size:(uint16_t)size_filename {

// Respect Language encoding flag only reading filename as UTF-8 when this is set
// when file entry created on dos system.
//
// https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
// Bit 11: Language encoding flag (EFS). If this bit is set,
// the filename and comment fields for this file
// MUST be encoded using UTF-8. (see APPENDIX D)
uint16_t made_by = version_made_by >> 8;
BOOL made_on_dos = made_by == 0;
BOOL languageEncoding = (flag & (1 << 11)) != 0;
if(!languageEncoding && made_on_dos) {
// APPNOTE.TXT D.1:
// D.2 If general purpose bit 11 is unset, the file name and comment should conform
// to the original ZIP character encoding. If general purpose bit 11 is set, the
// filename and comment must support The Unicode Standard, Version 4.1.0 or
// greater using the character encoding form defined by the UTF-8 storage
// specification. The Unicode Standard is published by the The Unicode
// Consortium (www.unicode.org). UTF-8 encoded data stored within ZIP files
// is expected to not include a byte order mark (BOM).

// Code Page 437 corresponds to kCFStringEncodingDOSLatinUS
NSStringEncoding encoding = CFStringConvertEncodingToNSStringEncoding(kCFStringEncodingDOSLatinUS);
NSString* strPath = [NSString stringWithCString:filename encoding:encoding];
if(strPath) return strPath;
}

// attempting unicode encoding
NSString * strPath = @(filename);
if (strPath) {
Expand Down

0 comments on commit 890ddbf

Please sign in to comment.