diff --git a/src/xz/file_io.c b/src/xz/file_io.c index cea83969..680412c1 100644 --- a/src/xz/file_io.c +++ b/src/xz/file_io.c @@ -21,6 +21,10 @@ static bool warn_fchown; #endif +#ifdef HAVE_DIRENT_H +# include +#endif + #if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) # include #elif defined(HAVE__FUTIME) @@ -794,7 +798,7 @@ io_open_src_real(file_pair *pair) if (was_symlink) message_warning(_("%s: Is a symbolic link, " "skipping"), pair->src_name); - else { + else #endif { #ifdef _WIN32 @@ -805,7 +809,7 @@ io_open_src_real(file_pair *pair) if (errno == EACCES) { pair->is_directory = should_parse_dir_windows( pair->src_name); - return pair->is_directory; + return !pair->is_directory; } #else // Something else than O_NOFOLLOW failing @@ -815,7 +819,6 @@ io_open_src_real(file_pair *pair) strerror(errno)); #endif } - return true; } @@ -847,9 +850,13 @@ io_open_src_real(file_pair *pair) // Do not allow symlinks with recursive mode because this // could lead to a loop in the file system and thus infinite // recursion. If a symlink is detected, skip it. - // S_ISLNK and lstat() are not available with MSVC so these need - // to be in an #ifdef + // S_ISLNK and lstat() are not available with MSVC so these + // need to be in an #ifdef if (follow_symlinks) { +#ifdef _WIN32 + if (!should_parse_dir_windows(pair->src_name)) + goto error; +#else if (lstat(pair->src_name, &pair->src_st) != 0) goto error_msg; @@ -858,6 +865,7 @@ io_open_src_real(file_pair *pair) "directory, skipping"), pair->src_name); goto error; } +#endif } (void)close(pair->src_fd); @@ -1567,3 +1575,171 @@ io_write(file_pair *pair, const io_buf *buf, size_t size) return io_write_buf(pair, buf->u8, size); }
+
+
+#if defined(_MSC_VER) || defined(HAVE_DIRENT_H)
+struct directory_iter_s {
+#if defined(_MSC_VER)
+	// Search handle returned by FindFirstFile(). It is
+	// INVALID_HANDLE_VALUE until directory_iter_next() opens it.
+	HANDLE dir;
+
+	// The path must be saved because the call to
+	// directory_iterator_init() does not actually open
+	// the directory HANDLE. There is not a way to open
+	// the directory without reading the first entry.
+	// Instead, the search path is prepared in
+	// directory_iterator_init() so the first call to
+	// directory_iter_next() will be able to use the saved
+	// path.
+	char *path;
+
+	// Windows uses FindFirstFile() to do the first search and
+	// open the HANDLE to the directory. After that, FindNextFile()
+	// must be used to continue the search. So this flag marks if
+	// FindFirstFile() or FindNextFile() should be used.
+	bool first;
+#elif defined(HAVE_DIRENT_H)
+	DIR *dir;
+#endif
+};
+
+
+// Allocates the iterator. With MSVC only the search pattern is prepared
+// here; on other systems the directory is opened with opendir().
+extern directory_iter *
+directory_iterator_init(const char *path)
+{
+	directory_iter *iter = xmalloc(sizeof(directory_iter));
+
+#ifdef _MSC_VER
+	iter->first = true;
+
+	// No search handle exists until the first directory_iter_next()
+	// call. Mark it invalid so that directory_iter_close() never
+	// passes an uninitialized HANDLE to FindClose().
+	iter->dir = INVALID_HANDLE_VALUE;
+
+	const size_t path_len = strlen(path);
+	char *path_search = xmalloc(path_len + 3);
+	memcpy(path_search, path, path_len);
+
+	// The Windows directory search functions take a wildcard pattern
+	// instead of just the directory name. Since we want all files in
+	// the directory, we need to append the wildcard character (*) to
+	// the end of the path.
+	//
+	// Note: It does not matter if the path parameter ends with the
+	// path separator. The search path is not displayed and the
+	// proper path name extension is handled elsewhere.
+	path_search[path_len] = PATH_SEP;
+	path_search[path_len + 1] = '*';
+	path_search[path_len + 2] = '\0';
+
+	iter->path = path_search;
+#else
+	// On some platforms, opendir() can be interrupted so it is
+	// safest to block signals here.
+	signals_block();
+	iter->dir = opendir(path);
+	signals_unblock();
+
+	if (iter->dir == NULL) {
+		// Print the message before free() so that errno still
+		// holds the value that opendir() set.
+		message_error(_("%s: Error opening the directory: %s"),
+				path, strerror(errno));
+		free(iter);
+		return NULL;
+	}
+#endif
+	return iter;
+}
+
+
+// Copies the name of the next directory entry into entry. Returns false
+// when there are no more entries or the directory could not be read.
+extern bool
+directory_iter_next(directory_iter *iter, char *entry, size_t *entry_len)
+{
+	bool next = true;
+	char *next_entry = NULL;
+
+#ifdef _MSC_VER
+	WIN32_FIND_DATA dir_entry;
+	if (iter->first) {
+		iter->dir = FindFirstFile(iter->path, &dir_entry);
+
+		// The existence of the directory is checked in
+		// io_open_src_real() so it is most likely that this
+		// is an empty directory.
+		if (iter->dir == INVALID_HANDLE_VALUE)
+			next = false;
+
+		iter->first = false;
+	}
+	else {
+		next = FindNextFile(iter->dir, &dir_entry);
+	}
+
+	next_entry = dir_entry.cFileName;
+#else
+	// readdir() returns NULL both at the end of the directory and on
+	// error. POSIX says to set errno to zero before the call and to
+	// check it afterwards to tell the two cases apart.
+	errno = 0;
+	struct dirent *dir_entry = readdir(iter->dir);
+
+	if (dir_entry == NULL) {
+		if (errno != 0)
+			message_error(_("Error reading directory entry: %s"),
+					strerror(errno));
+		next = false;
+	} else {
+		// Only read d_name when readdir() returned an entry.
+		next_entry = dir_entry->d_name;
+	}
+#endif
+
+	if (next) {
+		const size_t next_entry_len = strlen(next_entry);
+
+		if (*entry_len <= next_entry_len) {
+			message_error(_("Unexpected directory entry "
+					"length."));
+			*entry_len = 0;
+			return true;
+		}
+
+		// Copy the name together with its null terminator.
+		memcpy(entry, next_entry, next_entry_len + 1);
+		*entry_len = next_entry_len;
+	}
+
+	return next;
+}
+
+
+// Closes the directory and frees the iterator. A NULL iter is ignored.
+extern void
+directory_iter_close(directory_iter *iter)
+{
+	if (iter != NULL) {
+#ifdef _MSC_VER
+		if (iter->dir != INVALID_HANDLE_VALUE
+				&& !FindClose(iter->dir)) {
+			DWORD err = GetLastError();
+			message_windows_error("Error closing directory", err);
+		}
+
+		free(iter->path);
+#else
+		if (closedir(iter->dir))
+			message_error(_("Error closing directory: %s"),
+					strerror(errno));
+#endif
+		free(iter);
+	}
+}
+
+#endif //defined(_MSC_VER) || defined(HAVE_DIRENT_H)
diff --git a/src/xz/file_io.h b/src/xz/file_io.h index 812b677f..40ed7577 100644 --- a/src/xz/file_io.h +++ b/src/xz/file_io.h @@ -26,6 +26,9 @@ # define stat _stat64 # define fstat _fstat64 # define off_t __int64 +# define PATH_SEP '\\' +#else +# define PATH_SEP '/' #endif @@ -193,3 +196,44 @@ extern bool io_pread(file_pair *pair, io_buf *buf, size_t size, uint64_t pos); /// \return On success, zero is returned. On error, -1 is returned /// and error message printed. extern bool io_write(file_pair *pair, const io_buf *buf, size_t size); + +/// Opaque struct representing a directory iterator. This should be used +/// with directory_iterator_init(), directory_iter_next(), and +/// directory_iter_close(). +typedef struct directory_iter_s directory_iter; + + +/// @brief Creates a Directory Iterator +/// +/// This will create and initialize a directory_iter structure. +/// The pointer should not be freed and should instead be passed +/// to directory_iter_close() when it is no longer needed. 
+/// +/// @param path String path to a directory +/// +/// @return On success, a pointer to the directory iterator. +/// On error, NULL. +extern directory_iter * directory_iterator_init(const char* path); + + +/// @brief Iterate to the next directory entry +/// +/// @param iter Pointer to the iterator +/// @param entry Buffer to receive the next directory entry +/// @param entry_len Set this to the size of the entry buffer. On +/// success this is set to the string length of +/// the entry that was copied into entry (does not +/// count the null terminator). +/// +/// @return Returns true if there may be more entries. +/// Returns false otherwise. +extern bool directory_iter_next(directory_iter *iter, char *entry, + size_t *entry_len); + +/// @brief Close the Directory Iterator +/// +/// This cleans up the iterator by closing the directory and freeing +/// all allocated memory. +/// +/// @param iter Pointer to the iterator to close +extern void directory_iter_close(directory_iter *iter); diff --git a/src/xz/main.c b/src/xz/main.c index 05e9f5e3..a6e1c840 100644 --- a/src/xz/main.c +++ b/src/xz/main.c @@ -19,6 +19,24 @@ # include #endif +/// The directory_list type is used in recursive mode to keep track of all +/// the directories that need processing. It is used as a queue to process +/// directories in the order they are discovered. Files, on the other hand, +/// are processed right away to reduce the size of the queue and hence the +/// amount of memory needed to be allocated at any one time. +typedef struct directory_list_s { + /// Path to the directory. This is used as a pointer since it is + /// likely that most directories do not need the full possible file + /// path length allowed by systems. This saves memory in cases where + /// many directories need to be on the queue at the same time. + char *dir_path; + + /// Pointer to the next directory in the queue. This is only a + /// singly linked list since we only ever need to process the queue + /// in one direction. 
+ struct directory_list_s *next; +} directory_list; + /// Exit status to use. This can be changed with set_exit_status(). static enum exit_status_type exit_status = E_SUCCESS; @@ -149,27 +167,188 @@ read_name(const args_info *args) static void process_entry(const char *path) {
-	// Set and possibly print the filename for the progress message.
-	message_filename(path);
+#ifdef HAVE_DECODERS
+	if (opt_mode == MODE_LIST && path == stdin_filename) {
+		message_error(_("--list does not support reading from "
+				"standard input"));
+		return;
+	}
+#endif
 
 	// Open the entry
 	file_pair *pair = io_open_src(path);
 	if (pair == NULL)
 		return;
 
-#ifdef HAVE_DECODERS
-	if (opt_mode == MODE_LIST) {
-		if (path == stdin_filename) {
-			message_error(_("--list does not support reading from "
-					"standard input"));
-			return;
+#if defined(_MSC_VER) || defined(HAVE_DIRENT_H)
+	// io_open_src() will return NULL if the path points to a directory
+	// and we aren't in recursive mode. So there is no need to check
+	// for recursive mode here.
+	if (pair->is_directory) {
+		// Create the queue of directories to process. The first
+		// item in the queue will be the base entry. The first item
+		// is dynamically allocated to simplify the memory freeing
+		// code later on.
+		directory_list *dir_list = xmalloc(sizeof(directory_list));
+
+		dir_list->dir_path = xstrdup(path);
+
+		// Strip trailing path separators but never the first
+		// character. This makes the path compatible with Windows
+		// MSVC search functions and makes the output look nicer.
+		for (size_t i = strlen(path); i > 1
+				&& dir_list->dir_path[i - 1] == PATH_SEP; i--) {
+			dir_list->dir_path[i - 1] = '\0';
 		}
 
-		list_file(pair);
+		dir_list->next = NULL;
+
+		// The current pointer represents the directory we are
+		// currently processing. To start, it is initialized as the
+		// base entry.
+		directory_list *current = dir_list;
+
+		// The pointer to the last item in the queue is used to
+		// append new directories.
+		directory_list *last = dir_list;
+		do {
+			directory_list *next;
+
+			// The iterator initialization will return NULL and
+			// print an error message if there is any kind of
+			// problem. In this case, we can simply continue on
+			// to the next directory to process.
+			directory_iter *iter = directory_iterator_init(
+					current->dir_path);
+
+			// The error message is printed during
+			// directory_iterator_init(), so no need to print
+			// anything before proceeding to the next iteration.
+			if (iter == NULL)
+				goto next_iteration;
+
+			const size_t dir_path_len = strlen(current->dir_path);
+
+			// Set ENTRY_LEN_MAX depending on the system. On
+			// POSIX systems, NAME_MAX will be defined in
+			// <limits.h>. On Windows, the directory parsing
+			// functions have buffers of size MAX_PATH.
+#ifdef TUKLIB_DOSLIKE
+#	define ENTRY_LEN_MAX MAX_PATH
+#else
+#	define ENTRY_LEN_MAX NAME_MAX
+#endif
+			char entry[ENTRY_LEN_MAX + 1];
+			size_t entry_len;
+
+			// The entry_len must be reset each iteration because
+			// directory_iter_next() will only write to the entry
+			// buffer if it can write the entire entry name. If the
+			// value is not reset each time, it will limit the
+			// next entry size based on the last entry's size.
+			while ((entry_len = sizeof(entry))
+					&& directory_iter_next(iter, entry,
+					&entry_len)) {
+				// directory_iter_next() reports a rejected
+				// entry by setting entry_len to 0. Skip it.
+				if (entry_len == 0)
+					continue;
+
+				// Check for '.' and '..' since there is no
+				// point in processing them.
+				if (entry[0] == '.' && ((entry[1] == '.'
+						&& entry[2] == '\0')
+						|| entry[1] == '\0'))
+					continue;
+
+				// The total entry size needs the "+2" to
+				// make room for the directory path separator
+				// and the null terminator.
+				const size_t total_size = entry_len + dir_path_len + 2;
+				char *entry_path = xmalloc(total_size);
+
+				memcpy(entry_path, current->dir_path, dir_path_len);
+
+				char *entry_copy_start = entry_path + dir_path_len;
+
+				entry_path[dir_path_len] = PATH_SEP;
+				entry_copy_start++;
+
+				memcpy(entry_copy_start, entry, entry_len + 1);
+
+				// Try to open the next entry. If it is a file
+				// it will be processed immediately. If it is a
+				// directory it will be added to the queue to
+				// be processed later. Processing files right
+				// away reduces the amount of memory needed
+				// for queue nodes and stored file paths.
+				// Exploring directories only increases the
+				// amount of memory needed so it's better to
+				// prioritize processing files as early as
+				// possible.
+				pair = io_open_src(entry_path);
+
+				if (pair == NULL) {
+					free(entry_path);
+					continue;
+				}
+
+				if (pair->is_directory) {
+					directory_list *next_dir = xmalloc(
+							sizeof(directory_list));
+					next_dir->dir_path = entry_path;
+					next_dir->next = NULL;
+					last->next = next_dir;
+					last = next_dir;
+				} else if (entry[0] == '.'
+						&& opt_mode == MODE_COMPRESS
+						&& !opt_keep_original) {
+					message_warning(_("%s: Hidden file "
+						"skipped during recursive "
+						"compression mode. Use --keep "
+						"to process these files."),
+						entry_path);
+
+					// The file was opened by io_open_src()
+					// but is not processed, so close it
+					// here instead of leaking it.
+					io_close(pair, false);
+					free(entry_path);
+				} else {
+					message_filename(entry_path);
+#ifdef HAVE_DECODERS
+					if (opt_mode == MODE_LIST)
+						list_file(pair);
+					else
+#endif
+						coder_run(pair);
+					free(entry_path);
+				}
+			}
+
+			directory_iter_close(iter);
+next_iteration:
+			next = current->next;
+
+			free(current->dir_path);
+			free(current);
+
+			current = next;
+		} while (current != NULL);
+
 		return;
 	}
-#endif
+#endif // defined(_MSC_VER) || defined(HAVE_DIRENT_H)
+
+	// Set and possibly print the filename for the progress message.
+	message_filename(path);
+
+#ifdef HAVE_DECODERS
+	if (opt_mode == MODE_LIST)
+		list_file(pair);
+	else
+#endif
 
 	coder_run(pair);
 }