From 87256aa202f85f342e6fa188cbcfae718e919b10 Mon Sep 17 00:00:00 2001 From: Dunstan Becht Date: Mon, 3 Apr 2023 18:23:53 -0400 Subject: [PATCH 1/7] correct upload command --- setup.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.sh b/setup.sh index ee258aa..092cef7 100755 --- a/setup.sh +++ b/setup.sh @@ -107,7 +107,8 @@ lint() upload() { python3 -m build - python3 -m twine upload dist/*.tar.gz dist/*-none-any.whl --skip-existing + dist=$(find dist -name "*.tar.gz" -o -name "*-none-any.whl") + python3 -m twine upload $dist --skip-existing } # install and activate the virtual environment -- GitLab From c2d42e993869d1d1da7d519dcb671d67f5f995f3 Mon Sep 17 00:00:00 2001 From: Dunstan Becht Date: Fri, 2 Jun 2023 04:05:29 +0200 Subject: [PATCH 2/7] add file contents checking --- pyproject.toml | 4 + src/treecker/core/comparison.py | 94 -------------- src/treecker/core/contents.py | 220 ++++++++++++++++++++++++++++++++ src/treecker/core/display.py | 76 +++++++++++ src/treecker/core/naming.py | 63 ++------- src/treecker/core/tree.py | 46 +++++++ src/treecker/default.conf | 12 +- src/treecker/main/commit.py | 3 +- src/treecker/main/issues.py | 35 ++++- src/treecker/main/status.py | 3 +- tests/references.json | 6 +- tests/run.py | 6 +- 12 files changed, 412 insertions(+), 156 deletions(-) delete mode 100644 src/treecker/core/comparison.py create mode 100644 src/treecker/core/contents.py create mode 100644 src/treecker/core/display.py diff --git a/pyproject.toml b/pyproject.toml index e415995..c47c7f8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,10 @@ classifiers = [ "Topic :: System :: Filesystems", "Intended Audience :: End Users/Desktop", ] +dependencies = [ + "Pillow>=9.5.0", + "PyPDF2>=3.0.0", +] [tool.setuptools_scm] write_to = "src/treecker/_version.py" diff --git a/src/treecker/core/comparison.py b/src/treecker/core/comparison.py deleted file mode 100644 index 3c02d53..0000000 --- a/src/treecker/core/comparison.py +++ /dev/null @@ -1,94 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Comparison module. - -This module implements the comparison between two trees. - -""" - -from logging import getLogger -from pathlib import Path - -from treecker import config -from treecker.core.colors import colorize - - -logger = getLogger(__name__) - - -def get_differences(old, new, hashing, path=None): - """Return a list of the differences between two tree objects. - - Parameters - ---------- - old : dict - Old directory node. - new : dict - New directory node. - hashing : bool - Compare file hash values. - path : list - Initial path. - - Returns - ------- - list - Differences betwen the two nodes. - - """ - logger.debug("getting differences at %s", path) - if path is None: - path = [] - listing = [] - if isinstance(old, dict) and isinstance(new, dict): - for node in old: - if node in new: - listing += get_differences( - old[node], - new[node], - hashing, - path+[node], - ) - else: - listing.append({'type': 'removed', 'path': path+[node]}) - for node in new: - if node not in old: - listing.append({'type': 'added', 'path': path+[node]}) - elif isinstance(old, dict) or isinstance(new, dict): - listing.append({'type': 'removed', 'path': path}) - listing.append({'type': 'added', 'path': path}) - elif (old[0] != new[0]) or hashing and (old[1] != new[1]): - listing.append({'type': 'edited', 'path': path}) - return listing - - -def differences_log(differences): - """Return a printable log of the differences. - - Parameters - ---------- - differences : list - List of differences. - - Returns - ------- - str - Differences log. - - """ - logger.debug("creating difference log") - color, symbol = {}, {} - for name, value in config[__name__].items(): - if name.startswith('color_'): - color[name[6:]] = value - elif name.startswith('symbol_'): - symbol[name[7:]] = value - lines = [] - for diff in differences: - path = Path(*diff['path']) - line = colorize(f"{symbol[diff['type']]} {path}", color[diff['type']]) - lines.append(line) - if len(differences) == 0: - lines.append("no change found") - log = "\n".join(lines) - return log diff --git a/src/treecker/core/contents.py b/src/treecker/core/contents.py new file mode 100644 index 0000000..eabeb5b --- /dev/null +++ b/src/treecker/core/contents.py @@ -0,0 +1,220 @@ +# -*- coding: utf-8 -*- + +"""Contents module. + +This module implements the file contents check. + +""" + +from fnmatch import fnmatch +from logging import getLogger +from platform import system +from tarfile import open as tar_open, TarError +from zipfile import BadZipFile, ZipFile + +from PIL import Image, UnidentifiedImageError +from PyPDF2 import PdfReader +from PyPDF2.errors import PyPdfError + +from treecker import config + + +logger = getLogger(__name__) + +Image.MAX_IMAGE_PIXELS = None + + +def pdf_issues(path): + """Return the problems encountered with the PDF file. + + Parameters + ---------- + path : str + Path of the file to be checked. + + Returns + ------- + list of str + Error messages. + + """ + with open(path, 'rb') as file: + try: + PdfReader(file, strict=False) + return [] + except PyPdfError as exception: + return [str(exception)] + + +def picture_issues(path): + """Return the problems encountered with the picture. + + Parameters + ---------- + path : str + Path of the file to be checked. + + Returns + ------- + list of str + Error messages. + + """ + try: + image = Image.open(path) + image.verify() + return [] + except UnidentifiedImageError as exception: + return [str(exception)] + + +def tar_issues(path): + """Return the problems encountered with the TAR archive. + + Parameters + ---------- + path : str + Path of the file to be checked. + + Returns + ------- + list of str + Error messages. + + """ + try: + with tar_open(path, 'r') as tar: + tar.getnames() + return [] + except TarError as exception: + return [str(exception)] + + +def zip_issues(path): + """Return the problems encountered with the ZIP archive. + + Parameters + ---------- + path : str + Path of the file to be checked. + + Returns + ------- + list of str + Error messages. + + """ + try: + with ZipFile(path) as archive: + archive.namelist() + return [] + except BadZipFile as exception: + return [str(exception)] + + +def line_endings(path): + """Return the number of each line ending. + + Parameters + ---------- + path : str + Path of the file to be checked. + + Returns + ------- + dict + Number of occurrences of each ending. + + """ + endings = ( + b'\r\n', + b'\n\r', + b'\n', + b'\r', + ) + counter = dict.fromkeys(endings, 0) + with open(path, 'rb') as file: + for line in file: + for ending in endings: + if line.endswith(ending): + counter[ending] += 1 + break + return counter + + +def text_issues(path): + """Return the problems encountered with the text file. + + Parameters + ---------- + path : str + Path of the file to be checked. + + Returns + ------- + list of str + Error messages. + + """ + issues = [] + if system() == 'Linux': + counter = line_endings(path) + for ending in (b'\r', b'\n\r', b'\r\n'): + if counter[ending]: + issues.append(f"contains {counter[ending]} {ending}") + return issues + + +def is_text(path): + """Return if the file is a text file. + + Parameters + ---------- + path : str + Path of the file to be checked. + + Returns + ------- + bool + True if the file is a text file. + + """ + size = config.getint(__name__, "block_size") + delete = {7, 8, 9, 10, 12, 13, 27} | set(range(32, 256)) - {127} + delete = bytearray(delete) + with open(path, 'rb') as file: + series = file.read(size) + while len(series) > 0: + filtered = bool(series.translate(None, delete)) + if filtered: + return False + series = file.read(size) + return True + + +def file_issues(path): + """Return the problems encountered with the file. + + Parameters + ---------- + path : str + Path of the file to be checked. + + Returns + ------- + list of str + Error messages. + + """ + mapping = { + 'patterns_pdf': pdf_issues, + 'patterns_pic': picture_issues, + 'patterns_tar': tar_issues, + 'patterns_zip': zip_issues, + } + for key, function in mapping.items(): + if any(fnmatch(path, pat) for pat in config[__name__][key].split()): + return function(path) + if is_text(path): + return text_issues(path) + return [] diff --git a/src/treecker/core/display.py b/src/treecker/core/display.py new file mode 100644 index 0000000..17a9b43 --- /dev/null +++ b/src/treecker/core/display.py @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- + +"""Display module. + +This module implements modification and error display. + +""" + +from logging import getLogger +from pathlib import Path + +from treecker import config +from treecker.core.colors import colorize + + +logger = getLogger(__name__) + + +def differences_log(differences): + """Return a printable log of the differences. + + Parameters + ---------- + differences : list + List of differences. + + Returns + ------- + str + Differences log. + + """ + logger.debug("creating difference log") + color, symbol = {}, {} + for name, value in config[__name__].items(): + if name.startswith('color_'): + color[name[6:]] = value + elif name.startswith('symbol_'): + symbol[name[7:]] = value + lines = [] + for diff in differences: + path = Path(*diff['path']) + line = colorize(f"{symbol[diff['type']]} {path}", color[diff['type']]) + lines.append(line) + if len(differences) == 0: + lines.append("no change found") + log = "\n".join(lines) + return log + + +def issues_log(issues): + """Return a printable log of the issues. + + Parameters + ---------- + issues : list + Issues. + + Returns + ------- + str + Issues log. + + """ + logger.debug("creating issue log") + lines = [] + color = config.get(__name__, 'color_issue') + for issue in issues: + path = Path(*issue['path']) + text = issue['text'] + line = f'{path} {colorize(text, color)}' + lines.append(line) + if len(issues) == 0: + lines.append("no issue found") + log = "\n".join(lines) + return log diff --git a/src/treecker/core/naming.py b/src/treecker/core/naming.py index 4b595bf..0919823 100644 --- a/src/treecker/core/naming.py +++ b/src/treecker/core/naming.py @@ -2,77 +2,40 @@ """Naming module. -This module implements the naming check. +This module implements the file naming check. """ from fnmatch import fnmatch from logging import getLogger -from pathlib import Path +from os.path import basename from re import fullmatch from treecker import config -from treecker.core.colors import colorize logger = getLogger(__name__) -def get_issues(tree, path=None): +def name_issues(path): """Return a list of the naming issues. Parameters ---------- - tree : dict - Directory node. - path : list - Initial path. + path : str + Path of the file to be checked. Returns ------- - list - Issues. + list of str + Error messages. """ - logger.debug("getting issues at %s", path) - if path is None: - path = [] - listing = [] + logger.debug("getting naming issues for %s", path) + name = basename(path) pattern = config.get(__name__, 'match_pattern') ignore = config.get(__name__, 'ignore_patterns').split() - if isinstance(tree, dict): - for name, child in tree.items(): - if fullmatch(pattern, name) is None: - if not any(fnmatch(name, pattern) for pattern in ignore): - text = f"{name} does not match {pattern}" - listing.append({'text': text, 'path': path+[name]}) - listing += get_issues(child, path+[name]) - return listing - - -def issues_log(issues): - """Return a printable log of the naming issues. - - Parameters - ---------- - issues : list - Issues. - - Returns - ------- - str - Issues log. - - """ - logger.debug("creating issue log") - lines = [] - color = config.get(__name__, 'color_issue') - for issue in issues: - path = Path(*issue['path']) - text = issue['text'] - line = f'{path} {colorize(text, color)}' - lines.append(line) - if len(issues) == 0: - lines.append("no issue found") - log = "\n".join(lines) - return log + if fullmatch(pattern, name) is None: + if not any(fnmatch(name, pattern) for pattern in ignore): + return [f"{name} does not match {pattern}"] + return [] diff --git a/src/treecker/core/tree.py b/src/treecker/core/tree.py index 488d325..62489fe 100644 --- a/src/treecker/core/tree.py +++ b/src/treecker/core/tree.py @@ -170,3 +170,49 @@ def tree_node(directory, ignore, hashing): if hashing: add_hash(directory, node) return node + + +def get_differences(old, new, hashing, path=None): + """Return a list of the differences between two tree objects. + + Parameters + ---------- + old : dict + Old directory node. + new : dict + New directory node. + hashing : bool + Compare file hash values. + path : list + Initial path. + + Returns + ------- + list + Differences betwen the two nodes. + + """ + logger.debug("getting differences at %s", path) + if path is None: + path = [] + listing = [] + if isinstance(old, dict) and isinstance(new, dict): + for node in old: + if node in new: + listing += get_differences( + old[node], + new[node], + hashing, + path+[node], + ) + else: + listing.append({'type': 'removed', 'path': path+[node]}) + for node in new: + if node not in old: + listing.append({'type': 'added', 'path': path+[node]}) + elif isinstance(old, dict) or isinstance(new, dict): + listing.append({'type': 'removed', 'path': path}) + listing.append({'type': 'added', 'path': path}) + elif (old[0] != new[0]) or hashing and (old[1] != new[1]): + listing.append({'type': 'edited', 'path': path}) + return listing diff --git a/src/treecker/default.conf b/src/treecker/default.conf index a8d5110..ff3cf2c 100644 --- a/src/treecker/default.conf +++ b/src/treecker/default.conf @@ -13,16 +13,24 @@ blue = \033[34m magenta = \033[35m cyan = \033[36m -[treecker.core.comparison] +[treecker.core.contents] +patterns_pdf = *.pdf +patterns_pic = *.png *.jpg *.gif *.jpeg +patterns_tar = *.tar +patterns_zip = *.zip +ignore_patterns = +block_size = 65536 + +[treecker.core.display] color_added = green color_removed = red color_edited = yellow symbol_added = (+) symbol_removed = (-) symbol_edited = (!) +color_issue = yellow [treecker.core.naming] -color_issue = yellow match_pattern = ([a-z]|[0-9]|\.|-|_)* ignore_patterns = diff --git a/src/treecker/main/commit.py b/src/treecker/main/commit.py index a5cfa62..e033588 100644 --- a/src/treecker/main/commit.py +++ b/src/treecker/main/commit.py @@ -12,8 +12,9 @@ from logging import getLogger from os.path import join from treecker import config -from treecker.core.comparison import get_differences, differences_log +from treecker.core.display import differences_log from treecker.core.snapshot import initialized, load, save, take +from treecker.core.tree import get_differences logger = getLogger(__name__) diff --git a/src/treecker/main/issues.py b/src/treecker/main/issues.py index 98ec863..5b7de8b 100644 --- a/src/treecker/main/issues.py +++ b/src/treecker/main/issues.py @@ -11,8 +11,10 @@ from logging import getLogger from os.path import join from treecker import config -from treecker.core.naming import get_issues, issues_log +from treecker.core.contents import file_issues +from treecker.core.naming import name_issues from treecker.core.snapshot import take +from treecker.core.display import issues_log logger = getLogger(__name__) @@ -56,6 +58,35 @@ def main(**kwargs): tree = snap['tree'] logger.debug("displaying recommendations") - listing = get_issues(tree) + listing = rec(kwargs['dir'], tree) log = issues_log(listing) print(log) + + +def rec(directory, tree, path=None): + """Return the issues encountered in the directory. + + Parameters + ---------- + directory : str + Scanned directory. + tree : dict + Tree structure of files in the directory. + path : list + Path currently under study. + + """ + if path is None: + path = [] + listing = [] + for name, child in tree.items(): + target_name = path+[name] + target_path = join(directory, *path, name) + for text in name_issues(target_path): + listing.append({'path': target_name, 'text': text}) + if isinstance(child, dict): + listing += rec(directory, child, target_name) + else: + for text in file_issues(target_path): + listing.append({'path': target_name, 'text': text}) + return listing diff --git a/src/treecker/main/status.py b/src/treecker/main/status.py index ae035d0..9e21b82 100644 --- a/src/treecker/main/status.py +++ b/src/treecker/main/status.py @@ -12,8 +12,9 @@ from logging import getLogger from os.path import join from treecker import config -from treecker.core.comparison import get_differences, differences_log +from treecker.core.display import differences_log from treecker.core.snapshot import initialized, load, take +from treecker.core.tree import get_differences logger = getLogger(__name__) diff --git a/tests/references.json b/tests/references.json index 12a54c3..d5534d4 100755 --- a/tests/references.json +++ b/tests/references.json @@ -13,9 +13,9 @@ [["subdir", "setup.sh"], [49]] ], "issues": [ - [{"UP": [10]}, 1], - [{"spa ce": [2]}, 1], - [{"a.tar.gz": [2]}, 0] + ["UP", 1], + ["spa ce", 1], + ["a.tar.gz", 0] ], "differences": [ [ diff --git a/tests/run.py b/tests/run.py index d0a0c03..cac9e50 100755 --- a/tests/run.py +++ b/tests/run.py @@ -16,7 +16,7 @@ from sys import stderr from tempfile import TemporaryDirectory, TemporaryFile from unittest import main, TestCase -from treecker.core import comparison, naming, snapshot, tree +from treecker.core import naming, snapshot, tree from treecker.main import commit, init, issues, status @@ -130,13 +130,13 @@ class TestCore(TestCase): def test_naming_issues(self): """Test naming issues.""" for node, length in ref['issues']: - data = naming.get_issues(node) + data = naming.name_issues(node) self.assertEqual(len(data), length) def test_differences(self): """Test identification of differences.""" for node1, node2, hashing, length in ref['differences']: - data = comparison.get_differences(node1, node2, hashing) + data = tree.get_differences(node1, node2, hashing) self.assertEqual(length, len(data)) -- GitLab From 848413700a8efefcb554a49a79102d4999e882ce Mon Sep 17 00:00:00 2001 From: Dunstan Becht Date: Fri, 2 Jun 2023 04:09:06 +0200 Subject: [PATCH 3/7] change PyPDF2 logger level --- src/treecker/core/contents.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/treecker/core/contents.py b/src/treecker/core/contents.py index eabeb5b..2a6cd4d 100644 --- a/src/treecker/core/contents.py +++ b/src/treecker/core/contents.py @@ -21,6 +21,8 @@ from treecker import config logger = getLogger(__name__) +getLogger("PyPDF2").setLevel('ERROR') + Image.MAX_IMAGE_PIXELS = None -- GitLab From 55bf1d0dab0da393ffd08a0abf50c18aeae897f4 Mon Sep 17 00:00:00 2001 From: Dunstan Becht Date: Fri, 2 Jun 2023 05:13:09 +0200 Subject: [PATCH 4/7] update parameter names --- src/treecker/core/contents.py | 1 + src/treecker/core/naming.py | 9 +++------ src/treecker/core/snapshot.py | 2 +- src/treecker/default.conf | 8 ++++---- src/treecker/main/issues.py | 23 ++++++++++++++--------- 5 files changed, 23 insertions(+), 20 deletions(-) diff --git a/src/treecker/core/contents.py b/src/treecker/core/contents.py index 2a6cd4d..39d7079 100644 --- a/src/treecker/core/contents.py +++ b/src/treecker/core/contents.py @@ -208,6 +208,7 @@ def file_issues(path): Error messages. """ + logger.debug("analyzing the contents of %s", path) mapping = { 'patterns_pdf': pdf_issues, 'patterns_pic': picture_issues, diff --git a/src/treecker/core/naming.py b/src/treecker/core/naming.py index 0919823..bbed222 100644 --- a/src/treecker/core/naming.py +++ b/src/treecker/core/naming.py @@ -6,7 +6,6 @@ This module implements the file naming check. """ -from fnmatch import fnmatch from logging import getLogger from os.path import basename from re import fullmatch @@ -31,11 +30,9 @@ def name_issues(path): Error messages. """ - logger.debug("getting naming issues for %s", path) + logger.debug("analyzing the name of %s", path) name = basename(path) - pattern = config.get(__name__, 'match_pattern') - ignore = config.get(__name__, 'ignore_patterns').split() + pattern = config.get(__name__, 'match') if fullmatch(pattern, name) is None: - if not any(fnmatch(name, pattern) for pattern in ignore): - return [f"{name} does not match {pattern}"] + return [f"{name} does not match {pattern}"] return [] diff --git a/src/treecker/core/snapshot.py b/src/treecker/core/snapshot.py index ed2cf8d..8de14a9 100644 --- a/src/treecker/core/snapshot.py +++ b/src/treecker/core/snapshot.py @@ -37,7 +37,7 @@ def take(directory, hashing): """ logger.debug("taking snapshot of %s", directory) file = config.get(__name__, 'snap_file') - ignore = config.get(__name__, 'ignore_patterns').split() + ignore = config.get(__name__, 'ignore').split() ignore.append(file) date = datetime.now(timezone.utc).isoformat(timespec="seconds") node = tree_node(directory, ignore, hashing) diff --git a/src/treecker/default.conf b/src/treecker/default.conf index ff3cf2c..b933a7a 100644 --- a/src/treecker/default.conf +++ b/src/treecker/default.conf @@ -18,7 +18,6 @@ patterns_pdf = *.pdf patterns_pic = *.png *.jpg *.gif *.jpeg patterns_tar = *.tar patterns_zip = *.zip -ignore_patterns = block_size = 65536 [treecker.core.display] @@ -31,12 +30,11 @@ symbol_edited = (!) color_issue = yellow [treecker.core.naming] -match_pattern = ([a-z]|[0-9]|\.|-|_)* -ignore_patterns = +match = ([a-z]|[0-9]|\.|-|_)* [treecker.core.snapshot] snap_file = treecker.json -ignore_patterns = +ignore = [treecker.core.tree] hash_algo = md5 @@ -51,6 +49,8 @@ hash = False [treecker.main.issues] dir = . +ignore_name = +ignore_contents = [treecker.main.status] dir = . diff --git a/src/treecker/main/issues.py b/src/treecker/main/issues.py index 5b7de8b..3766b51 100644 --- a/src/treecker/main/issues.py +++ b/src/treecker/main/issues.py @@ -7,6 +7,7 @@ directory names. """ +from fnmatch import fnmatch from logging import getLogger from os.path import join @@ -79,14 +80,18 @@ def rec(directory, tree, path=None): if path is None: path = [] listing = [] + ignore_name = config.get(__name__, 'ignore_name').split() + ignore_contents = config.get(__name__, 'ignore_contents').split() for name, child in tree.items(): - target_name = path+[name] - target_path = join(directory, *path, name) - for text in name_issues(target_path): - listing.append({'path': target_name, 'text': text}) - if isinstance(child, dict): - listing += rec(directory, child, target_name) - else: - for text in file_issues(target_path): - listing.append({'path': target_name, 'text': text}) + target_path = path + [name] + target = join(directory, *path, name) + if not any(fnmatch(name, ignored) for ignored in ignore_name): + for text in name_issues(target): + listing.append({'path': target_path, 'text': text}) + if not any(fnmatch(name, ignored) for ignored in ignore_contents): + if isinstance(child, dict): + listing += rec(directory, child, target_path) + else: + for text in file_issues(target): + listing.append({'path': target_path, 'text': text}) return listing -- GitLab From de48a43a143392be080d5c6fb31a6ba02b2da111 Mon Sep 17 00:00:00 2001 From: Dunstan Becht Date: Fri, 2 Jun 2023 05:37:30 +0200 Subject: [PATCH 5/7] update documentation --- README.md | 2 +- docs/source/practice/examples/configuration.rst | 8 ++++++-- docs/source/practice/examples/vanilla.rst | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 965c5cd..741aba6 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ These features are accessible via the following commands: * `init`: To create the first snapshot of a directory. * `status`: To display the changes since last snapshot. * `commit`: To save the change in a new snapshot. -* `issues`: To display incorrectly named files and directories. +* `issues`: To display misnamed or unreadable files. ## Installation diff --git a/docs/source/practice/examples/configuration.rst b/docs/source/practice/examples/configuration.rst index 7c427c1..6d6825e 100644 --- a/docs/source/practice/examples/configuration.rst +++ b/docs/source/practice/examples/configuration.rst @@ -45,9 +45,13 @@ It is then possible to choose, among others, which file or directory names will .. code-block:: ini [treecker.core.naming] - ignore-patterns = README* LICENSE* CITATION* INSTALL* SETUP* MANIFEST* SOURCES* PKG-INFO Makefile *.php LC_MESSAGES en_US en_GB fr_FR + match = ([0-9]{8}T[0-9]{6}Z)?([a-z]|[0-9]|\.|-|_)* [treecker.core.snapshot] - ignore-patterns = __pycache__ .git + ignore = __pycache__ .git + + [treecker.main.issues] + ignore_name = README* LICENSE* CITATION* INSTALL* SETUP* MANIFEST* SOURCES* PKG-INFO Makefile *.php LC_MESSAGES en_US en_GB fr_FR + ignore_contents = corrupted For more configuration options, see the default configuration file `default.conf `_. diff --git a/docs/source/practice/examples/vanilla.rst b/docs/source/practice/examples/vanilla.rst index 9dbd538..b3020bc 100644 --- a/docs/source/practice/examples/vanilla.rst +++ b/docs/source/practice/examples/vanilla.rst @@ -37,7 +37,7 @@ This will overwrite the ``treecker.json`` file. Display issues ~~~~~~~~~~~~~~ -To display incorrectly nammed files and directories, execute the following command. +To display misnamed or unreadable files, execute the following command. .. code-block:: bash -- GitLab From 9165fc0b420a9c13373235f99dbc01384ff0767f Mon Sep 17 00:00:00 2001 From: Dunstan Becht Date: Fri, 2 Jun 2023 05:57:55 +0200 Subject: [PATCH 6/7] test corrupted file detection --- tests/material/corrupted/file.gif | 1 + tests/material/corrupted/file.jpg | 1 + tests/material/corrupted/file.pdf | 1 + tests/material/corrupted/file.tar | 1 + tests/material/corrupted/file.zip | 1 + tests/run.py | 13 ++++++++++++- 6 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 tests/material/corrupted/file.gif create mode 100644 tests/material/corrupted/file.jpg create mode 100644 tests/material/corrupted/file.pdf create mode 100644 tests/material/corrupted/file.tar create mode 100644 tests/material/corrupted/file.zip diff --git a/tests/material/corrupted/file.gif b/tests/material/corrupted/file.gif new file mode 100644 index 0000000..3413bf9 --- /dev/null +++ b/tests/material/corrupted/file.gif @@ -0,0 +1 @@ +corrupted file contents diff --git a/tests/material/corrupted/file.jpg b/tests/material/corrupted/file.jpg new file mode 100644 index 0000000..3413bf9 --- /dev/null +++ b/tests/material/corrupted/file.jpg @@ -0,0 +1 @@ +corrupted file contents diff --git a/tests/material/corrupted/file.pdf b/tests/material/corrupted/file.pdf new file mode 100644 index 0000000..3413bf9 --- /dev/null +++ b/tests/material/corrupted/file.pdf @@ -0,0 +1 @@ +corrupted file contents diff --git a/tests/material/corrupted/file.tar b/tests/material/corrupted/file.tar new file mode 100644 index 0000000..3413bf9 --- /dev/null +++ b/tests/material/corrupted/file.tar @@ -0,0 +1 @@ +corrupted file contents diff --git a/tests/material/corrupted/file.zip b/tests/material/corrupted/file.zip new file mode 100644 index 0000000..3413bf9 --- /dev/null +++ b/tests/material/corrupted/file.zip @@ -0,0 +1 @@ +corrupted file contents diff --git a/tests/run.py b/tests/run.py index cac9e50..eea4a72 100755 --- a/tests/run.py +++ b/tests/run.py @@ -9,6 +9,7 @@ This script executes the unit tests for the package. from contextlib import contextmanager, redirect_stdout from json import load, dumps +from os import listdir from pathlib import Path from shutil import copytree, rmtree from subprocess import CalledProcessError, run @@ -16,7 +17,7 @@ from sys import stderr from tempfile import TemporaryDirectory, TemporaryFile from unittest import main, TestCase -from treecker.core import naming, snapshot, tree +from treecker.core import contents, naming, snapshot, tree from treecker.main import commit, init, issues, status @@ -133,6 +134,16 @@ class TestCore(TestCase): data = naming.name_issues(node) self.assertEqual(len(data), length) + def test_contents_issues(self): + """Test contents issues.""" + corrupted = dir_material/'corrupted' + for file in listdir(corrupted): + with self.subTest(file=file): + path = corrupted/file + if path.is_file(): + listing = contents.file_issues(path) + self.assertEqual(len(listing), 1) + def test_differences(self): """Test identification of differences.""" for node1, node2, hashing, length in ref['differences']: -- GitLab From bb7041d66f904d713f829281ecd1ae29887e2f5c Mon Sep 17 00:00:00 2001 From: Dunstan Becht Date: Fri, 2 Jun 2023 06:02:42 +0200 Subject: [PATCH 7/7] lint code --- src/treecker/core/tree.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/treecker/core/tree.py b/src/treecker/core/tree.py index 62489fe..c090ac5 100644 --- a/src/treecker/core/tree.py +++ b/src/treecker/core/tree.py @@ -7,7 +7,7 @@ This module implements the functionalities related to the trees. """ from fnmatch import fnmatch -from hashlib import new +from hashlib import new as new_hash from logging import getLogger from multiprocessing import Pool from os import stat @@ -36,7 +36,7 @@ def file_hash(path): logger.debug("computing hash of %s", path) size = config.getint(__name__, 'block_size') algo = config.get(__name__, 'hash_algo') - hashing = new(algo) + hashing = new_hash(algo) with open(path, 'rb') as file: series = file.read(size) while len(series) > 0: -- GitLab