Skip to content

Commit

Permalink
Add option to only act on duplicate messages
Browse files (browse the repository at this point in the history)
  • Loading branch information
djwf committed May 20, 2021
1 parent 7ba6bc3 commit 73629a4
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 7 deletions.
1 change: 1 addition & 0 deletions mail_deduplicate/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -118,6 +118,7 @@ class Config:
default_conf = {
"dry_run": False,
"input_format": False,
"only_act_on_duplicates": False,
"force_unlock": False,
"hash_only": False,
"hash_headers": HASH_HEADERS,
Expand Down
9 changes: 9 additions & 0 deletions mail_deduplicate/cli.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -93,6 +93,13 @@ def validate_regexp(ctx, param, value):
"supports maildir and mbox format. Use this option to open up other box "
"format, or bypass unreliable detection.",
)
@click.option(
"-o",
"--only-act-on-duplicates",
is_flag=True,
default=False,
help="Perform any actions only on duplicate messages, not on unique messages.",
)
@click.option(
"-u",
"--force-unlock",
Expand Down Expand Up @@ -239,6 +246,7 @@ def mdedup(
ctx,
dry_run,
input_format,
only_act_on_duplicates,
force_unlock,
hash_only,
hash_header,
Expand Down Expand Up @@ -346,6 +354,7 @@ def mdedup(
conf = Config(
dry_run=dry_run,
input_format=input_format,
only_act_on_duplicates=only_act_on_duplicates,
force_unlock=force_unlock,
hash_only=hash_only,
hash_headers=hash_header,
Expand Down
19 changes: 12 additions & 7 deletions mail_deduplicate/deduplicate.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -48,7 +48,8 @@
("mail_hashes", "Number of unique hashes."),
(
"mail_unique",
"Number of unique mails (which where automatically added to selection).",
"Number of unique mails (which where automatically added to selection "
"unless --only-act-on-duplicates was passed on the command line).",
),
(
"mail_duplicates",
Expand Down Expand Up @@ -404,9 +405,10 @@ def select_all(self):
if mail_count == 1:
logger.debug("Add unique message to selection.")
self.stats["mail_unique"] += 1
self.stats["mail_selected"] += 1
self.stats["set_single"] += 1
candidates = mail_set
if self.conf.only_act_on_duplicates is False:
self.stats["mail_selected"] += 1
self.stats["set_single"] += 1
candidates = mail_set

# We need to resort to a selection strategy to discriminate mails
# within the set.
Expand Down Expand Up @@ -457,9 +459,12 @@ def check_stats(self):
# Mail grouping by hash.
assert self.stats["mail_retained"] >= self.stats["mail_unique"]
assert self.stats["mail_retained"] >= self.stats["mail_duplicates"]
assert self.stats["mail_retained"] == (
self.stats["mail_unique"] + self.stats["mail_duplicates"]
)
if self.conf.only_act_on_duplicates:
assert self.stats["mail_retained"] == self.stats["mail_duplicates"]
else:
assert self.stats["mail_retained"] == (
self.stats["mail_unique"] + self.stats["mail_duplicates"]
)
# Mail selection stats.
assert self.stats["mail_retained"] >= self.stats["mail_skipped"]
assert self.stats["mail_retained"] >= self.stats["mail_discarded"]
Expand Down

0 comments on commit 73629a4

Please sign in to comment.