from abc import ABC, abstractmethod
from copy import copy
from dataclasses import dataclass, field
from json import dumps
from typing import Any, Iterable, List, Literal, Mapping, Optional, Union, overload
import pywikibot
from sentry_sdk import push_scope
# Make all imports from submodules available here
from .constants import (
session, # noqa: F401
url_prop, # noqa: F401
retrieved_prop, # noqa: F401
archive_date_prop, # noqa: F401
archive_url_prop, # noqa: F401
deprecated_reason_prop, # noqa: F401
link_rot_id, # noqa: F401
preferred_rank_reason_prop, # noqa: F401
site, # noqa: F401
EntityPage, # noqa: F401
)
from .dataclasses import (
ClaimShortcutMixin, # noqa: F401
ExtraProperty, # noqa: F401
ExtraQualifier, # noqa: F401
ExtraReference, # noqa: F401
)
from .process_reason import (
ProcessReason, # noqa: F401
DifferentRankContext, # noqa: F401
ReplaceValueContext, # noqa: F401
DeleteValuesContext, # noqa: F401
ReplaceQualifierValueContext, # noqa: F401
DeleteQualifierValuesContext, # noqa: F401
NewClaimFromQualifierContext, # noqa: F401
MergedReferenceContext, # noqa: F401
)
from .sentry import (
sentry_avilable, # noqa: F401
load_sentry, # noqa: F401
report_exception, # noqa: F401
start_span, # noqa: F401
start_transaction, # noqa: F401
)
from .transformers import de_archivify_url_property # noqa: F401
from .utils import (
add_claim_locally, # noqa: F401
add_qualifier_locally, # noqa: F401
add_reference_locally, # noqa: F401
get_random_hex, # noqa: F401
get_sparql_query, # noqa: F401
append_to_source, # noqa: F401
merge_reference_groups, # noqa: F401
OutputHelper, # noqa: F401
mark_claim_as_preferred, # noqa: F401
remove_qualifiers, # noqa: F401
resolve_multiple_property_claims, # noqa: F401
get_entity_id_from_entity_url, # noqa: F401
)
# Shape of the value returned by ``PropertyAdderBot.run_item``: a mapping from
# property ID to the list of ExtraProperty objects to merge into the item.
Output = Mapping[str, List[ExtraProperty]]
[docs]
@dataclass(frozen=True)
class Config:
    """Immutable settings read by the bot while it merges output into an item.

    Instances are frozen; build a new ``Config`` to change a setting.
    """

    auto_dearchivify_urls: bool = True
    """Automatically replace archive.org URLs with the original URL along with the necessary qualifiers"""

    auto_deprecate_archified_urls: bool = True
    """Mark de-archivified URLs as deprecated"""

    create_or_edit_main_property_whitelist_enabled: bool = False
    """Enable the whitelist for creating or editing main properties"""

    create_or_edit_main_property_whitelist: List[str] = field(default_factory=list)
    """The whitelist for creating or editing main properties"""

    copy_ranks_for_nonwhitelisted_main_properties: bool = True
    """Copy the rank of non-whitelisted main properties (requires create_or_edit_main_property_whitelist_enabled)"""

    create_or_edit_qualifiers_for_main_property_whitelist_enabled: bool = False
    """Enable the whitelist for creating or editing qualifiers when the main property is blacklisted (requires create_or_edit_main_property_whitelist_enabled)"""

    create_or_edit_qualifiers_for_main_property_whitelist: List[str] = field(default_factory=list)
    """The whitelist for creating or editing qualifiers when the main property is blacklisted (requires create_or_edit_main_property_whitelist_enabled)"""

    create_or_edit_references_for_main_property_whitelist_enabled: bool = False
    """Enable the whitelist for creating or editing references when the main property is blacklisted (requires create_or_edit_main_property_whitelist_enabled)"""

    create_or_edit_references_for_main_property_whitelist: List[str] = field(default_factory=list)
    """The whitelist for creating or editing references when the main property is blacklisted (requires create_or_edit_main_property_whitelist_enabled)"""

    act_on_cycle: bool = False
    """If the bot should do something if it detects a cycle. If False, there is a chance the bot gets stuck in an infinite loop

    .. versionadded:: 7.4.0
    .. deprecated:: 7.4.1
    """

    throw_on_no_edit_cycle: bool = True
    """If the bot should throw an exception if no edits were made to the item but a cycle is being signalled. If False, the loop will silently stop.

    .. versionadded:: 7.4.0
    .. deprecated:: 7.4.1
    """
[docs]
class PropertyAdderBot(ABC):
    """A bot that adds properties to pages.

    Supports merging existing properties with the internal representation.

    Subclasses must implement :meth:`run_item` and :meth:`get_edit_summary`,
    which are declared abstract.
    """
def __init__(self) -> None:
    """Call ``load_sentry()``, install a default :class:`Config` and draw a random hex string.

    The random hex string is what :meth:`get_edit_group_id` returns.
    """
    load_sentry()
    self.config = Config()
    self.__random_hex = get_random_hex()
def set_config(self, config: Config) -> None:
    """Replace the bot's active configuration.

    :param config: The configuration to use from now on.
    """
    self.config = config
[docs]
def get_edit_group_id(self) -> Union[str, None]:
    """Return the edit group ID that identifies this bot run in edit summaries.

    The default is the random hex string stored on the instance at construction.

    :return: The edit group ID for the bot. Return None to omit it.
    """
    edit_group_id = self.__random_hex
    return edit_group_id
[docs]
@abstractmethod
def get_edit_summary(self, page: EntityPage) -> str:
    """Return the edit summary for an edit to ``page``.

    Abstract: subclasses must override this method.

    :param page: The item page that was edited.
    :return: The edit summary to use.
    """
[docs]
def get_full_summary(self, message: str) -> str:
    """Build the final edit summary, adding an EditGroups link when an edit group ID is set.

    :param message: The message to format with. To use the default summary, pass in the result of :meth:`.get_edit_summary`.
    :return: ``message`` unchanged when :meth:`get_edit_group_id` returns a falsy value,
        otherwise ``message`` followed by the EditGroups "details" link.
    """
    group_id = self.get_edit_group_id()
    if not group_id:
        return message
    return f"{message} ([[:toolforge:editgroups/b/CB/{group_id}|details]])"
[docs]
@abstractmethod
def run_item(
    self,
    item: EntityPage,
) -> Output:
    """Compute what should be added to ``item``; the main work done by a concrete bot.

    Abstract: subclasses must override this method.

    :param item: The item to work on.
    :return: A dictionary of list of ExtraProperties, keyed by property ID.
        Recommended to use :class:`.OutputHelper`.
    """
[docs]
def can_add_main_property(self, extra_property: ExtraProperty) -> bool:
    """Return if the property can be added or edited.

    :param extra_property: The property to check.
    :return: False when the property is flagged ``reference_only``, True otherwise.
    """
    is_reference_only = extra_property.reference_only
    return not is_reference_only
[docs]
def same_main_property(
    self,
    existing_claim: pywikibot.Claim,
    new_claim: pywikibot.Claim,
    page: EntityPage,
) -> bool:
    """Decide whether two main claims carry the same value.

    The default compares the claims' targets for equality; ``page`` is not used here.

    :param existing_claim: The existing claim to compare to.
    :param new_claim: The new claim to compare to.
    :param page: The item page that is being edited.
    :return: If the main property is the same.
    """
    current_value = existing_claim.getTarget()
    proposed_value = new_claim.getTarget()
    return current_value == proposed_value
[docs]
def same_qualifier(
    self,
    existing_qualifier: pywikibot.Claim,
    new_qualifier: pywikibot.Claim,
    main_claim: pywikibot.Claim,
    page: EntityPage,
) -> bool:
    """Decide whether two qualifiers carry the same value.

    The default compares the qualifiers' targets for equality; ``main_claim`` and
    ``page`` are not used here.

    :param existing_qualifier: The existing qualifier to compare to.
    :param new_qualifier: The new qualifier to compare to.
    :param main_claim: The main claim that the qualifier is on.
    :param page: The item page that is being edited.
    :return: If the qualifier is the same.
    """
    current_value = existing_qualifier.getTarget()
    proposed_value = new_qualifier.getTarget()
    return current_value == proposed_value
[docs]
def post_output_process_hook(self, output: Output, item: EntityPage) -> bool:
    """Hook that runs after all output has been processed; the default does nothing.

    :param output: The output that was processed.
    :param item: The item that was edited.
    :return: Return whether or not the item was changed. This will be used
        to determine if an API request should be made.
    """
    item_changed = False
    return item_changed
[docs]
def pre_edit_process_hook(self, output: Output, item: EntityPage) -> None:
    """Hook that runs just before the item is edited; the default does nothing.

    This hook only fires if an API request will be made.

    :param output: The output that was processed.
    :param item: The item that will be edited.
    """
    return None
[docs]
def post_edit_process_hook(self, output: Output, item: EntityPage) -> None:
    """Hook that runs right after the item is edited; the default does nothing.

    This hook only fires if an API request was made.

    :param output: The output that was processed.
    :param item: The item that was edited.
    """
    return None
[docs]
def whitelisted_claim(self, prop: ExtraProperty) -> bool:
    """Return if the claim is whitelisted.

    With the main-property whitelist disabled every claim counts as whitelisted;
    otherwise the claim's property ID must be listed in the whitelist.

    :param prop: The property to check.
    :return: If the claim is whitelisted.
    """
    settings = self.config
    if not settings.create_or_edit_main_property_whitelist_enabled:
        return True
    return prop.claim.getID() in settings.create_or_edit_main_property_whitelist
[docs]
def whitelisted_qualifier(
    self, prop: ExtraProperty, qualifier: ExtraQualifier
) -> bool:
    """Return if the qualifier is whitelisted.

    With the qualifier whitelist disabled every qualifier counts as whitelisted;
    otherwise the qualifier's property ID must be listed in the whitelist.
    ``prop`` is not consulted by this default implementation.

    :param prop: The property to check.
    :param qualifier: The qualifier to check.
    :return: If the qualifier is whitelisted.
    """
    settings = self.config
    if not settings.create_or_edit_qualifiers_for_main_property_whitelist_enabled:
        return True
    allowed_ids = settings.create_or_edit_qualifiers_for_main_property_whitelist
    return qualifier.claim.getID() in allowed_ids
[docs]
def whitelisted_reference(
    self, prop: ExtraProperty, reference: ExtraReference
) -> bool:
    """Return if the reference is whitelisted.

    With the reference whitelist disabled every reference counts as whitelisted;
    otherwise at least one claim in the reference must have a property ID that is
    listed in the whitelist. ``prop`` is not consulted by this default implementation.

    :param prop: The property to check.
    :param reference: The reference to check.
    :return: If the reference is whitelisted.
    """
    settings = self.config
    if not settings.create_or_edit_references_for_main_property_whitelist_enabled:
        return True
    for reference_claim in reference.new_reference_props.values():
        if (
            reference_claim.getID(False)
            in settings.create_or_edit_references_for_main_property_whitelist
        ):
            return True
    return False
@overload
def processed_hook(
self,
item: EntityPage,
reason: Literal[ProcessReason.missing_property, ProcessReason.missing_value],
*,
claim: ExtraProperty,
) -> bool:
...
@overload
def processed_hook(
self,
item: EntityPage,
reason: Literal[ProcessReason.different_rank],
*,
claim: ExtraProperty,
context: DifferentRankContext,
) -> bool:
...
@overload
def processed_hook(
self,
item: EntityPage,
reason: Literal[ProcessReason.replace_value],
*,
claim: ExtraProperty,
context: ReplaceValueContext,
) -> bool:
...
@overload
def processed_hook(
self,
item: EntityPage,
reason: Literal[ProcessReason.delete_values],
*,
claim: ExtraProperty,
context: DeleteValuesContext,
) -> bool:
...
@overload
def processed_hook(
self,
item: EntityPage,
reason: Literal[
ProcessReason.missing_qualifier_property,
ProcessReason.missing_qualifier_value,
],
*,
claim: ExtraProperty,
qualifier: ExtraQualifier,
):
...
@overload
def processed_hook(
self,
item: EntityPage,
reason: Literal[ProcessReason.replace_qualifier_value],
*,
claim: ExtraProperty,
qualifier: ExtraQualifier,
context: ReplaceQualifierValueContext,
) -> bool:
...
@overload
def processed_hook(
self,
item: EntityPage,
reason: Literal[ProcessReason.delete_qualifier_values],
*,
claim: ExtraProperty,
qualifier: ExtraQualifier,
context: DeleteQualifierValuesContext,
) -> bool:
...
@overload
def processed_hook(
self,
item: EntityPage,
reason: Literal[ProcessReason.new_claim_from_qualifier],
*,
claim: ExtraProperty,
qualifier: ExtraQualifier,
context: NewClaimFromQualifierContext,
) -> bool:
...
@overload
def processed_hook(
self,
item: EntityPage,
reason: Literal[ProcessReason.missing_reference],
*,
claim: ExtraProperty,
reference: ExtraReference,
) -> bool:
...
@overload
def processed_hook(
self,
item: EntityPage,
reason: Literal[ProcessReason.merged_reference],
*,
claim: ExtraProperty,
reference: ExtraReference,
context: MergedReferenceContext,
) -> bool:
...
@overload
def processed_hook(
self,
item: EntityPage,
reason: Literal[ProcessReason.post_output],
) -> bool:
...
[docs]
def processed_hook(
self,
item: EntityPage,
reason: ProcessReason,
*,
claim: Optional[ExtraProperty] = None,
qualifier: Optional[ExtraQualifier] = None,
reference: Optional[ExtraReference] = None,
context: Optional[Mapping[str, Any]] = None,
) -> bool:
"""Do processing whenever the item is modified. This method is called directly after the item is modified.
.. versionadded:: 5.8.0
:param item: The item that was modified.
:param reason: The reason the item was modified.
:param claim: The main claim that was added or is having qualifiers/references added, defaults to None
:param qualifier: The qualifier that was modified, defaults to None
:param reference: The reference that was modified, defaults to None
:param context: Additional context with the operation, defaults to None
:return: If the item was modified. This will cause a re-cycle of the process loop so only use this if something on the same tier or higher was modified.
+--------------------+-----------------------+-----------------------+-----------------------+
| Thing being added | Thing being modified |
+ +-----------------------+-----------------------+-----------------------+
| | Main statement | Qualifier | Reference |
+====================+=======================+=======================+=======================+
| Main statement | Yes | Yes | Yes |
+--------------------+-----------------------+-----------------------+-----------------------+
| Qualifier | No | Yes | Yes |
+--------------------+-----------------------+-----------------------+-----------------------+
| Reference | No | No | Yes |
+--------------------+-----------------------+-----------------------+-----------------------+
"""
return False
[docs]
def process(self, output: Output, item: EntityPage) -> bool:
"""Merge the output from run_item into the item and save the item if anything changed.

NOTE(review): leading indentation is missing from this copy of the file, so
the nesting described in the comments below is inferred from the placement of
``else`` / ``break`` / ``continue`` -- confirm against the upstream source.

For each ExtraProperty in ``output`` the method looks for a matching main
claim on the item (adding or replacing one where allowed), then merges the
qualifiers and the references onto that claim. ``processed_hook`` is called
for each change; a True return value requests another pass over the output.
Afterwards ``post_output_process_hook`` runs, and if anything changed the
item is saved with ``editEntity`` between the pre- and post-edit hooks.

:param output: The output to process
:param item: The item to process
:return: If any edits were made to the item.
:raises pywikibot.exceptions.APIError: If saving the item keeps failing after the retries are used up.
"""
# Becomes True as soon as something is changed on the item locally.
acted = False
# OR-ed with every processed_hook return value; True requests another pass.
re_cycle = True
# This is an (inefficient) way to prevent cycles. If an actual change is made, the hash will change.
second_previous_hash = None
previous_hash = hash(dumps(item.toJSON()))
while re_cycle and second_previous_hash != previous_hash:
re_cycle = False
# Shallow copies are iterated (presumably so that hooks may mutate the output during a pass).
for property_id, extra_props in copy(output).items():
for extra_prop_data in extra_props.copy():
# original_claim keeps the claim supplied in the output; new_claim may be
# rebound below to a claim that already exists on the item.
new_claim = original_claim = extra_prop_data.claim
if new_claim.type == "url" and self.config.auto_dearchivify_urls:
de_archivify_url_property(
extra_prop_data,
deprecate=self.config.auto_deprecate_archified_urls,
)
if property_id not in item.claims:
if self.can_add_main_property(
extra_prop_data
) and self.whitelisted_claim(extra_prop_data):
# This is triggered if there are no statements for the property
add_claim_locally(item, new_claim)
re_cycle |= self.processed_hook(
item,
reason=ProcessReason.missing_property,
claim=extra_prop_data,
)
acted = True
else:
continue
else:
for existing_claim in item.claims[property_id].copy():
assert isinstance(existing_claim, pywikibot.Claim)
if self.same_main_property(existing_claim, new_claim, item):
# This is triggered if there is a statement for the property exactly matching the one we want to add
if new_claim.getRank() != existing_claim.getRank():
if (
self.whitelisted_claim(extra_prop_data)
or self.config.copy_ranks_for_nonwhitelisted_main_properties
):
old_rank = existing_claim.getRank()
existing_claim.rank = new_claim.getRank()
re_cycle |= self.processed_hook(
item,
ProcessReason.different_rank,
claim=extra_prop_data,
context=DifferentRankContext(
existing_claim=existing_claim,
old_rank=old_rank,
),
)
acted = True
# From here on qualifiers and references are merged onto the existing claim.
new_claim = extra_prop_data.claim = existing_claim
break
else:
if (
extra_prop_data.replace_if_conflicting_exists
and self.whitelisted_claim(extra_prop_data)
):
# This is triggered if `extra_prop_data.replace_if_conflicting_exists` is set to True
# and this is the first statement with the property that is not exactly matching the one we want to add
old_value = existing_claim.getTarget()
existing_claim.setTarget(new_claim.getTarget())
if new_claim.getRank() != existing_claim.getRank():
old_rank = existing_claim.getRank()
existing_claim.rank = new_claim.getRank()
re_cycle |= self.processed_hook(
item,
ProcessReason.different_rank,
claim=extra_prop_data,
context=DifferentRankContext(
existing_claim=existing_claim,
old_rank=old_rank,
),
)
# Keep the claim from the output for the hook context before rebinding new_claim.
original_new_claim = new_claim
new_claim = extra_prop_data.claim = existing_claim
re_cycle |= self.processed_hook(
item,
ProcessReason.replace_value,
claim=extra_prop_data,
context=ReplaceValueContext(
existing_claim=existing_claim,
new_claim=original_new_claim,
old_value=old_value,
),
)
if (
len(item.claims[property_id]) > 1
and extra_prop_data.delete_other_if_replacing
):
# Every other claim of this property is reported as deleted and dropped from the item.
deleted = item.claims[property_id].copy()
deleted.remove(new_claim)
re_cycle |= self.processed_hook(
item,
ProcessReason.delete_values,
claim=extra_prop_data,
context=DeleteValuesContext(
deleted_claims=deleted
),
)
item.claims[property_id] = [new_claim]
acted = True
break
else:
# This code section triggers if there are statements for the property but none of them match the one we want to add
# and we did not opt for replacement.
if (
extra_prop_data.skip_if_conflicting_language_exists
and property_id in item.claims
): # type: ignore
found_conflicting_language = False
for existing_claim in item.claims[property_id]:
assert isinstance(existing_claim, pywikibot.Claim)
if isinstance(
existing_claim.getTarget(),
pywikibot.WbMonolingualText,
):
lang_target: pywikibot.WbMonolingualText = (
existing_claim.getTarget()
) # type: ignore
# A conflict is a claim in the same language whose value differs.
if (
lang_target.language
== new_claim.getTarget().language # type: ignore
and lang_target != new_claim.getTarget()
): # type: ignore
found_conflicting_language = True
break
else:
# The existing claim is not a monolingual text, so we can't compare it to the new one
continue
else:
# If we're here, we did not find a conflicting language
if self.can_add_main_property(
extra_prop_data
) and self.whitelisted_claim(extra_prop_data):
# NOTE(review): here the hook is called before add_claim_locally, whereas the
# other add paths call add_claim_locally first -- confirm this is intended.
re_cycle |= self.processed_hook(
item,
ProcessReason.missing_value,
claim=extra_prop_data,
)
add_claim_locally(item, new_claim)
acted = True
else:
continue
if found_conflicting_language:
continue
elif extra_prop_data.skip_if_conflicting_exists:
continue
# NOTE(review): if the for/else above already added the claim and control then
# falls through to this add, the claim would be added a second time -- the
# lost indentation makes this impossible to tell; confirm against upstream.
if self.can_add_main_property(
extra_prop_data
) and self.whitelisted_claim(extra_prop_data):
add_claim_locally(item, new_claim)
re_cycle |= self.processed_hook(
item,
ProcessReason.missing_value,
claim=extra_prop_data,
)
acted = True
else:
continue
# Qualifier merge: add, replace or skip each qualifier from the output on new_claim.
extra_prop_data.sort_qualifiers()
# Qualifiers added during this pass; they are moved along if a new claim has to be created.
added_qualifiers = []
for (
qualifier_prop,
qualifiers,
) in extra_prop_data.qualifiers.copy().items():
for qualifier_data in qualifiers.copy():
qualifier = qualifier_data.claim
if not new_claim.qualifiers.get(qualifier_prop, []) and (
self.whitelisted_claim(extra_prop_data)
or self.whitelisted_qualifier(
extra_prop_data, qualifier_data
)
):
# The claim has no qualifier with this property yet.
add_qualifier_locally(new_claim, qualifier)
added_qualifiers.append(qualifier)
re_cycle |= self.processed_hook(
item,
ProcessReason.missing_qualifier_property,
claim=extra_prop_data,
qualifier=qualifier_data,
)
acted = True
else:
for existing_qualifier in new_claim.qualifiers[
qualifier_prop
].copy():
if self.same_qualifier(
existing_qualifier, qualifier, new_claim, item
):
# An equal qualifier already exists; nothing to do for this one.
break
else:
if (
qualifier_data.replace_if_conflicting_exists
and (
self.whitelisted_claim(extra_prop_data)
or self.whitelisted_qualifier(
extra_prop_data, qualifier_data
)
)
):
# Overwrite the value of the conflicting qualifier in place.
old_value = existing_qualifier.getTarget()
existing_qualifier.setTarget(
qualifier.getTarget()
)
re_cycle |= self.processed_hook(
item,
ProcessReason.replace_qualifier_value,
claim=extra_prop_data,
qualifier=qualifier_data,
context=ReplaceQualifierValueContext(
existing_qualifier=existing_qualifier,
new_qualifier=qualifier,
old_value=old_value,
),
)
qualifier = (
qualifier_data.claim
) = existing_qualifier
if (
len(
new_claim.qualifiers[qualifier_prop]
)
> 1
and qualifier_data.delete_other_if_replacing
):
deleted = new_claim.qualifiers[
qualifier_prop
].copy()
deleted.remove(qualifier)
re_cycle |= self.processed_hook(
item,
ProcessReason.delete_qualifier_values,
claim=extra_prop_data,
qualifier=qualifier_data,
context=DeleteQualifierValuesContext(
deleted_qualifiers=deleted
),
)
new_claim.qualifiers[qualifier_prop] = [
qualifier
]
acted = True
break
else:
# No existing qualifier matched and none was replaced.
made_new_claim = False
if qualifier_data.skip_if_conflicting_exists:
continue
elif (
qualifier_data.make_new_if_conflicting
and self.whitelisted_claim(extra_prop_data)
):
if self.can_add_main_property(extra_prop_data):
# Switch back to the claim from the output, add it as a new statement and move
# the qualifiers added so far from the old claim onto it.
old_claim = new_claim
new_claim = (
extra_prop_data.claim
) = original_claim
add_claim_locally(item, new_claim)
for qualifier in added_qualifiers:
add_qualifier_locally(
new_claim, qualifier
)
remove_qualifiers(
old_claim, added_qualifiers
)
added_qualifiers = []
re_cycle |= self.processed_hook(
item,
ProcessReason.new_claim_from_qualifier,
claim=extra_prop_data,
qualifier=qualifier_data,
context=NewClaimFromQualifierContext(
old_claim=old_claim
),
)
acted = True
made_new_claim = True
else:
continue
add_qualifier_locally(new_claim, qualifier)
added_qualifiers.append(qualifier)
if not made_new_claim:
re_cycle |= self.processed_hook(
item,
ProcessReason.missing_qualifier_value,
claim=extra_prop_data,
qualifier=qualifier_data,
)
acted = True
# Reference merge: merge into the first compatible existing reference, else add a new one.
for extra_reference in extra_prop_data.extra_references.copy():
compatible = False
for existing_reference in new_claim.getSources().copy():
if extra_reference.is_compatible_reference(
existing_reference
) and (
self.whitelisted_claim(extra_prop_data)
or self.whitelisted_reference(
extra_prop_data, extra_reference
)
):
compatible = True
# The hook only fires when merge_reference_groups returns a truthy value.
if merge_reference_groups(
existing_reference,
list(extra_reference.new_reference_props.values()),
):
re_cycle |= self.processed_hook(
item,
ProcessReason.merged_reference,
claim=extra_prop_data,
reference=extra_reference,
context=MergedReferenceContext(
old_reference_group=existing_reference,
),
)
acted = True
break
if not compatible and (
self.whitelisted_claim(extra_prop_data)
or self.whitelisted_reference(
extra_prop_data, extra_reference
)
):
re_cycle |= self.processed_hook(
item,
ProcessReason.missing_reference,
claim=extra_prop_data,
reference=extra_reference,
)
add_reference_locally(
new_claim, *extra_reference.new_reference_props.values()
)
acted = True
# Shift the hash window so the while condition can see whether the item's JSON changed.
second_previous_hash = previous_hash
previous_hash = hash(dumps(item.toJSON()))
with start_span(
op="post_output_process", description="Post Output Process Hook"
):
if self.post_output_process_hook(output, item):
self.processed_hook(item, ProcessReason.post_output)
# A change reported by the post-output hook also means the item must be saved.
if not acted:
acted = True
if acted:
with start_span(op="pre_edit_process", description="Pre Edit Process Hook"):
self.pre_edit_process_hook(output, item)
with start_span(op="edit_entity", description="Edit Entity"):
# Up to four attempts in total: the APIError is re-raised only once the counter drops below zero.
retries = 3
while retries >= 0:
with start_span(
op="edit_entity_try", description="Edit Entity Attempt"
):
try:
item.editEntity(
summary=self.get_full_summary(
self.get_edit_summary(item)
),
bot=True,
)
break
except pywikibot.exceptions.APIError as e:
retries -= 1
if retries < 0:
raise e
with start_span(
op="post_edit_process", description="Post Edit Process Hook"
):
self.post_edit_process_hook(output, item)
return acted
[docs]
def act_on_item(self, item: EntityPage) -> bool:
    """Run the bot on a single item: compute the output, then process it.

    Both steps run inside a pushed Sentry scope and a "Process Item" transaction,
    each in its own span.

    :param item: The item to act on.
    :return: If any edits were made to the item.
    """
    with push_scope():
        with start_transaction(op="act_on_item", name="Process Item"):
            with start_span(op="get_output", description="Get Output"):
                bot_output = self.run_item(item)
            with start_span(op="process_output", description="Process Output"):
                edited = self.process(bot_output, item)
            return edited
[docs]
def feed_items(
    self, items: Iterable[EntityPage], skip_errored_items: bool = False
) -> None:
    """Feed items to the bot, calling :meth:`act_on_item` on each one in order.

    :param items: The items to feed.
    :param skip_errored_items: If the bot should skip items that errored. When True,
        the exception is passed to ``report_exception`` and the loop moves on to the
        next item; when False, the exception propagates and iteration stops.
    :raises Exception: Whatever :meth:`act_on_item` raised, when ``skip_errored_items`` is False.
    """
    for item in items:
        try:
            self.act_on_item(item)
        except Exception as error:
            if not skip_errored_items:
                # Bare raise re-raises the active exception with its traceback unchanged.
                raise
            report_exception(error)