User:Deryck Chan/Property migration tool 2

From Wikidata
Jump to navigation Jump to search

Deryck's migrator (more complicated, more versatile)

# Set up parameters
# These values configure one migration run: statements using pidm1 that carry a
# pidq1 qualifier are copied to pidm2, with the qualifier copied to pidq2.
pidm1 = 'P1112' # main property to migrate from
pidq1 = 'P642' # qualifier property to migrate from
pidm2 = 'P1685' # main property to migrate to
pidq2 = 'P972' # qualifier property to migrate to
limit = 3 # max number of items to process at a time (used as the SPARQL LIMIT)
logFileName = 'migrator2log.txt' # log file; opened in append mode by the script

#start of actual script
import pywikibot
from pywikibot import pagegenerators as pg

import datetime

# Connect to Wikidata once and derive the data repository from that connection.
# `site` and `wikidata_site` are two names for the same Site object; the second
# is kept as a compatibility alias for code that refers to it.
wikidata_site = pywikibot.Site("wikidata", "wikidata")
site = wikidata_site
repo = wikidata_site.data_repository()

def getLabelFromObject(WDObject):
    """Return the English label of a Wikidata object, or its ID as a fallback.

    WDObject must offer ``get()`` (returning a mapping that may contain a
    'labels' mapping keyed by language code) and ``getID()``.  The ID is
    returned when there is no 'labels' entry, no 'en' label, or the 'en'
    label is empty/falsy.
    """
    entity_data = WDObject.get()
    has_english = 'labels' in entity_data and 'en' in entity_data['labels']
    english_label = entity_data['labels']['en'] if has_english else False
    # A falsy label counts as missing, so fall back to the object's ID.
    return english_label if english_label else WDObject.getID()

def getLabelFromWDID(ID): #works for properties only. need separate function for items. why why why
    """Return the English label (or the ID as a fallback) for a property ID.

    The ID is wrapped in a pywikibot PropertyPage, so only property IDs are
    supported; the label lookup itself is delegated to getLabelFromObject.
    """
    property_page = pywikibot.PropertyPage(
        pywikibot.Site("wikidata", "wikidata").data_repository(),
        ID,
    )
    return getLabelFromObject(property_page)

# SPARQL query selecting up to `limit` items that have a pidm1 statement
# carrying a pidq1 qualifier.  Literal SPARQL braces are doubled because the
# template is filled in with str.format.
QUERY = """SELECT DISTINCT ?item
WHERE
{{
    ?item wdt:{main} ?wdprop ;
             p:{main} ?statement .
    ?statement pq:{qualifier} ?ofwhat .
}}

ORDER BY ASC(?item)
LIMIT {limit}""".format(main=pidm1, qualifier=pidq1, limit=limit)

# Edit summary attached to every change the script saves.
edit_summary = 'Deprecate {}/{}, move to {}/{}'.format(pidm1, pidq1, pidm2, pidq2)

# Run the query, then wrap the resulting pages in the site's preloading generator.
sparql_results = pg.WikidataSPARQLPageGenerator(QUERY, site=wikidata_site)
generator = site.preloadpages(sparql_results, pageprops=True)

# Append to the run log.  The with-block closes the file even when an API call
# raises part-way through a run.
with open(logFileName, 'a') as f:
    print(datetime.datetime.now(), file=f)

    for item in generator:
        item_dict = item.get()
        item_label = getLabelFromObject(item)
        qid = item.getID()

        print('Now working on ', qid, ' ', item_label)
        print(qid, file=f)

        # Pass 1: for every pidm1 claim that carries a pidq1 qualifier, copy
        # the qualifier to pidq2, create the equivalent pidm2 claim, then
        # remove the old pidm1 claim.
        if pidm1 in item_dict['claims']:
            # Iterate over a snapshot: claims are removed from the item inside
            # this loop, and the live list must not change under the iterator.
            for claim_object in list(item_dict['claims'][pidm1]):
                if pidq1 not in claim_object.qualifiers:
                    continue
                # By this point, we have asserted that we have the right
                # combination of main property and qualifier.

                # The new claim's value is derived from target.amount below.
                # A claim without a value would crash there, after qualifier
                # edits had already been saved, so skip it up front.
                if claim_object.target is None:
                    print('Skipping claim with no value: ', pidm1)
                    continue

                # Snapshot again: addQualifier changes claim_object.qualifiers,
                # which would affect this iteration if pidq1 and pidq2 were
                # ever configured to the same property.
                for qualifier_object in list(claim_object.qualifiers[pidq1]):
                    qualifier_target = qualifier_object.getTarget()

                    if isinstance(qualifier_target, pywikibot.ItemPage):
                        qualifier_target_label = getLabelFromObject(qualifier_target)
                    else:
                        qualifier_target_label = '[none]'

                    # Clone the qualifier via its JSON form, changing only the
                    # property it is attached to.
                    qualifier_dict = qualifier_object.toJSON()
                    qualifier_dict['property'] = pidq2
                    new_qualifier_object = qualifier_object.qualifierFromJSON(
                        site=wikidata_site, data=qualifier_dict)

                    print('Adding qualifier: ', pidq2, ' ', qualifier_target_label)
                    claim_object.addQualifier(new_qualifier_object, summary=edit_summary)

                newclaim = pywikibot.Claim(repo, pidm2)

                # The value is converted to a string only because the old and
                # the new property have different datatypes; with matching
                # datatypes the old target could be reused unchanged.
                new_value = str(claim_object.target.amount)
                newclaim.setTarget(new_value)

                newclaim.qualifiers = claim_object.qualifiers
                newclaim.sources = claim_object.sources
                newclaim.rank = claim_object.rank

                print('Adding claim: ', pidm2, ': ', new_value)
                item.addClaim(newclaim, summary=edit_summary)

                print('Removing claim: ', pidm1)
                item.removeClaims(claim_object, summary=edit_summary)

        # Pass 2: reload the item and strip every leftover pidq1 qualifier
        # from the pidm2 claims (they were copied over together with pidq2).
        item = pywikibot.ItemPage(repo, qid)
        item_dict = item.get()
        if pidm2 in item_dict['claims']:
            for claim_object in item_dict['claims'][pidm2]:
                if pidq1 not in claim_object.qualifiers:
                    continue
                # Found new main prop + old qualifier prop.  Iterate over a
                # snapshot: removeQualifier is understood to drop the entry
                # from claim_object.qualifiers[pidq1] (confirm against the
                # installed pywikibot version), and removing from the list
                # being iterated would skip every other qualifier.
                for qualifier_object in list(claim_object.qualifiers[pidq1]):
                    print('Removing qualifier: ', pidq1)
                    claim_object.removeQualifier(qualifier_object, summary=edit_summary)

Pasleim's migrator (minimalist)

def changeProperty(p1, p2, q):
    """Move every claim of property p1 on item q to property p2 in one edit.

    The item's current JSON is fetched from the wbgetentities API.  For each
    p1 claim, the payload gets a removal entry for the existing claim plus a
    copy of that claim (without its id) whose main snak points at p2.  The
    whole payload is saved with a single editEntity call.

    Args:
        p1: ID of the property to move claims from.
        p2: ID of the property to move claims to.
        q: ID of the item to edit.

    Returns:
        1 after the edit has been submitted.

    Raises:
        KeyError: if the API response has no p1 claims for q.
        requests.HTTPError: if the API answers with an HTTP error status.
    """
    # Imported locally so the function does not depend on the surrounding
    # script having imported requests.
    import requests

    item = pywikibot.ItemPage(repo, q)
    # Let requests build and encode the query string instead of concatenating it.
    r = requests.get(
        'https://wikidata.org/w/api.php',
        params={'action': 'wbgetentities', 'ids': q, 'format': 'json'},
        timeout=60,
    )
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    r.raise_for_status()
    data = r.json()

    mydata = {'claims': []}
    for m in data['entities'][q]['claims'][p1]:
        # Remove the existing claim by id ...
        mydata['claims'].append({"id": m['id'], "remove": ""})
        # ... and re-add the same claim under the new property, dropping the
        # id so it is submitted as a new claim.
        m['mainsnak']['property'] = p2
        m.pop('id', None)
        mydata['claims'].append(m)
    item.editEntity(mydata, summary='move claim Property:' + p1 + ' -> Property:' + p2)
    return 1