[RAS] obfuscation module

Thomas Krichel krichel at openlib.org
Fri Feb 17 15:33:24 UTC 2023


  This is the module. make() returns the handle with the prefix
  rewritten to REPEC if a date shift has happened.

  make() makes the obfuscated handle. ekam() is the opposite,
  for testing. JMBC can perlify. 

  I tested this on about 97000 handles from ArchEc.
  
-- 
  Written by Thomas Krichel http://openlib.org/home/krichel on his 21077th day.

## obfuscate.py

import datetime

class Obfuscate:
    """Obfuscate handles by shifting the date embedded in them.

    A handle is expected to start with ``repec`` or ``RePEc`` and may carry
    a ``YYYY-MM-DD`` date at character positions 10 to 19.  ``make()`` shifts
    that date by a number of days derived from the handle length and marks
    the result by rewriting the prefix to ``REPEC``.  ``ekam()`` reverses
    the operation.
    """

    # # position of the YYYY-MM-DD date inside a handle
    _DATE_START = 10
    _DATE_END = 20
    _DATE_FORMAT = '%Y-%m-%d'

    def __init__(self, do_verbose=False):
        """Create an obfuscator. ``do_verbose`` is accepted but not used."""

    def from_file(self, fufi: str, cut: int) -> None:
        """testing routine, can be deleted

        Read handles from the file ``fufi``, one per line, and run the
        round-trip check of ``work_on_line()`` on each stripped line.
        ``cut`` is the number of leading characters to drop from each line.
        """
        # # the context manager closes the file even if a check raises
        with open(fufi, 'r') as handles_file:
            for line in handles_file:
                self.work_on_line(line.strip(), cut)
        return None

    def work_on_line(self, line: str, cut: int) -> None:
        """Check that ``ekam(make(handle))`` gives back the handle.

        The handle is ``line`` without its first ``cut`` characters.  The
        comparison ignores case.  Raises ``Exception`` on a mismatch.
        """
        # # in case we have a prefix of the handle
        old = line[cut:]
        new = self.make(old)
        check = self.ekam(new)
        if check.lower() != old.lower():
            raise Exception(f"inconsistency on {old} --> {new}")

    def make(self, old: str) -> str:
        """main function used by external callers

        Return the obfuscated form of the handle ``old``.

        The handle is returned unchanged if it is exactly ``RePEc:per``, if
        characters 10 to 19 do not parse as a ``YYYY-MM-DD`` date, or if the
        year is before 1900 or after 2100.  Otherwise the date is shifted by
        ``len(old)`` days, forward for an even length and backward for an
        odd length, and every ``repec`` / ``RePEc`` is rewritten to
        ``REPEC``.

        If ``old`` starts with neither ``repec`` nor ``RePEc``, it is
        printed and ``SystemExit`` is raised.
        """
        if not old.startswith(('repec', 'RePEc')):
            print(old)
            # # same exception quit() raises, but available without site
            raise SystemExit
        if old == 'RePEc:per':
            return old
        old_date = old[self._DATE_START:self._DATE_END]
        try:
            old_parsed = datetime.datetime.strptime(old_date,
                                                    self._DATE_FORMAT)
        except ValueError:
            return old
        year = old_parsed.year
        if year < 1900:
            # # early dates could be confusing because of changes
            # # in adoption of Gregorian calendar
            return old
        if year > 2100:
            # # I see no reason to obfuscate these
            return old
        shift = len(old)
        if shift % 2 == 1:
            # # for odd values, take negative
            shift = -shift
        new_parsed = old_parsed + datetime.timedelta(days=shift)
        new_date = new_parsed.strftime(self._DATE_FORMAT)
        # # splice by position, so that only the date we parsed is touched
        new = old[:self._DATE_START] + new_date + old[self._DATE_END:]
        new = new.replace('repec', 'REPEC')
        new = new.replace('RePEc', 'REPEC')
        return new

    def ekam(self, new: str) -> str:
        """opposite of make, a check

        Return the handle that ``make()`` turned into ``new``, with the
        prefix spelled ``RePEc``.  Handles equal to ``RePEc:per`` or not
        starting with ``REPEC`` are returned unchanged.

        Raises ``ValueError`` if ``new`` starts with ``REPEC`` but
        characters 10 to 19 do not parse as a ``YYYY-MM-DD`` date.
        """
        if new == 'RePEc:per':
            return new
        if not new.startswith('REPEC'):
            return new
        new_date = new[self._DATE_START:self._DATE_END]
        new_parsed = datetime.datetime.strptime(new_date, self._DATE_FORMAT)
        # # make() keeps the length, so the parity is the one it saw
        shift = len(new)
        if shift % 2 == 0:
            # # for even values, take negative
            shift = -shift
        old_parsed = new_parsed + datetime.timedelta(days=shift)
        old_date = old_parsed.strftime(self._DATE_FORMAT)
        old = new.replace('REPEC', 'RePEc')
        # # splice by position; a replace() here would also rewrite any
        # # later text that happens to equal the shifted date
        old = old[:self._DATE_START] + old_date + old[self._DATE_END:]
        return old



More information about the RAS-run mailing list