CSE-107 / hw1 / HW1_CSE107.py
HW1_CSE107.py
Raw
ACTUAL_KEY_LENGTH = 7
ACTUAL_KEY = "ATURING"
HW1_CipherText = "IGLVKRTTRYRZFTEPGVBUUDLIWMYKCMLFVVICHGDCAOCTNZWANAOYKZNTSYIIURJONLJWPOEMSDWFZVBMZJYEBRYEIORIGALJVWUBNFCFJIZCKIYIOFGLVVIAMCFVFGNWZRKVRIMUKQAMBKIRLRIOFGVZPKALMLKUOTBMZUCURMUEBSURHOIOYUBTFVKBTOFSRVQUUKHRBVUNTFJMPARBNPBBNAOYJBEUNZYEKEEPMCFVFZAGXRZQYTAYUMIKLHJDMAZAGXIWOASMUUWCZIHHFNFZRHHXMAIRRJKQBTILUBMLZOHFKWFKCNLVKBSMXLTMNTDMLRLRYAYYXCNXDILZDNZEBHWWESAMCFVCXOFIKMSXEXYOXEKSLCFVNTDTMJWPOAMCFVNTDLNIMAMTAYEKLHEKMVKHXIMSKPRJEIUIBZKNMCJWAZHXZIWAZLBHVABLTAYWQTNTTARQAYTVSSMEIRBGVIAJWXEEWJLIKMKPNTDMBVLNSAZYKPNZCTHSMPGULYUJLZHHMVEUUEQJCWVZVNFEMEGBEYRVQONLYTCEKSRMKMZYWXMLXCURMUELRTCHOIITKTAYLARUFLYTCEKNXNNWEQSMIGZRBEGNTGOKRMBIMNZSMIFCEIRBNZKNRNTNZWAGLBHWZNYTKOTBHXEHOIQAZEEFVKGAAEJIWCKRMSRVQUUKXRBNYOTMKWCXOFIKMBARHPVZNRLLUWMGEAFYIQPGNVCKQMKNLWRZRJEXJCGNHONNGZVBAVSRVQXIZBKTLYOFUEGPUMIUEQRYHTPVJRKNKYJXBTDBHXBBGMTLBMGJEFUELSURILFLHITLUELFKROCTMFZHTNGZBZEVNKPRVRBPRKLGNWMVKHXIMSFNGNEBLTCFZOFYIAGNILBRATKNXLRBRJPHMZBVBEBHEWIGTBIEBUGTAUJJRKNVLLKVGLMIKPRJIZCKIYKCHHFULCEMIFKNXETVFCGZHXMVQZVOKNRVGVRBHTQCRELCELRKDBNZABARHVCQTGTBIEBBAPAICLPOVBFCQOKRMCVAVTCEOUQAMTAYIQTNTMIGZVBAVSNMUGVXUCENESKYJXRITXXKPRLUGXRURTTTFIQTNTHZGMBVLXNFMAMAZYZVCXIOUKMPUMFOEQPGTBIEAEKGTLUTRYSHZKPRSEWCLUBXTXWYVBROZSNPRZHXLZBVYIGMKIAZMXMJITKSMYOBFURHFUNNYHBIEMQREMNVZFIIMCQMAYHTPVBUKRBAYBGUCHGDCAOCTNVEVZHHHVIAUTAYIQAVRBPRBRCIMBFCGANTOKPBXISYUOBBEKHDMAZSNLMMVRLTHTMAUTLCDXYEBXWRCFKTAYTWAYTBNLBVUNWYDIAJSBNSCGHEVULARZHXZIMRLLHQFNVTFHLDIGO"
ct_length = len(HW1_CipherText)

test = "SADWADADXZCBEAUTIFULBEAUTIFULBEAUTIFULBEAUTIFULASDSANDNASDNBEAUTIFUL"

# ___________________FIND KEY LENGTH BELOW_________________________________________________________

# Param {String} - Ciphertext that we will be looking for the key length
# Param {Int} - length of the subString that is the same within the cipherText
# Return {Dictionary} - the subString is the key and the {Int} # of characters between
#
# Kasiski examination will help us get the key length of vigenere cipher
# (tested) with random key "ABCDEFG"
def kasiski(cipherText, length):
    res = {}                                                    # "subStr" : char_between

    for i in range(len(cipherText)):
        subStr = cipherText[i:i+length]                         # String
        char_between = cipherText[i+length : ].find(subStr)+1   # int 
        if(char_between > 0):
            res[subStr] = char_between

    return res

# Param {Int} - find the individual divisior of a value{int}
# Return {List} - List of divisor for the value
#
# This will find the prime divisors of the value 
def find_divisor(val):
    res_arr = []

    count = 1
    quotient = val
    while(count <= quotient):
        if((quotient%count) == 0):
            res_arr.append(count)
            quotient = quotient/count
            count = 2
        else:
            count += 1

    return res_arr

# Param {Dictionary} - The dictionary returned from Kasiski function that contains substring of specify length and their # of chars between
# 
# Help prints out the Kasiski dictionary with the substrings and its # of characters in between
# and prints out the divisor of the # characters in between
def neat_print(dict):
    for (key, value) in dict.items():
        # +1 because it's pointing to the index
        print("Key= %s : Value= %d" % (key, value))
        print("Multiple of %d is %s\n" % (value, find_divisor(value)))

# neat_print(kasiski(HW1_CipherText, 8))

# Param {String} - the ciphertext
# Return {Dictionary} - 'the letter' : the frequency
#
# get the frequency of each letter in ciphertext
def freq(cipherText):
    letter_freq = {}

    for i in cipherText:
        if(i in letter_freq):
            letter_freq[i] += 1
        else:
            letter_freq[i] = 1
    return letter_freq

# Param {String} - the ciphertext
# Return {Int} - the estimated keylength
#
# Gets estimated value of keylength
def friedman(cipherText):
    kp = 0.067
    kr = 0.0385
    N = len(cipherText)

    sum = 0
    letter_freq = freq(cipherText)
    for (key, value) in letter_freq.items():
        sum = sum + (value*(value-1))

    ko = sum/(N*(N-1))
    res = (kp - kr)/(ko-kr)
    return res

print("key length estimation: %d" % friedman(HW1_CipherText))

def vigDecrypt(ciphertext, key):
    decrypted = ''
    for i, ch in enumerate(ciphertext):
        decrypted += unshiftLetter(ch, key[i % len(key)])
    return decrypted

def unshiftLetter(letter, keyLetter):
    letter = ord(letter) - ord("A")
    keyLetter = ord(keyLetter) - ord("A")
    new = (letter - keyLetter) % 26
    return chr(new + ord("A"))

def vigEncrypt(plaintext, key):
    encrypted = ''
    for i, ch in enumerate(plaintext):
        encrypted += shiftLetter(ch, key[i % len(key)])
    return encrypted

def shiftLetter(letter, keyLetter):
    letter = ord(letter) - ord("A")
    keyLetter = ord(keyLetter) - ord("A")
    new = (letter + keyLetter) % 26
    return chr(new + ord("A"))

# vigDecrypt(HW1_CipherText[:len(HW1_CipherText)-ACTUAL_KEY_LENGTH], HW1_CipherText[ACTUAL_KEY_LENGTH: ]) 

# Param {String} - the ciphertext
# Return {List} - list of strings of divided ciphertext
#
# put ciphertext into ACTUAL_KEY_LENGTH number of columns
def rewrite_ciphertext(cipherText):
    res = []                            # len of ACTUAL_KEY_LENGTH
    index = 0
    # Initialization of list
    for i in cipherText[0:ACTUAL_KEY_LENGTH]:
        res.append(i)

    for i in cipherText[ACTUAL_KEY_LENGTH: ]:
        if(index == ACTUAL_KEY_LENGTH):
            index = 0
        res[index] = res[index] + i
        index += 1
    return res                          # list of strings 

CIPHER_ARR = rewrite_ciphertext(HW1_CipherText)

# Param {List} - List of ciphertext divided
# Return {List} - List of highest frequency character within the column
#
# Gives you a list of the most frequent letter within the column 
def highest_freq(cipher_arr):
    res = []
    pair = ("", 0)

    for i in cipher_arr:
        count = 0
        highest = 0
        frequencies = freq(i)       # dictionary
        # compare the value of each elements in dictionary and get the largest value
        for (key, value) in frequencies.items():
            if(value > highest):
                highest = value
                pair = (key, value/len(i))
            if(count == ACTUAL_KEY_LENGTH-1):
                res.append(pair)
            count += 1
    return res

# Param {List} - String array of the highest frequency character
# Param {String} - A char from the English alphabet i.e ("E" , "T", "A")
# Return {List} - The shift within each column
#
# Subtracts the highest frequency letter by E to get the Key for that column
# Each column represents a Caesar cipher
def find_key(highest_freq_arr, letter):
    res = []
    for (i,j) in highest_freq_arr:
        res.append(unshiftLetter(i, letter))

    return res


### Finding Key

# print(CIPHER_ARR[0])
# print(freq(CIPHER_ARR[0]))

# print(CIPHER_ARR[1])
# print(freq(CIPHER_ARR[1]))

# print(CIPHER_ARR[2])
# print(freq(CIPHER_ARR[2]))

# print(CIPHER_ARR[3])
# print(freq(CIPHER_ARR[3]))

# print(CIPHER_ARR[4])
# print(freq(CIPHER_ARR[4]))

# print(CIPHER_ARR[5])
# print(freq(CIPHER_ARR[5]))

# print(CIPHER_ARR[6])
# print(freq(CIPHER_ARR[6]))

print("Letter of highest frequency in column: %s" % highest_freq(CIPHER_ARR)) 
print("Key?: %s" % find_key(highest_freq(CIPHER_ARR), "E"))   
print("Key?: %s" % find_key(highest_freq(CIPHER_ARR), "T"))
print("Decrypt Value: %s" % vigDecrypt(HW1_CipherText, ACTUAL_KEY))

# ct_difference = vigDecrypt(
#     HW1_CipherText[:ct_length - ACTUAL_KEY_LENGTH], HW1_CipherText[ACTUAL_KEY_LENGTH:]) # key elimination

# print("C - C[i+m: ] = %s " % vigDecrypt(HW1_CipherText[:ct_length - ACTUAL_KEY_LENGTH], HW1_CipherText[ACTUAL_KEY_LENGTH: ]))