Huffman / src / huffman / HuffmanCoding.java
HuffmanCoding.java
Raw
package huffman;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;

import javax.swing.BoxLayout;
import javax.swing.JButton;
import javax.swing.JFileChooser;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.filechooser.FileNameExtensionFilter;

/**
 * This class contains methods which, when used together, perform the entire
 * Huffman Coding encoding and decoding process
 */
public class HuffmanCoding extends JPanel implements ActionListener {
    ArrayList<CharFreq> sortedList;
    TreeNode huffmanTree;
    String[] encodings;
    private JLabel fileMessage = new JLabel("No file chosen");
    private JButton button = new JButton("Select File");
    private JButton submit = new JButton("Submit");
    private JLabel encodeMessage = new JLabel();
    private JLabel decodeMessage = new JLabel();
    private JFileChooser fileChooser = new JFileChooser();
    private FileNameExtensionFilter textFiles = new FileNameExtensionFilter("Text Files", "txt");

    public HuffmanCoding() {
        this.setLayout(new BoxLayout(this, BoxLayout.Y_AXIS));
        // this.setLayout(new FlowLayout(FlowLayout.CENTER));;
        this.setOpaque(false);
        
        JPanel fileInput = new JPanel();
        fileInput.setAlignmentX(CENTER_ALIGNMENT);
        fileInput.setLayout(new BoxLayout(fileInput, BoxLayout.X_AXIS));
        // fileInput.setLayout(new FlowLayout(FlowLayout.CENTER));
        fileInput.setOpaque(false);
        fileInput.add(new JLabel("Upload a file:"));
		button.addActionListener(this);
		fileInput.add(button);
        fileInput.add(fileMessage);
        this.add(fileInput);
        submit.setAlignmentX(CENTER_ALIGNMENT);
        submit.addActionListener(this);
        submit.setEnabled(false);
        this.add(submit);
        encodeMessage.setAlignmentX(CENTER_ALIGNMENT);
        this.add(encodeMessage);
        decodeMessage.setAlignmentX(CENTER_ALIGNMENT);
        this.add(decodeMessage);
    }

    @Override
	public void actionPerformed(ActionEvent e) {
		if (e.getSource() == button) {
			fileChooser.setCurrentDirectory(new File(".")); // sets current directory
			fileChooser.setFileFilter(textFiles); // sets filter
			int response = fileChooser.showOpenDialog(null); // select file to open
			// int response = fileChooser.showSaveDialog(null); // select file to save
			if (response == JFileChooser.APPROVE_OPTION) {
				File file = fileChooser.getSelectedFile();
                if (textFiles.accept(file)) {
                    fileMessage.setText(file.getName());
                    submit.setEnabled(true);
                } else {
                    fileMessage.setText("Invalid text file!");
                    submit.setEnabled(false);
                }
			}
		}
        if (e.getSource() == submit) {
            File file = fileChooser.getSelectedFile();
			if (file != null) {
				// System.out.println(file.getName());
                sortedList = makeSortedList(file.getAbsolutePath());
                huffmanTree = makeTree(sortedList);
                encodings = makeEncodings(huffmanTree);
                String baseFileName = getBaseName(file.getAbsolutePath());
                encodeFromArray(encodings, file.getAbsolutePath(), baseFileName + "_encoded");
                encodeMessage.setText(file.getName() + " has been encoded to " + getBaseName(file.getName()) + "_encoded");
                decode(baseFileName + "_encoded", huffmanTree, baseFileName + "_decoded.txt");
                decodeMessage.setText(getBaseName(file.getName()) + "_encoded has been decoded to " + getBaseName(file.getName()) + "_decoded.txt");
			}
		}
	}

    /**
     * Writes a given string of 1's and 0's to the given file byte by byte and NOT
     * as characters of 1 and 0 which take up 8 bits each
     * 
     * @param filename  The file to write to (doesn't need to exist yet)
     * @param bitString The string of 1's and 0's to write to the file in bits
     */
    public static void writeBitString(String filename, String bitString) {
        byte[] bytes = new byte[bitString.length() / 8 + 1];
        int bytesIndex = 0, byteIndex = 0, currentByte = 0;

        // Pad the string with initial zeroes and then a one in order to bring
        // its length to a multiple of 8. When reading, the 1 signifies the
        // end of padding.
        int padding = 8 - (bitString.length() % 8);
        String pad = "";
        for (int i = 0; i < padding - 1; i++)
            pad = pad + "0";
        pad = pad + "1";
        bitString = pad + bitString;

        // For every bit, add it to the right spot in the corresponding byte,
        // and store bytes in the array when finished
        for (char c : bitString.toCharArray()) {
            if (c != '1' && c != '0') {
                System.out.println("Invalid characters in bitstring");
                System.exit(1);
            }

            if (c == '1')
                currentByte += 1 << (7 - byteIndex);
            byteIndex++;

            if (byteIndex == 8) {
                bytes[bytesIndex] = (byte) currentByte;
                bytesIndex++;
                currentByte = 0;
                byteIndex = 0;
            }
        }

        // Write the array of bytes to the provided file
        try {
            FileOutputStream out = new FileOutputStream(filename);
            out.write(bytes);
            out.close();
        } catch (Exception e) {
            System.err.println("Error when writing to file!");
        }
    }

    /**
     * Reads a given file byte by byte, and returns a string of 1's and 0's
     * representing the bits in the file
     * 
     * @param filename The encoded file to read from
     * @return String of 1's and 0's representing the bits in the file
     */
    public static String readBitString(String filename) {
        String bitString = "";

        try {
            FileInputStream in = new FileInputStream(filename);
            File file = new File(filename);

            byte bytes[] = new byte[(int) file.length()];
            in.read(bytes);
            in.close();

            // For each byte read, convert it to a binary string of length 8 and add it
            // to the bit string
            for (byte b : bytes)
                bitString = bitString + String.format("%8s", Integer.toBinaryString(b & 0xFF)).replace(' ', '0');

            // Detect the first 1 signifying the end of padding, then remove the first few
            // characters, including the 1
            for (int i = 0; i < 8; i++) {
                if (bitString.charAt(i) == '1')
                    return bitString.substring(i + 1);
            }

            return bitString.substring(8);
        } catch (Exception e) {
            System.out.println("Error while reading file!");
            return "";
        }
    }

    /**
     * Reads a given text file character by character, and returns an arraylist of
     * CharFreq objects with frequency > 0, sorted by frequency
     * 
     * @param filename The text file to read from
     * @return Arraylist of CharFreq objects, sorted by frequency
     */
    public static ArrayList<CharFreq> makeSortedList(String filename) {
        StdIn.setFile(filename);
        int[] freq = new int[128];
        ArrayList<CharFreq> list = new ArrayList<CharFreq>();
        double numChar = 0;
        while (StdIn.hasNextChar()) {
            freq[StdIn.readChar()]++;
            numChar++;
        }
        int distinctChars = 0;
        for (int i = 0; i < freq.length; i++) {
            if (freq[i] > 0) {
                list.add(new CharFreq((char) i, freq[i] / numChar));
                distinctChars++;
            }
        }
        // If there is only 1 distinct character, add another character with ASCII value
        // one more than the distinct character with probOcc 0
        if (distinctChars == 1) {
            CharFreq unique = list.get(0);
            // Wrap around to 0 at 127
            int asc = 0;
            if ((int) unique.getCharacter() != 127)
                asc = unique.getCharacter() + 1;
            list.add(new CharFreq((char) asc, 0));
        }
        Collections.sort(list);
        return list;
    }

    /**
     * Uses a given sorted arraylist of CharFreq objects to build a huffman coding
     * tree
     * 
     * @param sortedList The arraylist of CharFreq objects to build the tree from
     * @return A TreeNode representing the root of the huffman coding tree
     */
    public static TreeNode makeTree(ArrayList<CharFreq> sortedList) {
        Queue<TreeNode> source = new Queue<>(), target = new Queue<>();
        for (int i = 0; i < sortedList.size(); i++)
            source.enqueue(new TreeNode(sortedList.get(i), null, null));
        TreeNode node1 = source.dequeue();
        TreeNode node2 = source.dequeue();
        double prob1 = node1.getData().getProbOccurrence();
        double prob2 = node2.getData().getProbOccurrence();
        target.enqueue(new TreeNode(new CharFreq(null, prob1 + prob2), node1, node2));
        while (!source.isEmpty() || target.size() != 1) {
            if (!source.isEmpty() && source.peek().getData().getProbOccurrence() <= target.peek().getData().getProbOccurrence())
                node1 = source.dequeue();
            else
                node1 = target.dequeue();
            if (!source.isEmpty() && (target.isEmpty() || source.peek().getData().getProbOccurrence() <= target.peek().getData().getProbOccurrence()))
                node2 = source.dequeue();
            else
                node2 = target.dequeue();
            prob1 = node1.getData().getProbOccurrence();
            prob2 = node2.getData().getProbOccurrence();
            target.enqueue(new TreeNode(new CharFreq(null, prob1 + prob2), node1, node2));
        }

        return target.peek();
    }

    /**
     * Uses a given huffman coding tree to create a string array of size 128, where
     * each index in the array contains that ASCII character's bitstring encoding.
     * Characters not present in the huffman coding tree should have their spots in
     * the array left null
     * 
     * @param root The root of the given huffman coding tree
     * @return Array of strings containing only 1's and 0's representing character
     *         encodings
     */
    public static String[] makeEncodings(TreeNode root) {
        String[] arr = new String[128];
        inOrder(root, "", arr);
        return arr;
    }

    private static void inOrder(TreeNode x, String code, String[] a) {
        if (x == null)
            return;
        inOrder(x.getLeft(), code + "0", a);
        if (x.getData().getCharacter() != null)
            a[x.getData().getCharacter()] = code;
        inOrder(x.getRight(), code + "1", a);
    }

    /**
     * Using a given string array of encodings, a given text file, and a file name
     * to encode into, this method makes use of the writeBitString method to write
     * the final encoding of 1's and 0's to the encoded file.
     * 
     * @param encodings   The array containing binary string encodings for each
     *                    ASCII character
     * @param textFile    The text file which is to be encoded
     * @param encodedFile The file name into which the text file is to be encoded
     */
    public static void encodeFromArray(String[] encodings, String textFile, String encodedFile) {
        StdIn.setFile(textFile);
        String bitString = "";
        while (StdIn.hasNextChar())
            bitString += encodings[StdIn.readChar()];
        writeBitString(encodedFile, bitString);
    }

    /**
     * Using a given encoded file name and a huffman coding tree, this method makes
     * use of the readBitString method to convert the file into a bit string, then
     * decodes the bit string using the tree, and writes it to a file.
     * 
     * @param encodedFile The file which contains the encoded text we want to decode
     * @param root        The root of your Huffman Coding tree
     * @param decodedFile The file which you want to decode into
     */
    public static void decode(String encodedFile, TreeNode root, String decodedFile) {
        StdOut.setFile(decodedFile);
        String encode = readBitString(encodedFile);
        char[] cArr = encode.toCharArray();
        TreeNode ptr = root;
        for (char c : cArr) {
            if (c == '0')
                ptr = ptr.getLeft();
            else
                ptr = ptr.getRight();
            if (ptr.getData().getCharacter() != null) {
                StdOut.print(ptr.getData().getCharacter());
                ptr = root;
            }
        }
    }

    /**
     * Gets the base name, without extension, of given file name.
     * e.g. getBaseName("file.txt") will return "file"
     *
     * @param fileName
     * @return the base name
     */
    public static String getBaseName(String fileName) {
        int index = fileName.lastIndexOf('.');
        if (index == -1)
            return fileName;
        else
            return fileName.substring(0, index);
    }

    public static void main(String[] args) {
        new WindowFrame("Huffman Coding", new HuffmanCoding());
        // if (args.length < 3) {
        //     StdOut.println("Execute: java -cp bin huffman.HuffmanCoding <input file> <encoded filename> <decoded filename>");
        //     Driver.main(args);
        //     return;
        // }
        // ArrayList<CharFreq> sortedList = makeSortedList(args[0]);
        // TreeNode huffmanTree = makeTree(sortedList);
        // String[] encodings = makeEncodings(huffmanTree);
        // encodeFromArray(encodings, args[0], args[1]);
        // decode(args[1], huffmanTree, args[2]);
    }
}