常见压缩算法学习
文章目錄
- 無損壓縮算法理論基礎
- 信息熵
- 熵編碼
- 字典編碼
- 綜合通用無損壓縮算法
- 相關常見名詞說明
- java對幾種常見算法實現
- Snappy
- deflate算法
- Gzip算法
- huffman算法
- Lz4算法
- Lzo算法
- 使用方式
無損壓縮算法理論基礎
信息熵
信息熵是一個數學上頗為抽象的概念,在這里不妨把信息熵理解成某種特定信息的出現概率(離散隨機事件的出現概率)。一個系統越是有序,信息熵就越低;反之,一個系統越是混亂,信息熵就越高。信息熵也可以說是系統有序化程度的一個度量。
熵編碼
字典編碼
綜合通用無損壓縮算法
相關常見名詞說明
java對幾種常見算法實現
Snappy
Google開發的一個非常流行的壓縮算法,基于LZ77的思路編寫的快速數據壓縮與解壓縮
Snappy是在谷歌內部生產環境中被許多項目使用的壓縮庫,包括BigTable,MapReduce和RPC等。谷歌表示算法庫針對性能做了調整,而不是針對壓縮比或與其他類似工具的兼容性。在Intel酷睿i7處理器上,其單核處理數據流的能力達到250MB/s-500MB/s。Snappy同時針對64位x86處理器進行了優化,在英特爾酷睿i7處理器單一核心實現了至少250MB/s的壓縮性能和500MB/s的解壓縮性能。Snappy對于純文本的壓縮率為1.5-1.7,對于HTML是2-4,當然了對于JPEG、PNG和其他已經壓縮過的數據壓縮率為1.0。谷歌強勁吹捧Snappy的魯棒性,稱其是“即使面對損壞或惡意輸入也不會崩潰的設計”,并且在谷歌的生產環境中經受住了PB級數據壓縮的考驗,運行穩定。
依賴:
<dependency>
    <groupId>org.xerial.snappy</groupId>
    <artifactId>snappy-java</artifactId>
    <version>1.1.7.5</version>
</dependency>
Snappy Java實現源碼:
package com.demo.rpc.compress;import java.io.IOException;import org.xerial.snappy.Snappy;/*** @author: weijie* @Date: 2020/9/24 14:31* @Description:Google開發的一個非常流行的壓縮算法,基于LZ77的思路編寫的快速數據壓縮與解壓縮** LZ77算法:如果文件中有兩塊內容相同的話,那么只要知道前一塊的位置和大小,我們就可以確定后一塊的內容* 所以我們可以用(兩者之間的距離,相同內容的長度)這樣一對信息,來替換后一對內容。由于(兩者之間的距離,相同* 內容的長度)這一對信息的大小,小于被替換內容的大小,所以文件得到壓縮。** @url: https://blog.csdn.net/zj57356498318/article/details/108248602****/ public class SnappyCompressor implements Compressor {public byte[] compress(byte[] array) throws IOException {if (array == null) {return null;}return Snappy.compress(array);}public byte[] unCompress(byte[] array) throws IOException {if (array == null) {return null;}return Snappy.uncompress(array);} }deflate算法
/**
 * Deflate (zlib format) compression helpers built on {@code java.util.zip}.
 *
 * <p>Two flavours are offered: raw {@code byte[]} in/out, and a text flavour that
 * encodes the text as UTF-8 and transports the compressed bytes as Base64.
 * Every I/O failure is reported as a {@link RuntimeException} carrying the
 * original exception as its cause; nothing is swallowed.
 */
public class DeflateCompress {

    /** Size of the scratch buffer used while inflating. */
    private static final int BUFFER_SIZE = 256;

    /**
     * Inflates a Base64 string produced by {@link #compress(String)}.
     *
     * @param inputString Base64 text of deflate-compressed UTF-8 data; may be {@code null}
     * @return the original text, or {@code null} when the input is {@code null} or decodes
     *         to zero bytes
     * @throws IllegalArgumentException if {@code inputString} is not valid Base64
     * @throws RuntimeException if the decoded bytes are not a valid deflate stream
     */
    public static String unCompress(String inputString) {
        // Check for null BEFORE decoding: Base64.Decoder.decode(null) throws NPE.
        if (inputString == null) {
            return null;
        }
        byte[] bytes = Base64.getDecoder().decode(inputString);
        if (bytes.length == 0) {
            return null;
        }
        return new String(unCompress(bytes), StandardCharsets.UTF_8);
    }

    /**
     * Deflates a byte array.
     *
     * @param bytes the raw bytes; may be {@code null}
     * @return the compressed bytes, or {@code null} when {@code bytes} is {@code null}
     * @throws RuntimeException if writing to the deflater stream fails
     */
    public static byte[] compress(byte[] bytes) {
        if (bytes == null) {
            return null;
        }
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        // Closing the stream finishes the deflate stream and releases the Deflater,
        // so it must happen before the buffer is read.
        try (DeflaterOutputStream deflater = new DeflaterOutputStream(out)) {
            deflater.write(bytes);
        } catch (IOException e) {
            throw new RuntimeException("DeflaterCompressError", e);
        }
        return out.toByteArray();
    }

    /**
     * Inflates a byte array produced by {@link #compress(byte[])}.
     *
     * @param bytes the compressed bytes; may be {@code null}
     * @return the original bytes; {@code null} for {@code null} input and an empty array
     *         for empty input
     * @throws RuntimeException if {@code bytes} is not a valid deflate stream
     */
    public static byte[] unCompress(byte[] bytes) {
        if (bytes == null) {
            return null;
        }
        if (bytes.length == 0) {
            return new byte[0];
        }
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try (InflaterInputStream inflater = new InflaterInputStream(new ByteArrayInputStream(bytes))) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int n;
            while ((n = inflater.read(buffer)) >= 0) {
                out.write(buffer, 0, n);
            }
        } catch (IOException e) {
            // Corrupt input must surface to the caller instead of yielding partial data.
            throw new RuntimeException("DeflaterUnCompressError", e);
        }
        return out.toByteArray();
    }

    /**
     * Deflates a string (as UTF-8) and returns the result as Base64 text.
     *
     * @param original the text to compress; may be {@code null}
     * @return Base64 text of the compressed data, or {@code null} when {@code original} is
     *         {@code null} or empty
     * @throws RuntimeException if compression fails
     */
    public static String compress(String original) {
        if (original == null || original.isEmpty()) {
            return null;
        }
        byte[] compressed = compress(original.getBytes(StandardCharsets.UTF_8));
        return Base64.getEncoder().encodeToString(compressed);
    }
}
/**
 * GZIP compression helpers built on {@code java.util.zip}.
 *
 * <p>{@link #compress(String, String)} and {@link #unCompress(byte[], String)} work on
 * raw compressed bytes with a caller-chosen charset name; {@link #gzipCompress(String)}
 * and {@link #gzipUnCompress(String)} are UTF-8 convenience wrappers that transport the
 * compressed bytes as Base64 text.
 */
public class GzipCompress {

    private static final String GZIP_ENCODE_UTF_8 = "UTF-8";

    /** Size of the scratch buffer used while decompressing. */
    private static final int BUFFER_SIZE = 256;

    /**
     * Decompresses Base64 text produced by {@link #gzipCompress(String)}.
     *
     * @param inputString Base64 text of GZIP data; may be {@code null}
     * @return the original text, or {@code null} when the input is {@code null} or decodes
     *         to zero bytes
     * @throws IllegalArgumentException if {@code inputString} is not valid Base64
     * @throws RuntimeException if the decoded bytes are not valid GZIP data
     */
    public static String gzipUnCompress(String inputString) {
        // Base64.Decoder.decode(null) throws NPE, so handle null up front.
        if (inputString == null) {
            return null;
        }
        byte[] decode = Base64.getDecoder().decode(inputString);
        return unCompress(decode, GZIP_ENCODE_UTF_8);
    }

    /**
     * Decompresses GZIP bytes and decodes them as text.
     *
     * @param bytes    the GZIP data; may be {@code null}
     * @param encoding charset name used to decode the decompressed bytes
     * @return the decoded text, or {@code null} when {@code bytes} is {@code null} or empty
     * @throws RuntimeException if decompression or decoding fails
     */
    public static String unCompress(byte[] bytes, String encoding) {
        if (bytes == null || bytes.length == 0) {
            return null;
        }
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        // try-with-resources releases the underlying Inflater even when reading fails.
        try (GZIPInputStream ungzip = new GZIPInputStream(new ByteArrayInputStream(bytes))) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int n;
            while ((n = ungzip.read(buffer)) >= 0) {
                out.write(buffer, 0, n);
            }
            return out.toString(encoding);
        } catch (Exception e) {
            throw new RuntimeException("GzipUnCompressError", e);
        }
    }

    /**
     * Compresses text (as UTF-8) with GZIP and returns the result as Base64 text.
     *
     * @param original the text to compress; may be {@code null}
     * @return Base64 text of the GZIP data, or {@code null} when {@code original} is
     *         {@code null} or empty
     * @throws RuntimeException if compression fails
     */
    public static String gzipCompress(String original) {
        byte[] compressed = compress(original, GZIP_ENCODE_UTF_8);
        // compress() yields null for null/empty text; the Base64 encoder rejects null.
        return compressed == null ? null : Base64.getEncoder().encodeToString(compressed);
    }

    /**
     * Compresses text with GZIP.
     *
     * @param str      the text to compress; may be {@code null}
     * @param encoding charset name used to encode {@code str} before compression
     * @return the GZIP data, or {@code null} when {@code str} is {@code null} or empty
     * @throws RuntimeException if encoding or compression fails
     */
    public static byte[] compress(String str, String encoding) {
        if (str == null || str.isEmpty()) {
            return null;
        }
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        // The stream must be closed (writing the GZIP trailer) before the buffer is read.
        try (GZIPOutputStream gzip = new GZIPOutputStream(out)) {
            gzip.write(str.getBytes(encoding));
        } catch (Exception e) {
            throw new RuntimeException("GzipCompressError", e);
        }
        return out.toByteArray();
    }
}
/**
 * Byte-oriented Huffman coder.
 *
 * <p>{@link #compress(Map, byte[])} builds a Huffman code table for the input, stores it
 * in the caller-supplied map and returns the packed bit stream;
 * {@link #unCompress(Map, byte[])} reverses the operation given the same table.
 *
 * <p>Compressed layout: byte 0 holds the number of unused padding bits (0-7) at the end
 * of the last data byte; the remaining bytes hold the code bits, most significant bit
 * first. Recording the padding is what lets the decoder recover a final code that
 * starts with zero bits. Empty input compresses to an empty array.
 *
 * <p>Reference: https://blog.csdn.net/qq_41966475/article/details/108550909
 */
public class HuffmanCompress {

    private static final int BITS_PER_BYTE = 8;

    /**
     * Decodes data produced by {@link #compress(Map, byte[])}.
     *
     * @param huffmanCodes the code table filled in by {@code compress} (byte to bit string)
     * @param huffmanBytes the compressed data, including the leading padding byte
     * @return the original bytes; an empty array for empty input
     * @throws NullPointerException     if either argument is {@code null}
     * @throws IllegalArgumentException if the header is invalid or the bit stream does not
     *                                  match the code table
     */
    public byte[] unCompress(Map<Byte, String> huffmanCodes, byte[] huffmanBytes) {
        Objects.requireNonNull(huffmanCodes, "huffmanCodes");
        Objects.requireNonNull(huffmanBytes, "huffmanBytes");
        if (huffmanBytes.length == 0) {
            return new byte[0];
        }

        int paddingBits = huffmanBytes[0];
        long totalBits = (long) (huffmanBytes.length - 1) * BITS_PER_BYTE - paddingBits;
        if (paddingBits < 0 || paddingBits >= BITS_PER_BYTE || totalBits < 0) {
            throw new IllegalArgumentException("Invalid Huffman header, padding bits: " + paddingBits);
        }

        // Invert the table (bit string -> byte) and remember the longest code so that a
        // mismatching stream is rejected instead of growing the pending code forever.
        Map<String, Byte> symbolsByCode = new HashMap<>();
        int longestCode = 0;
        for (Map.Entry<Byte, String> entry : huffmanCodes.entrySet()) {
            symbolsByCode.put(entry.getValue(), entry.getKey());
            longestCode = Math.max(longestCode, entry.getValue().length());
        }

        ByteArrayOutputStream decoded = new ByteArrayOutputStream();
        StringBuilder pending = new StringBuilder();
        for (long bit = 0; bit < totalBits; bit++) {
            pending.append(readBit(huffmanBytes, bit) ? '1' : '0');
            if (pending.length() > longestCode) {
                throw new IllegalArgumentException("Bit stream does not match the Huffman code table");
            }
            Byte symbol = symbolsByCode.get(pending.toString());
            if (symbol != null) {
                decoded.write(symbol.byteValue());
                pending.setLength(0);
            }
        }
        if (pending.length() != 0) {
            throw new IllegalArgumentException("Bit stream ends in the middle of a Huffman code");
        }
        return decoded.toByteArray();
    }

    /** Returns the data bit at position {@code bit} (0-based, MSB first, after the header byte). */
    private static boolean readBit(byte[] data, long bit) {
        int index = 1 + (int) (bit / BITS_PER_BYTE);
        int shift = BITS_PER_BYTE - 1 - (int) (bit % BITS_PER_BYTE);
        return ((data[index] >> shift) & 1) == 1;
    }

    /**
     * Compresses {@code bytes} and publishes the code table through {@code huffmanCodes}.
     *
     * @param huffmanCodes output map; it is cleared and then filled with the code of every
     *                     distinct byte in {@code bytes}
     * @param bytes        the raw data
     * @return the compressed data (padding byte followed by the packed bits), or an empty
     *         array when {@code bytes} is empty
     * @throws NullPointerException if either argument is {@code null}
     */
    public byte[] compress(Map<Byte, String> huffmanCodes, byte[] bytes) {
        Objects.requireNonNull(huffmanCodes, "huffmanCodes");
        Objects.requireNonNull(bytes, "bytes");
        // Stale entries from an earlier call would make the inverted table ambiguous.
        huffmanCodes.clear();
        if (bytes.length == 0) {
            return new byte[0];
        }
        Node root = creatHuffmanTree(getNodes(bytes));
        getCodes(huffmanCodes, root);
        return zip(bytes, huffmanCodes);
    }

    /**
     * Packs the Huffman codes of {@code bytes} into a byte array.
     *
     * @param bytes        the raw data
     * @param huffmanCodes the code table covering every byte in {@code bytes}
     * @return the padding byte followed by the packed code bits
     */
    private byte[] zip(byte[] bytes, Map<Byte, String> huffmanCodes) {
        long totalBits = 0;
        for (byte b : bytes) {
            totalBits += huffmanCodes.get(b).length();
        }
        int dataLength = Math.toIntExact((totalBits + BITS_PER_BYTE - 1) / BITS_PER_BYTE);
        byte[] packed = new byte[1 + dataLength];
        packed[0] = (byte) ((BITS_PER_BYTE - totalBits % BITS_PER_BYTE) % BITS_PER_BYTE);

        long bit = 0;
        for (byte b : bytes) {
            String code = huffmanCodes.get(b);
            for (int i = 0; i < code.length(); i++, bit++) {
                if (code.charAt(i) == '1') {
                    int index = 1 + (int) (bit / BITS_PER_BYTE);
                    int shift = BITS_PER_BYTE - 1 - (int) (bit % BITS_PER_BYTE);
                    packed[index] |= (byte) (1 << shift);
                }
            }
        }
        return packed;
    }

    /**
     * Fills {@code huffmanCodes} with the code of every leaf below {@code root}.
     *
     * @return {@code huffmanCodes}, or {@code null} when {@code root} is {@code null}
     */
    private Map<Byte, String> getCodes(Map<Byte, String> huffmanCodes, Node root) {
        if (root == null) {
            return null;
        }
        if (root.data != null) {
            // A lone leaf has no path from the root; give the only symbol a 1-bit code.
            huffmanCodes.put(root.data, "0");
            return huffmanCodes;
        }
        getCodes(huffmanCodes, root.left, "0", new StringBuilder());
        getCodes(huffmanCodes, root.right, "1", new StringBuilder());
        return huffmanCodes;
    }

    /**
     * Collects the Huffman code of every leaf below {@code node} into {@code huffmanCodes}.
     *
     * @param node          the node to visit; may be {@code null}
     * @param code          the edge taken to reach {@code node}: "0" for left, "1" for right
     * @param stringBuilder the path from the root to the parent of {@code node}
     */
    private void getCodes(Map<Byte, String> huffmanCodes, Node node, String code, StringBuilder stringBuilder) {
        if (node == null) {
            return;
        }
        StringBuilder path = new StringBuilder(stringBuilder).append(code);
        if (node.data == null) {
            // Internal nodes carry no data; keep descending.
            getCodes(huffmanCodes, node.left, "0", path);
            getCodes(huffmanCodes, node.right, "1", path);
        } else {
            huffmanCodes.put(node.data, path.toString());
        }
    }

    /**
     * Counts the occurrences of each byte.
     *
     * @param bytes the raw data
     * @return one leaf node per distinct byte, weighted by its number of occurrences
     */
    private List<Node> getNodes(byte[] bytes) {
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte b : bytes) {
            counts.merge(b, 1, Integer::sum);
        }
        List<Node> nodes = new ArrayList<>(counts.size());
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            nodes.add(new Node(entry.getKey(), entry.getValue()));
        }
        return nodes;
    }

    /**
     * Builds the Huffman tree by repeatedly merging the two lightest nodes.
     *
     * @param nodes the leaf nodes; must not be empty
     * @return the root of the tree (the single leaf itself when there is only one)
     */
    private Node creatHuffmanTree(List<Node> nodes) {
        PriorityQueue<Node> queue = new PriorityQueue<>(nodes);
        while (queue.size() > 1) {
            Node leftNode = queue.poll();
            Node rightNode = queue.poll();
            Node parent = new Node(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            queue.add(parent);
        }
        return queue.poll();
    }
}

/**
 * Huffman tree node. Leaves hold a byte value in {@code data}; internal nodes have
 * {@code data == null}. Nodes are ordered by ascending {@code weight}.
 */
class Node implements Comparable<Node> {
    Byte data;
    int weight;
    Node left;
    Node right;

    public Node(Byte data, int weight) {
        this.data = data;
        this.weight = weight;
    }

    @Override
    public int compareTo(Node o) {
        // Integer.compare avoids the overflow risk of comparing by subtraction.
        return Integer.compare(this.weight, o.weight);
    }

    @Override
    public String toString() {
        return "Node{" + "data=" + data + ", weight=" + weight + '}';
    }
}
依賴:
<dependency>
    <groupId>org.lz4</groupId>
    <artifactId>lz4-java</artifactId>
    <version>1.7.1</version>
</dependency>
Lz4算法Java實現源碼:
package com.demo.rpc.compress;import net.jpountz.lz4.LZ4BlockInputStream; import net.jpountz.lz4.LZ4BlockOutputStream; import net.jpountz.lz4.LZ4Compressor; import net.jpountz.lz4.LZ4Factory;import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.nio.charset.StandardCharsets; import java.util.Base64;public class Lz4Compress {//lz4解壓縮public static String unCompress(String str){byte[] decode = Base64.getDecoder().decode(str.getBytes());ByteArrayOutputStream baos = new ByteArrayOutputStream();try{LZ4BlockInputStream lzis = new LZ4BlockInputStream(new ByteArrayInputStream(decode));int count;byte[] buffer = new byte[2048];while ((count = lzis.read(buffer)) != -1) {baos.write(buffer, 0, count);}lzis.close();return baos.toString("utf-8");}catch (Exception e){throw new RuntimeException("lz4UnCompressError", e);}}public static byte[] unCompress(byte[] bytes){ByteArrayOutputStream baos = new ByteArrayOutputStream();try{LZ4BlockInputStream lzis = new LZ4BlockInputStream(new ByteArrayInputStream(bytes));int count;byte[] buffer = new byte[2048];while ((count = lzis.read(buffer)) != -1) {baos.write(buffer, 0, count);}lzis.close();return baos.toByteArray();}catch (Exception e){throw new RuntimeException("lz4UnCompressError", e);}}//lz4壓縮public static String compress(String str){LZ4Factory factory = LZ4Factory.fastestInstance();ByteArrayOutputStream byteOutput = new ByteArrayOutputStream();LZ4Compressor compressor = factory.fastCompressor();try{LZ4BlockOutputStream compressedOutput = new LZ4BlockOutputStream(byteOutput, 2048, compressor);compressedOutput.write(str.getBytes(StandardCharsets.UTF_8));compressedOutput.close();return Base64.getEncoder().encodeToString(byteOutput.toByteArray());}catch (Exception e){throw new RuntimeException("lz4CompressError", e);}}public static byte[] compress(byte[] bytes){LZ4Factory factory = LZ4Factory.fastestInstance();ByteArrayOutputStream byteOutput = new ByteArrayOutputStream();LZ4Compressor compressor = 
factory.fastCompressor();try{LZ4BlockOutputStream compressedOutput = new LZ4BlockOutputStream(byteOutput, 2048, compressor);compressedOutput.write(bytes);compressedOutput.close();return byteOutput.toByteArray();}catch (Exception e){throw new RuntimeException("lz4CompressError", e);}} }Lzo算法
依賴:
<dependency>
    <groupId>org.anarres.lzo</groupId>
    <artifactId>lzo-core</artifactId>
    <version>1.0.6</version>
</dependency>
Lzo算法Java實現源碼:
package com.demo.rpc.compress;import org.anarres.lzo.*;import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.nio.charset.StandardCharsets; import java.util.Base64;public class LzoCompress {//lzo解壓縮public static String unCompress(String str){LzoDecompressor decompressor = LzoLibrary.getInstance().newDecompressor(LzoAlgorithm.LZO1X, null);try{ByteArrayOutputStream os = new ByteArrayOutputStream();ByteArrayInputStream is = new ByteArrayInputStream(Base64.getDecoder().decode(str.getBytes(StandardCharsets.UTF_8)));LzoInputStream lis = new LzoInputStream(is, decompressor);int count;byte[] buffer = new byte[256];while((count = lis.read(buffer)) != -1){os.write(buffer, 0, count);}return os.toString();}catch (Exception e){throw new RuntimeException("lzoUnCompressError", e);}}public static byte[] unCompress(byte[] bytes){LzoDecompressor decompressor = LzoLibrary.getInstance().newDecompressor(LzoAlgorithm.LZO1X, null);try{ByteArrayOutputStream os = new ByteArrayOutputStream();ByteArrayInputStream is = new ByteArrayInputStream(bytes);LzoInputStream lis = new LzoInputStream(is, decompressor);int count;byte[] buffer = new byte[256];while((count = lis.read(buffer)) != -1){os.write(buffer, 0, count);}return os.toByteArray();}catch (Exception e){throw new RuntimeException("lzoUnCompressError", e);}}public static byte[] compress(byte[] bytes){LzoCompressor compressor = LzoLibrary.getInstance().newCompressor(LzoAlgorithm.LZO1X, null);ByteArrayOutputStream os = new ByteArrayOutputStream();LzoOutputStream louts = new LzoOutputStream(os, compressor);try{louts.write(bytes);louts.close();return os.toByteArray();}catch (Exception e){throw new RuntimeException("LzoCompressError", e);}}public static String compress(String str){LzoCompressor compressor = LzoLibrary.getInstance().newCompressor(LzoAlgorithm.LZO1X, null);ByteArrayOutputStream os = new ByteArrayOutputStream();LzoOutputStream louts = new LzoOutputStream(os, 
compressor);try{louts.write(str.getBytes(StandardCharsets.UTF_8));louts.close();return Base64.getEncoder().encodeToString(os.toByteArray());}catch (Exception e){throw new RuntimeException("LzoCompressError", e);}}}使用方式
package com.demo.rpc.compress;import org.junit.Test;import java.io.IOException; import java.util.HashMap; import java.util.Map;public class CompressorTest {String str = "http://www.baidu.com https://fanyi.baidu.com/ http://www.baidu.com ";@Testpublic void snappyCompress() throws IOException {SnappyCompressor snappyCompressor = new SnappyCompressor();byte[] compressed = snappyCompressor.compress(str.getBytes());System.out.println("壓縮前數組大小: " + str.getBytes().length);System.out.println("壓縮后數組大小:" + compressed.length);byte[] unCompressed = snappyCompressor.unCompress(compressed);System.out.println("原字符串:" + new String(unCompressed));}@Testpublic void gzipCompress(){String encode = "utf-8";byte[] compressed = GzipCompress.compress(str, encode);String unCompressed = GzipCompress.unCompress(compressed, encode);System.out.println("壓縮前數組大小:" + str.getBytes().length);System.out.println("壓縮后數組大小:" + compressed.length);System.out.println("原字符串:" + new String(unCompressed));}@Testpublic void deflateCompress(){byte[] compressed = DeflateCompress.compress(str.getBytes());byte[] unCompressed = DeflateCompress.unCompress(compressed);System.out.println("壓縮前數組大小:" + str.getBytes().length);System.out.println("壓縮后數組大小:" + compressed.length);System.out.println("原字符串:" + new String(unCompressed));}@Testpublic void huffmanCompress(){HuffmanCompress huffmanCompress = new HuffmanCompress();Map<Byte, String> huffmanCodec = new HashMap<>();byte[] compressed = huffmanCompress.compress(huffmanCodec, str.getBytes());byte[] unCompressed = huffmanCompress.unCompress(huffmanCodec, compressed);System.out.println("壓縮前數組大小:" + str.getBytes().length);System.out.println("壓縮后數組大小:" + compressed.length);System.out.println("原字符串:" + new String(unCompressed));}@Testpublic void lzoCompress(){byte[] compressed = LzoCompress.compress(str.getBytes());byte[] unCompressed = LzoCompress.unCompress(compressed);System.out.println("壓縮前數組大小:" + str.getBytes().length);System.out.println("壓縮后數組大小:" + 
compressed.length);System.out.println("原字符串:" + new String(unCompressed));}@Testpublic void lz4Compress(){byte[] compressed = Lz4Compress.compress(str.getBytes());byte[] unCompressed = Lz4Compress.unCompress(compressed);System.out.println("壓縮前數組大小:" + str.getBytes().length);System.out.println("壓縮后數組大小:" + compressed.length);System.out.println("原字符串:" + new String(unCompressed));}}總結
- 上一篇: 【转载保存】dubbo学习笔记
- 下一篇: HTTP与HTTPS的区别[转载]