Add LZO1X algorithm to the kernel

This is a hybrid version of the patch to add the LZO1X compression
algorithm to the kernel.  Nitin and myself have merged the best parts of
the various patches to form this version which we're both happy with (and
are jointly signing off).

The performance of this version is equivalent to the original minilzo code
it was based on.  Bytecode comparisons have also been made on ARM, i386 and
x86_64 with favourable results.

There are several users of LZO lined up including jffs2, crypto and reiser4
since its much faster than zlib.

Signed-off-by: Nitin Gupta <nitingupta910@gmail.com>
Signed-off-by: Richard Purdie <rpurdie@openedhand.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Richard Purdie 2007-07-10 17:22:24 -07:00 committed by Linus Torvalds
parent 4c75f7416f
commit 64c70b1cf4
7 changed files with 580 additions and 0 deletions

44
include/linux/lzo.h Normal file
View file

@ -0,0 +1,44 @@
#ifndef __LZO_H__
#define __LZO_H__
/*
* LZO Public Kernel Interface
* A mini subset of the LZO real-time data compression library
*
* Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
*
* The full LZO package can be found at:
* http://www.oberhumer.com/opensource/lzo/
*
* Changed for kernel use by:
* Nitin Gupta <nitingupta910@gmail.com>
* Richard Purdie <rpurdie@openedhand.com>
*/
#define LZO1X_MEM_COMPRESS (16384 * sizeof(unsigned char *))
#define LZO1X_1_MEM_COMPRESS LZO1X_MEM_COMPRESS
#define lzo1x_worst_compress(x) (x + (x / 64) + 16 + 3)
/* This requires 'workmem' of size LZO1X_1_MEM_COMPRESS */
int lzo1x_1_compress(const unsigned char *src, size_t src_len,
unsigned char *dst, size_t *dst_len, void *wrkmem);
/* safe decompression with overrun testing */
int lzo1x_decompress_safe(const unsigned char *src, size_t src_len,
unsigned char *dst, size_t *dst_len);
/*
* Return values (< 0 = Error)
*/
#define LZO_E_OK 0
#define LZO_E_ERROR (-1)
#define LZO_E_OUT_OF_MEMORY (-2)
#define LZO_E_NOT_COMPRESSIBLE (-3)
#define LZO_E_INPUT_OVERRUN (-4)
#define LZO_E_OUTPUT_OVERRUN (-5)
#define LZO_E_LOOKBEHIND_OVERRUN (-6)
#define LZO_E_EOF_NOT_FOUND (-7)
#define LZO_E_INPUT_NOT_CONSUMED (-8)
#define LZO_E_NOT_YET_IMPLEMENTED (-9)
#endif

View file

@ -64,6 +64,12 @@ config ZLIB_INFLATE
config ZLIB_DEFLATE
tristate
config LZO_COMPRESS
tristate
config LZO_DECOMPRESS
tristate
#
# Generic allocator support is selected if needed
#

View file

@ -49,6 +49,8 @@ obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o
obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
obj-$(CONFIG_LZO_COMPRESS) += lzo/
obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
obj-$(CONFIG_TEXTSEARCH) += textsearch.o
obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o

5
lib/lzo/Makefile Normal file
View file

@ -0,0 +1,5 @@
lzo_compress-objs := lzo1x_compress.o
lzo_decompress-objs := lzo1x_decompress.o
obj-$(CONFIG_LZO_COMPRESS) += lzo_compress.o
obj-$(CONFIG_LZO_DECOMPRESS) += lzo_decompress.o

226
lib/lzo/lzo1x_compress.c Normal file
View file

@ -0,0 +1,226 @@
/*
* LZO1X Compressor from MiniLZO
*
* Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
*
* The full LZO package can be found at:
* http://www.oberhumer.com/opensource/lzo/
*
* Changed for kernel use by:
* Nitin Gupta <nitingupta910@gmail.com>
* Richard Purdie <rpurdie@openedhand.com>
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/lzo.h>
#include <asm/unaligned.h>
#include "lzodefs.h"
static noinline size_t
_lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
unsigned char *out, size_t *out_len, void *wrkmem)
{
const unsigned char * const in_end = in + in_len;
const unsigned char * const ip_end = in + in_len - M2_MAX_LEN - 5;
const unsigned char ** const dict = wrkmem;
const unsigned char *ip = in, *ii = ip;
const unsigned char *end, *m, *m_pos;
size_t m_off, m_len, dindex;
unsigned char *op = out;
ip += 4;
for (;;) {
dindex = ((0x21 * DX3(ip, 5, 5, 6)) >> 5) & D_MASK;
m_pos = dict[dindex];
if (m_pos < in)
goto literal;
if (ip == m_pos || (ip - m_pos) > M4_MAX_OFFSET)
goto literal;
m_off = ip - m_pos;
if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
goto try_match;
dindex = (dindex & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f);
m_pos = dict[dindex];
if (m_pos < in)
goto literal;
if (ip == m_pos || (ip - m_pos) > M4_MAX_OFFSET)
goto literal;
m_off = ip - m_pos;
if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
goto try_match;
goto literal;
try_match:
if (get_unaligned((const unsigned short *)m_pos)
== get_unaligned((const unsigned short *)ip)) {
if (likely(m_pos[2] == ip[2]))
goto match;
}
literal:
dict[dindex] = ip;
++ip;
if (unlikely(ip >= ip_end))
break;
continue;
match:
dict[dindex] = ip;
if (ip != ii) {
size_t t = ip - ii;
if (t <= 3) {
op[-2] |= t;
} else if (t <= 18) {
*op++ = (t - 3);
} else {
size_t tt = t - 18;
*op++ = 0;
while (tt > 255) {
tt -= 255;
*op++ = 0;
}
*op++ = tt;
}
do {
*op++ = *ii++;
} while (--t > 0);
}
ip += 3;
if (m_pos[3] != *ip++ || m_pos[4] != *ip++
|| m_pos[5] != *ip++ || m_pos[6] != *ip++
|| m_pos[7] != *ip++ || m_pos[8] != *ip++) {
--ip;
m_len = ip - ii;
if (m_off <= M2_MAX_OFFSET) {
m_off -= 1;
*op++ = (((m_len - 1) << 5)
| ((m_off & 7) << 2));
*op++ = (m_off >> 3);
} else if (m_off <= M3_MAX_OFFSET) {
m_off -= 1;
*op++ = (M3_MARKER | (m_len - 2));
goto m3_m4_offset;
} else {
m_off -= 0x4000;
*op++ = (M4_MARKER | ((m_off & 0x4000) >> 11)
| (m_len - 2));
goto m3_m4_offset;
}
} else {
end = in_end;
m = m_pos + M2_MAX_LEN + 1;
while (ip < end && *m == *ip) {
m++;
ip++;
}
m_len = ip - ii;
if (m_off <= M3_MAX_OFFSET) {
m_off -= 1;
if (m_len <= 33) {
*op++ = (M3_MARKER | (m_len - 2));
} else {
m_len -= 33;
*op++ = M3_MARKER | 0;
goto m3_m4_len;
}
} else {
m_off -= 0x4000;
if (m_len <= M4_MAX_LEN) {
*op++ = (M4_MARKER
| ((m_off & 0x4000) >> 11)
| (m_len - 2));
} else {
m_len -= M4_MAX_LEN;
*op++ = (M4_MARKER
| ((m_off & 0x4000) >> 11));
m3_m4_len:
while (m_len > 255) {
m_len -= 255;
*op++ = 0;
}
*op++ = (m_len);
}
}
m3_m4_offset:
*op++ = ((m_off & 63) << 2);
*op++ = (m_off >> 6);
}
ii = ip;
if (unlikely(ip >= ip_end))
break;
}
*out_len = op - out;
return in_end - ii;
}
int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out,
size_t *out_len, void *wrkmem)
{
const unsigned char *ii;
unsigned char *op = out;
size_t t;
if (unlikely(in_len <= M2_MAX_LEN + 5)) {
t = in_len;
} else {
t = _lzo1x_1_do_compress(in, in_len, op, out_len, wrkmem);
op += *out_len;
}
if (t > 0) {
ii = in + in_len - t;
if (op == out && t <= 238) {
*op++ = (17 + t);
} else if (t <= 3) {
op[-2] |= t;
} else if (t <= 18) {
*op++ = (t - 3);
} else {
size_t tt = t - 18;
*op++ = 0;
while (tt > 255) {
tt -= 255;
*op++ = 0;
}
*op++ = tt;
}
do {
*op++ = *ii++;
} while (--t > 0);
}
*op++ = M4_MARKER | 1;
*op++ = 0;
*op++ = 0;
*out_len = op - out;
return LZO_E_OK;
}
EXPORT_SYMBOL_GPL(lzo1x_1_compress);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("LZO1X-1 Compressor");

254
lib/lzo/lzo1x_decompress.c Normal file
View file

@ -0,0 +1,254 @@
/*
* LZO1X Decompressor from MiniLZO
*
* Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
*
* The full LZO package can be found at:
* http://www.oberhumer.com/opensource/lzo/
*
* Changed for kernel use by:
* Nitin Gupta <nitingupta910@gmail.com>
* Richard Purdie <rpurdie@openedhand.com>
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/lzo.h>
#include <asm/byteorder.h>
#include <asm/unaligned.h>
#include "lzodefs.h"
#define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x))
#define HAVE_OP(x, op_end, op) ((size_t)(op_end - op) < (x))
#define HAVE_LB(m_pos, out, op) (m_pos < out || m_pos >= op)
#define COPY4(dst, src) \
put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst))
int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
unsigned char *out, size_t *out_len)
{
const unsigned char * const ip_end = in + in_len;
unsigned char * const op_end = out + *out_len;
const unsigned char *ip = in, *m_pos;
unsigned char *op = out;
size_t t;
*out_len = 0;
if (*ip > 17) {
t = *ip++ - 17;
if (t < 4)
goto match_next;
if (HAVE_OP(t, op_end, op))
goto output_overrun;
if (HAVE_IP(t + 1, ip_end, ip))
goto input_overrun;
do {
*op++ = *ip++;
} while (--t > 0);
goto first_literal_run;
}
while ((ip < ip_end)) {
t = *ip++;
if (t >= 16)
goto match;
if (t == 0) {
if (HAVE_IP(1, ip_end, ip))
goto input_overrun;
while (*ip == 0) {
t += 255;
ip++;
if (HAVE_IP(1, ip_end, ip))
goto input_overrun;
}
t += 15 + *ip++;
}
if (HAVE_OP(t + 3, op_end, op))
goto output_overrun;
if (HAVE_IP(t + 4, ip_end, ip))
goto input_overrun;
COPY4(op, ip);
op += 4;
ip += 4;
if (--t > 0) {
if (t >= 4) {
do {
COPY4(op, ip);
op += 4;
ip += 4;
t -= 4;
} while (t >= 4);
if (t > 0) {
do {
*op++ = *ip++;
} while (--t > 0);
}
} else {
do {
*op++ = *ip++;
} while (--t > 0);
}
}
first_literal_run:
t = *ip++;
if (t >= 16)
goto match;
m_pos = op - (1 + M2_MAX_OFFSET);
m_pos -= t >> 2;
m_pos -= *ip++ << 2;
if (HAVE_LB(m_pos, out, op))
goto lookbehind_overrun;
if (HAVE_OP(3, op_end, op))
goto output_overrun;
*op++ = *m_pos++;
*op++ = *m_pos++;
*op++ = *m_pos;
goto match_done;
do {
match:
if (t >= 64) {
m_pos = op - 1;
m_pos -= (t >> 2) & 7;
m_pos -= *ip++ << 3;
t = (t >> 5) - 1;
if (HAVE_LB(m_pos, out, op))
goto lookbehind_overrun;
if (HAVE_OP(t + 3 - 1, op_end, op))
goto output_overrun;
goto copy_match;
} else if (t >= 32) {
t &= 31;
if (t == 0) {
if (HAVE_IP(1, ip_end, ip))
goto input_overrun;
while (*ip == 0) {
t += 255;
ip++;
if (HAVE_IP(1, ip_end, ip))
goto input_overrun;
}
t += 31 + *ip++;
}
m_pos = op - 1;
m_pos -= le16_to_cpu(get_unaligned(
(const unsigned short *)ip)) >> 2;
ip += 2;
} else if (t >= 16) {
m_pos = op;
m_pos -= (t & 8) << 11;
t &= 7;
if (t == 0) {
if (HAVE_IP(1, ip_end, ip))
goto input_overrun;
while (*ip == 0) {
t += 255;
ip++;
if (HAVE_IP(1, ip_end, ip))
goto input_overrun;
}
t += 7 + *ip++;
}
m_pos -= le16_to_cpu(get_unaligned(
(const unsigned short *)ip) >> 2);
ip += 2;
if (m_pos == op)
goto eof_found;
m_pos -= 0x4000;
} else {
m_pos = op - 1;
m_pos -= t >> 2;
m_pos -= *ip++ << 2;
if (HAVE_LB(m_pos, out, op))
goto lookbehind_overrun;
if (HAVE_OP(2, op_end, op))
goto output_overrun;
*op++ = *m_pos++;
*op++ = *m_pos;
goto match_done;
}
if (HAVE_LB(m_pos, out, op))
goto lookbehind_overrun;
if (HAVE_OP(t + 3 - 1, op_end, op))
goto output_overrun;
if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) {
COPY4(op, m_pos);
op += 4;
m_pos += 4;
t -= 4 - (3 - 1);
do {
COPY4(op, m_pos);
op += 4;
m_pos += 4;
t -= 4;
} while (t >= 4);
if (t > 0)
do {
*op++ = *m_pos++;
} while (--t > 0);
} else {
copy_match:
*op++ = *m_pos++;
*op++ = *m_pos++;
do {
*op++ = *m_pos++;
} while (--t > 0);
}
match_done:
t = ip[-2] & 3;
if (t == 0)
break;
match_next:
if (HAVE_OP(t, op_end, op))
goto output_overrun;
if (HAVE_IP(t + 1, ip_end, ip))
goto input_overrun;
*op++ = *ip++;
if (t > 1) {
*op++ = *ip++;
if (t > 2)
*op++ = *ip++;
}
t = *ip++;
} while (ip < ip_end);
}
*out_len = op - out;
return LZO_E_EOF_NOT_FOUND;
eof_found:
*out_len = op - out;
return (ip == ip_end ? LZO_E_OK :
(ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN));
input_overrun:
*out_len = op - out;
return LZO_E_INPUT_OVERRUN;
output_overrun:
*out_len = op - out;
return LZO_E_OUTPUT_OVERRUN;
lookbehind_overrun:
*out_len = op - out;
return LZO_E_LOOKBEHIND_OVERRUN;
}
EXPORT_SYMBOL_GPL(lzo1x_decompress_safe);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("LZO1X Decompressor");

43
lib/lzo/lzodefs.h Normal file
View file

@ -0,0 +1,43 @@
/*
* lzodefs.h -- architecture, OS and compiler specific defines
*
* Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
*
* The full LZO package can be found at:
* http://www.oberhumer.com/opensource/lzo/
*
* Changed for kernel use by:
* Nitin Gupta <nitingupta910@gmail.com>
* Richard Purdie <rpurdie@openedhand.com>
*/
#define LZO_VERSION 0x2020
#define LZO_VERSION_STRING "2.02"
#define LZO_VERSION_DATE "Oct 17 2005"
#define M1_MAX_OFFSET 0x0400
#define M2_MAX_OFFSET 0x0800
#define M3_MAX_OFFSET 0x4000
#define M4_MAX_OFFSET 0xbfff
#define M1_MIN_LEN 2
#define M1_MAX_LEN 2
#define M2_MIN_LEN 3
#define M2_MAX_LEN 8
#define M3_MIN_LEN 3
#define M3_MAX_LEN 33
#define M4_MIN_LEN 3
#define M4_MAX_LEN 9
#define M1_MARKER 0
#define M2_MARKER 64
#define M3_MARKER 32
#define M4_MARKER 16
#define D_BITS 14
#define D_MASK ((1u << D_BITS) - 1)
#define D_HIGH ((D_MASK >> 1) + 1)
#define DX2(p, s1, s2) (((((size_t)((p)[2]) << (s2)) ^ (p)[1]) \
<< (s1)) ^ (p)[0])
#define DX3(p, s1, s2, s3) ((DX2((p)+1, s2, s3) << (s1)) ^ (p)[0])