diff --git a/Cargo.lock b/Cargo.lock index 52d0206c..9b53e063 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -42,6 +42,22 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f8cb7306107e4b10e64994de6d3274bd08996a7c1322a27b86482392f96be0a" +[[package]] +name = "cstr_core" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8758514b5f03968703f1db1f1e196e031d5268f5295ff99a5bf345008790ba85" +dependencies = [ + "cty", + "memchr", +] + +[[package]] +name = "cty" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7313c0d620d0cb4dbd9d019e461a4beb501071ff46ec0ab933efb4daa76d73e3" + [[package]] name = "dyld" version = "0.1.0" @@ -134,7 +150,7 @@ dependencies = [ [[package]] name = "libasync" version = "0.0.0" -source = "git+https://git.m-labs.hk/M-Labs/zc706.git#aa9379463207a02ee7ca4e597054a447b9a90232" +source = "git+https://git.m-labs.hk/M-Labs/zc706.git#614b1ef350edd8576fc71292fc89cf79a1810aba" dependencies = [ "embedded-hal", "libcortex_a9", @@ -146,7 +162,7 @@ dependencies = [ [[package]] name = "libboard_zynq" version = "0.0.0" -source = "git+https://git.m-labs.hk/M-Labs/zc706.git#aa9379463207a02ee7ca4e597054a447b9a90232" +source = "git+https://git.m-labs.hk/M-Labs/zc706.git#614b1ef350edd8576fc71292fc89cf79a1810aba" dependencies = [ "bit_field", "embedded-hal", @@ -161,7 +177,7 @@ dependencies = [ [[package]] name = "libcortex_a9" version = "0.0.0" -source = "git+https://git.m-labs.hk/M-Labs/zc706.git#aa9379463207a02ee7ca4e597054a447b9a90232" +source = "git+https://git.m-labs.hk/M-Labs/zc706.git#614b1ef350edd8576fc71292fc89cf79a1810aba" dependencies = [ "bit_field", "libregister", @@ -170,7 +186,7 @@ dependencies = [ [[package]] name = "libregister" version = "0.0.0" -source = "git+https://git.m-labs.hk/M-Labs/zc706.git#aa9379463207a02ee7ca4e597054a447b9a90232" +source = 
"git+https://git.m-labs.hk/M-Labs/zc706.git#614b1ef350edd8576fc71292fc89cf79a1810aba" dependencies = [ "bit_field", "vcell", @@ -180,7 +196,7 @@ dependencies = [ [[package]] name = "libsupport_zynq" version = "0.0.0" -source = "git+https://git.m-labs.hk/M-Labs/zc706.git#aa9379463207a02ee7ca4e597054a447b9a90232" +source = "git+https://git.m-labs.hk/M-Labs/zc706.git#614b1ef350edd8576fc71292fc89cf79a1810aba" dependencies = [ "compiler_builtins", "libboard_zynq", @@ -193,9 +209,9 @@ dependencies = [ [[package]] name = "linked_list_allocator" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1070ea54beccbfd3a3987aca6440f94cc1e0b447c2d979d8c7f761e265417e4" +checksum = "d6b60501dd4c850950bb43f970d544f6ce04e0ca021da2db2538fbe9d923f19e" [[package]] name = "log" @@ -212,6 +228,12 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdcec5e97041c7f0f1c5b7d93f12e57293c831c646f4cc7a5db59460c7ea8de6" +[[package]] +name = "memchr" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" + [[package]] name = "nb" version = "0.1.2" @@ -319,6 +341,17 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "szl" +version = "0.1.0" +dependencies = [ + "cstr_core", + "libboard_zynq", + "libcortex_a9", + "libsupport_zynq", + "log", +] + [[package]] name = "unicode-xid" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index 49dffcba..db8f74b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,8 @@ [workspace] members = [ "libdyld", - "runtime" + "runtime", + "szl" ] [profile.dev] diff --git a/README.md b/README.md new file mode 100644 index 00000000..39428402 --- /dev/null +++ b/README.md @@ -0,0 +1,6 @@ +Build with: + +```shell +cargo xbuild -p runtime --release +cargo xbuild -p szl --release +``` diff --git a/remote_run.sh b/remote_run.sh index 
7166b194..dfccd2e8 100755
--- a/remote_run.sh
+++ b/remote_run.sh
@@ -8,6 +8,6 @@ TARGET_FOLDER=/tmp/zynq-\$USER
 
 ssh $TARGET_HOST "mkdir -p $TARGET_FOLDER"
 rsync openocd/* $TARGET_HOST:$TARGET_FOLDER
-rsync target/armv7-none-eabihf/release/runtime $TARGET_HOST:$TARGET_FOLDER
+rsync target/armv7-none-eabihf/release/szl $TARGET_HOST:$TARGET_FOLDER
 rsync build/top.bit $TARGET_HOST:$TARGET_FOLDER
-ssh $TARGET_HOST "cd $TARGET_FOLDER; openocd -f zc706.cfg -c 'pld load 0 top.bit; load_image runtime; resume 0; exit'"
+ssh $TARGET_HOST "cd $TARGET_FOLDER; openocd -f zc706.cfg -c 'pld load 0 top.bit; load_image szl; resume 0; exit'"
diff --git a/runtime.bin b/runtime.bin
new file mode 100755
index 00000000..82fe7ef3
Binary files /dev/null and b/runtime.bin differ
diff --git a/runtime/build.rs b/runtime/build.rs
new file mode 100644
index 00000000..a2ce29f5
--- /dev/null
+++ b/runtime/build.rs
@@ -0,0 +1,18 @@
+use std::env;
+use std::fs::File;
+use std::io::Write;
+use std::path::PathBuf;
+
+fn main() {
+    // Copy link.x into OUT_DIR and add that directory to the link search path so the linker can find it
+    let out = &PathBuf::from(env::var_os("OUT_DIR").unwrap());
+    File::create(out.join("link.x"))
+        .unwrap()
+        .write_all(include_bytes!("link.x"))
+        .unwrap();
+    println!("cargo:rustc-link-search={}", out.display());
+
+    // Only re-run the build script when link.x is changed,
+    // instead of when any part of the source code changes.
+    println!("cargo:rerun-if-changed=link.x");
+}
diff --git a/runtime/link.x b/runtime/link.x
new file mode 100644
index 00000000..9d9e33c7
--- /dev/null
+++ b/runtime/link.x
@@ -0,0 +1,71 @@
+ENTRY(_boot_cores);
+
+STACK_SIZE = 0x8000; /* 32 KiB */
+HEAP_SIZE = 0x1000000; /* 16 MiB */
+
+/* Default every exception handler to Reset unless the program defines its own (PROVIDE) */
+PROVIDE(Reset = _boot_cores);
+PROVIDE(UndefinedInstruction = Reset);
+PROVIDE(SoftwareInterrupt = Reset);
+PROVIDE(PrefetchAbort = Reset);
+PROVIDE(DataAbort = Reset);
+PROVIDE(ReservedException = Reset);
+PROVIDE(IRQ = Reset);
+PROVIDE(FIQ = Reset);
+
+MEMORY
+{
+    SDRAM : ORIGIN = 0x00100000, LENGTH = 0x1FF00000
+}
+
+SECTIONS
+{
+    .text :
+    {
+        KEEP(*(.text.exceptions));
+        *(.text.boot);
+        *(.text .text.*);
+    } > SDRAM
+
+    .rodata : ALIGN(4)
+    {
+        *(.rodata .rodata.*);
+    } > SDRAM
+
+    .data : ALIGN(4)
+    {
+        *(.data .data.*);
+    } > SDRAM
+
+    .bss (NOLOAD) : ALIGN(0x4000)
+    {
+        /* Section is aligned to 16 kB (0x4000); .bss.l1_table is placed first */
+        KEEP(*(.bss.l1_table));
+        *(.bss .bss.*);
+        . = ALIGN(4);
+    } > SDRAM
+    __bss_start = ADDR(.bss);
+    __bss_end = ADDR(.bss) + SIZEOF(.bss);
+
+    .heap (NOLOAD) : ALIGN(0x1000)
+    {
+        . += HEAP_SIZE;
+    } > SDRAM
+    __heap_start = ADDR(.heap);
+    __heap_end = ADDR(.heap) + SIZEOF(.heap);
+
+    .stack (NOLOAD) : ALIGN(0x1000)
+    {
+        . += STACK_SIZE;
+    } > SDRAM
+    __stack_end = ADDR(.stack);
+    __stack_start = ADDR(.stack) + SIZEOF(.stack);
+
+    /DISCARD/ :
+    {
+        /* Unused exception related info that only wastes space */
+        *(.ARM.exidx);
+        *(.ARM.exidx.*);
+        *(.ARM.extab.*);
+    }
+}
diff --git a/runtime/src/main.rs b/runtime/src/main.rs
index 447bf4c8..85e338b8 100644
--- a/runtime/src/main.rs
+++ b/runtime/src/main.rs
@@ -8,10 +8,7 @@ extern crate log;
 
 use core::{cmp, str};
 use log::info;
-use libboard_zynq::{
-    self as zynq, clocks::Clocks, clocks::source::{ClockSource, ArmPll, IoPll},
-    timer::GlobalTimer,
-};
+use libboard_zynq::timer::GlobalTimer;
 use libsupport_zynq::{logger, ram};
 
 mod proto;
@@ -42,14 +39,7 @@ pub fn main_core0() {
     log::set_max_level(log::LevelFilter::Debug);
     info!("NAR3 starting...");
 
-    const CPU_FREQ: u32 = 800_000_000;
-
-    ArmPll::setup(2 * CPU_FREQ);
-    Clocks::set_cpu_freq(CPU_FREQ);
-    IoPll::setup(1_000_000_000);
-    libboard_zynq::stdio::drop_uart(); // reinitialize UART after clocking change
-    let mut ddr = zynq::ddr::DdrRam::new();
-    ram::init_alloc(&mut ddr);
+    ram::init_alloc_linker();
 
     info!("Detected gateware: {}", identifier_read(&mut [0; 64]));
 
diff --git a/shell.nix b/shell.nix
index 949d7c31..f4f429dc 100644
--- a/shell.nix
+++ b/shell.nix
@@ -34,6 +34,7 @@ in
       rustPlatform.rust.rustc
       rustPlatform.rust.cargo
       rustcSrc
+      pkgs.clang_9
 
       pkgs.cacert
       pkgs.cargo-xbuild
diff --git a/szl/Cargo.toml b/szl/Cargo.toml
new file mode 100644
index 00000000..96689877
--- /dev/null
+++ b/szl/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+name = "szl"
+description = "Simple Zynq Loader"
+version = "0.1.0"
+authors = ["M-Labs"]
+edition = "2018"
+
+[features]
+target_zc706 = ["libboard_zynq/target_zc706", "libsupport_zynq/target_zc706"]
+default = ["target_zc706"]
+
+[dependencies]
+log = "0.4"
+cstr_core = { version = "0.2", default-features = false }
+libboard_zynq = { git = "https://git.m-labs.hk/M-Labs/zc706.git" }
+libsupport_zynq = { git = "https://git.m-labs.hk/M-Labs/zc706.git" }
+libcortex_a9 = { git = "https://git.m-labs.hk/M-Labs/zc706.git" }
diff --git a/szl/build.rs b/szl/build.rs
new file mode 100644
index 00000000..57a0b117
--- /dev/null
+++ b/szl/build.rs
@@ -0,0 +1,54 @@
+use std::env;
+use std::fs;
+use std::path::PathBuf;
+use std::process::Command;
+
+/// Runs an external build tool, aborting the build with a message that names
+/// the command if it cannot be started or exits unsuccessfully.
+fn run(command: &mut Command) {
+    let status = command
+        .status()
+        .unwrap_or_else(|err| panic!("failed to start {:?}: {}", command, err));
+    assert!(status.success(), "{:?} failed: {}", command, status);
+}
+
+/// Prepares everything `src/main.rs` and the linker need in `OUT_DIR`:
+/// the LZMA-compressed runtime image, `libunlzma.a` and the linker script.
+fn main() {
+    // FIXME: this is dirty and unreliable. How to depend on the output of the runtime build?
+    let payload = "../target/armv7-none-eabihf/release/runtime";
+
+    let out_dir = PathBuf::from(env::var_os("OUT_DIR").expect("OUT_DIR is set by cargo"));
+    let payload_bin = out_dir.join("payload.bin");
+
+    // Flatten the runtime ELF into a raw binary, then compress it;
+    // src/main.rs embeds the resulting payload.bin.lzma.
+    run(Command::new("llvm-objcopy")
+        .args(&["-O", "binary", payload])
+        .arg(&payload_bin));
+    run(Command::new("lzma")
+        .args(&["--keep", "-f"])
+        .arg(&payload_bin));
+    println!("cargo:rerun-if-changed={}", payload);
+
+    // Compile the C decompressor and archive it as libunlzma.a.
+    run(Command::new("clang")
+        .args(&["-target", "armv7-unknown-linux", "-fno-stack-protector",
+                "src/unlzma.c", "-O2", "-c", "-fPIC", "-o"])
+        .arg(out_dir.join("unlzma.o")));
+    run(Command::new("llvm-ar")
+        .args(&["crus", "libunlzma.a", "unlzma.o"])
+        .current_dir(&out_dir));
+    println!("cargo:rustc-link-search=native={}", out_dir.display());
+    println!("cargo:rustc-link-lib=static=unlzma");
+    println!("cargo:rerun-if-changed=src/unlzma.c");
+
+    // Put the linker script somewhere the linker can find it
+    fs::write(out_dir.join("link.x"), &include_bytes!("link.x")[..])
+        .expect("failed to copy link.x into OUT_DIR");
+    println!("cargo:rustc-link-search={}", out_dir.display());
+
+    // Only re-run the build script when link.x is changed,
+    // instead of when any part of the source code changes.
+    println!("cargo:rerun-if-changed=link.x");
+}
diff --git a/link.x b/szl/link.x
similarity index 100%
rename from link.x
rename to szl/link.x
diff --git a/szl/src/main.rs b/szl/src/main.rs
new file mode 100644
index 00000000..c5f46bed
--- /dev/null
+++ b/szl/src/main.rs
@@ -0,0 +1,80 @@
+#![no_std]
+#![no_main]
+
+extern crate log;
+
+use core::mem;
+use log::{info, error};
+use cstr_core::CStr;
+
+use libboard_zynq::{
+    self as zynq, clocks::Clocks, clocks::source::{ClockSource, ArmPll, IoPll},
+    timer::GlobalTimer,
+};
+use libsupport_zynq::{boot, logger};
+
+
+/// Stack handed to core 1 when `main_core0` starts it.
+static mut STACK_CORE1: [u32; 512] = [0; 512];
+
+extern "C" {
+    /// Decompressor from `src/unlzma.c`: decodes `in_len` bytes at `buf` into
+    /// `output`, reports problems through `error` and returns a negative value
+    /// on failure.
+    fn unlzma_simple(buf: *const u8, in_len: i32,
+                     output: *mut u8,
+                     error: extern fn(*const u8)) -> i32;
+}
+
+/// Error callback given to `unlzma_simple`; logs the decoder's message.
+extern fn lzma_error(message: *const u8) {
+    // SAFETY: unlzma.c only ever passes NUL-terminated string literals here.
+    let message = unsafe { CStr::from_ptr(message) };
+    // This runs as a callback from C, so never panic on an odd message.
+    error!("LZMA error: {}", message.to_str().unwrap_or("<message is not valid UTF-8>"));
+}
+
+/// Entry point on core 0: configures the clocks, decompresses the embedded payload
+/// to the address returned by `ddr.ptr()`, starts core 1 and jumps to the payload.
+#[no_mangle]
+pub fn main_core0() {
+    GlobalTimer::start();
+    let _ = logger::init();
+    log::set_max_level(log::LevelFilter::Debug);
+    info!("Simple Zynq Loader starting");
+
+    const CPU_FREQ: u32 = 800_000_000;
+
+    ArmPll::setup(2 * CPU_FREQ);
+    Clocks::set_cpu_freq(CPU_FREQ);
+    IoPll::setup(1_000_000_000);
+    libboard_zynq::stdio::drop_uart(); // reinitialize UART after clocking change
+    let mut ddr = zynq::ddr::DdrRam::new();
+
+    let payload = include_bytes!(concat!(env!("OUT_DIR"), "/payload.bin.lzma"));
+    info!("decompressing payload");
+    let result = unsafe {
+        unlzma_simple(payload.as_ptr(), payload.len() as i32, ddr.ptr(), lzma_error)
+    };
+    if result < 0 {
+        error!("decompression failed");
+    } else {
+        let core1_stack = unsafe { &mut STACK_CORE1[..] };
+        boot::Core1::start(core1_stack);
+        info!("executing payload");
+        unsafe {
+            (mem::transmute::<*mut u8, fn()>(ddr.ptr::<u8>()))();
+        }
+    }
+
+    loop {}
+}
+
+/// Entry point on core 1: jumps straight into the payload.
+#[no_mangle]
+pub fn main_core1() {
+    unsafe {
+        // 0x00100000 is the SDRAM origin that runtime/link.x links the payload at.
+        (mem::transmute::<u32, fn()>(0x00100000))();
+    }
+}
diff --git a/szl/src/unlzma.c b/szl/src/unlzma.c
new file mode 100644
index 00000000..47cd052a
--- /dev/null
+++ b/szl/src/unlzma.c
@@ -0,0 +1,670 @@
+/*
+ *Taken from: Lzma decompressor for Linux kernel. Shamelessly snarfed
+ *from busybox 1.1.1
+ *
+ *Linux kernel adaptation
+ *Copyright (C) 2006 Alain < alain@knaff.lu >
+ *
+ *Based on small lzma deflate implementation/Small range coder
+ *implementation for lzma.
+ *Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org >
+ *
+ *Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
+ *Copyright (C) 1999-2005 Igor Pavlov
+ *
+ *Copyrights of the parts, see headers below.
+ *
+ *
+ *This program is free software; you can redistribute it and/or
+ *modify it under the terms of the GNU Lesser General Public
+ *License as published by the Free Software Foundation; either
+ *version 2.1 of the License, or (at your option) any later version.
+ *
+ *This program is distributed in the hope that it will be useful,
+ *but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ *Lesser General Public License for more details.
+ *
+ *You should have received a copy of the GNU Lesser General Public
+ *License along with this library; if not, write to the Free Software
+ *Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define NULL ((void *)0)
+#define alloca(size) __builtin_alloca(size)
+#define malloc alloca /* no heap: every "allocation" lives on the caller's stack frame */
+static inline void free(void *p) {} /* nothing to release for alloca() memory */
+
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+static long long read_int(unsigned char *ptr, int size)
+{
+	int i;
+	long long ret = 0;
+
+	for (i = 0; i < size; i++)
+		ret = (ret << 8) | ptr[size-i-1];
+	return ret;
+}
+
+#define ENDIAN_CONVERT(x) \
+	x = (typeof(x))read_int((unsigned char *)&x, sizeof(x))
+
+
+/* Small range coder implementation for lzma.
+ *Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org >
+ *
+ *Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
+ *Copyright (c) 1999-2005 Igor Pavlov
+ */
+
+#define LZMA_IOBUF_SIZE 0x10000
+
+struct rc {
+	int (*fill)(void*, unsigned int);
+	unsigned char *ptr;
+	unsigned char *buffer;
+	unsigned char *buffer_end;
+	int buffer_size;
+	unsigned int code;
+	unsigned int range;
+	unsigned int bound;
+	void (*error)(char *);
+};
+
+
+#define RC_TOP_BITS 24
+#define RC_MOVE_BITS 5
+#define RC_MODEL_TOTAL_BITS 11
+
+
+static int nofill(void *buffer, unsigned int len)
+{
+	return -1;
+}
+
+/* Called twice: once at startup and once in rc_normalize() */
+static void rc_read(struct rc *rc)
+{
+	rc->buffer_size = rc->fill((char *)rc->buffer, LZMA_IOBUF_SIZE);
+	if (rc->buffer_size <= 0)
+		rc->error("unexpected EOF"); /* NOTE: execution continues below if error() returns */
+	rc->ptr = rc->buffer;
+	rc->buffer_end = rc->buffer + rc->buffer_size;
+}
+
+/* Called once */
+static inline void rc_init(struct rc *rc,
+			   int (*fill)(void*, unsigned int),
+			   unsigned char *buffer, int buffer_size)
+{
+	if (fill)
+		rc->fill = fill;
+	else
+		rc->fill = nofill;
+	rc->buffer = buffer;
+	rc->buffer_size = buffer_size;
+	rc->buffer_end = rc->buffer + rc->buffer_size;
+	rc->ptr = rc->buffer;
+
+	rc->code = 0;
+	rc->range = 0xFFFFFFFF;
+}
+
+static inline void rc_init_code(struct rc *rc)
+{
+	int i;
+
+	for (i = 0; i < 5; i++) {
+		if (rc->ptr >= rc->buffer_end)
+			rc_read(rc);
+		rc->code = (rc->code << 8) | *rc->ptr++;
+	}
+}
+
+
+/* Called twice, but one callsite is in inline'd rc_is_bit_0_helper() */
+static void rc_do_normalize(struct rc *rc)
+{
+	if (rc->ptr >= rc->buffer_end)
+		rc_read(rc);
+	rc->range <<= 8;
+	rc->code = (rc->code << 8) | *rc->ptr++;
+}
+static inline void rc_normalize(struct rc *rc)
+{
+	if (rc->range < (1 << RC_TOP_BITS))
+		rc_do_normalize(rc);
+}
+
+/* Called 9 times */
+/* Why rc_is_bit_0_helper exists?
+ *Because we want to always expose (rc->code < rc->bound) to optimizer
+ */
+static inline unsigned int rc_is_bit_0_helper(struct rc *rc, unsigned short int *p)
+{
+	rc_normalize(rc);
+	rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS);
+	return rc->bound;
+}
+static inline int rc_is_bit_0(struct rc *rc, unsigned short int *p)
+{
+	unsigned int t = rc_is_bit_0_helper(rc, p);
+	return rc->code < t;
+}
+
+/* Called ~10 times, but very small, thus inlined */
+static inline void rc_update_bit_0(struct rc *rc, unsigned short int *p)
+{
+	rc->range = rc->bound;
+	*p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS;
+}
+static inline void rc_update_bit_1(struct rc *rc, unsigned short int *p)
+{
+	rc->range -= rc->bound;
+	rc->code -= rc->bound;
+	*p -= *p >> RC_MOVE_BITS;
+}
+
+/* Called 4 times in unlzma loop */
+static int rc_get_bit(struct rc *rc, unsigned short int *p, int *symbol)
+{
+	if (rc_is_bit_0(rc, p)) {
+		rc_update_bit_0(rc, p);
+		*symbol *= 2;
+		return 0;
+	} else {
+		rc_update_bit_1(rc, p);
+		*symbol = *symbol * 2 + 1;
+		return 1;
+	}
+}
+
+/* Called once */
+static inline int rc_direct_bit(struct rc *rc)
+{
+	rc_normalize(rc);
+	rc->range >>= 1;
+	if (rc->code >= rc->range) {
+		rc->code -= rc->range;
+		return 1;
+	}
+	return 0;
+}
+
+/* Called twice */
+static inline void
+rc_bit_tree_decode(struct rc *rc, unsigned short int *p, int num_levels, int *symbol)
+{
+	int i = num_levels;
+
+	*symbol = 1;
+	while (i--)
+		rc_get_bit(rc, p + *symbol, symbol);
+	*symbol -= 1 << num_levels;
+}
+
+
+/*
+ * Small lzma deflate implementation.
+ * Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org >
+ *
+ * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
+ * Copyright (C) 1999-2005 Igor Pavlov
+ */
+
+
+struct lzma_header {
+	unsigned char pos; /* packed properties byte; unlzma() splits it into lc, lp and pb */
+	unsigned int dict_size; /* stored little-endian; converted by ENDIAN_CONVERT in unlzma() */
+	unsigned long long int dst_size; /* decoded size; unlzma() stops once get_pos() reaches it */
+} __attribute__ ((packed)) ;
+
+
+#define LZMA_BASE_SIZE 1846
+#define LZMA_LIT_SIZE 768
+
+#define LZMA_NUM_POS_BITS_MAX 4
+
+#define LZMA_LEN_NUM_LOW_BITS 3
+#define LZMA_LEN_NUM_MID_BITS 3
+#define LZMA_LEN_NUM_HIGH_BITS 8
+
+#define LZMA_LEN_CHOICE 0
+#define LZMA_LEN_CHOICE_2 (LZMA_LEN_CHOICE + 1)
+#define LZMA_LEN_LOW (LZMA_LEN_CHOICE_2 + 1)
+#define LZMA_LEN_MID (LZMA_LEN_LOW \
+		      + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS)))
+#define LZMA_LEN_HIGH (LZMA_LEN_MID \
+		       +(1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS)))
+#define LZMA_NUM_LEN_PROBS (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS))
+
+#define LZMA_NUM_STATES 12
+#define LZMA_NUM_LIT_STATES 7
+
+#define LZMA_START_POS_MODEL_INDEX 4
+#define LZMA_END_POS_MODEL_INDEX 14
+#define LZMA_NUM_FULL_DISTANCES (1 << (LZMA_END_POS_MODEL_INDEX >> 1))
+
+#define LZMA_NUM_POS_SLOT_BITS 6
+#define LZMA_NUM_LEN_TO_POS_STATES 4
+
+#define LZMA_NUM_ALIGN_BITS 4
+
+#define LZMA_MATCH_MIN_LEN 2
+
+#define LZMA_IS_MATCH 0
+#define LZMA_IS_REP (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX))
+#define LZMA_IS_REP_G0 (LZMA_IS_REP + LZMA_NUM_STATES)
+#define LZMA_IS_REP_G1 (LZMA_IS_REP_G0 + LZMA_NUM_STATES)
+#define LZMA_IS_REP_G2 (LZMA_IS_REP_G1 + LZMA_NUM_STATES)
+#define LZMA_IS_REP_0_LONG (LZMA_IS_REP_G2 + LZMA_NUM_STATES)
+#define LZMA_POS_SLOT (LZMA_IS_REP_0_LONG \
+		       + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX))
+#define LZMA_SPEC_POS (LZMA_POS_SLOT \
+		       +(LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS))
+#define LZMA_ALIGN (LZMA_SPEC_POS \
+		    + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX)
+#define LZMA_LEN_CODER (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS))
+#define LZMA_REP_LEN_CODER (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS)
+#define LZMA_LITERAL (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS)
+
+
+struct writer {
+	unsigned char *buffer;
+	unsigned char previous_byte;
+	int buffer_pos;
+	int bufsize;
+	int global_pos;
+	int(*flush)(void*, unsigned int);
+	struct lzma_header *header;
+};
+
+struct cstate {
+	int state; /* index into the per-state probability tables (LZMA_IS_MATCH, LZMA_IS_REP, ...) */
+	unsigned int rep0, rep1, rep2, rep3; /* most recent match distances, rep0 being the newest */
+};
+
+static inline int get_pos(struct writer *wr)
+{
+	return
+		wr->global_pos + wr->buffer_pos;
+}
+
+static inline unsigned char peek_old_byte(struct writer *wr,
+					  unsigned int offs)
+{
+	if (!wr->flush) { /* no flush callback: write_byte() never rewinds buffer_pos */
+		int pos;
+		while (offs > wr->header->dict_size)
+			offs -= wr->header->dict_size;
+		pos = wr->buffer_pos - offs;
+		return wr->buffer[pos];
+	} else { /* with flush, write_byte() restarts the buffer every dict_size bytes, so wrap the index */
+		unsigned int pos = wr->buffer_pos - offs;
+		while (pos >= wr->header->dict_size)
+			pos += wr->header->dict_size;
+		return wr->buffer[pos];
+	}
+
+}
+
+static inline int write_byte(struct writer *wr, unsigned char byte)
+{
+	wr->buffer[wr->buffer_pos++] = wr->previous_byte = byte;
+	if (wr->flush && wr->buffer_pos == wr->header->dict_size) {
+		wr->buffer_pos = 0;
+		wr->global_pos += wr->header->dict_size;
+		if (wr->flush((char *)wr->buffer, wr->header->dict_size)
+				!= wr->header->dict_size)
+			return -1;
+	}
+	return 0;
+}
+
+
+static inline int copy_byte(struct writer *wr, unsigned int offs)
+{
+	return write_byte(wr, peek_old_byte(wr, offs));
+}
+
+static inline int copy_bytes(struct writer *wr,
+			     unsigned int rep0, int len)
+{
+	do {
+		if (copy_byte(wr, rep0))
+			return -1;
+		len--;
+	} while (len != 0 && wr->buffer_pos < wr->header->dst_size);
+
+	return len;
+}
+
+static inline int process_bit0(struct writer *wr, struct rc *rc,
+			       struct cstate *cst, unsigned short int *p,
+			       int pos_state, unsigned short int *prob,
+			       int lc, unsigned int literal_pos_mask) {
+	int mi = 1;
+	rc_update_bit_0(rc, prob);
+	prob = (p + LZMA_LITERAL +
+		(LZMA_LIT_SIZE
+		 * (((get_pos(wr) & literal_pos_mask) << lc)
+		    + (wr->previous_byte >> (8 - lc))))
+		);
+
+	if (cst->state >= LZMA_NUM_LIT_STATES) {
+		int match_byte = peek_old_byte(wr, cst->rep0);
+		do {
+			int bit;
+			unsigned short int *prob_lit;
+
+			match_byte <<= 1;
+			bit = match_byte & 0x100;
+			prob_lit = prob + 0x100 + bit + mi;
+			if (rc_get_bit(rc, prob_lit, &mi)) {
+				if (!bit)
+					break;
+			} else {
+				if (bit)
+					break;
+			}
+		} while (mi < 0x100);
+	}
+	while (mi < 0x100) {
+		unsigned short int *prob_lit = prob + mi;
+		rc_get_bit(rc, prob_lit, &mi);
+	}
+	if (cst->state < 4)
+		cst->state = 0;
+	else if (cst->state < 10)
+		cst->state -= 3;
+	else
+		cst->state -= 6;
+
+	return write_byte(wr, mi);
+}
+
+static inline int process_bit1(struct writer *wr, struct rc *rc,
+			       struct cstate *cst, unsigned short int *p,
+			       int pos_state, unsigned short int *prob) {
+	int offset;
+	unsigned short int *prob_len;
+	int num_bits;
+	int len;
+
+	rc_update_bit_1(rc, prob);
+	prob = p + LZMA_IS_REP + cst->state;
+	if (rc_is_bit_0(rc, prob)) {
+		rc_update_bit_0(rc, prob);
+		cst->rep3 = cst->rep2;
+		cst->rep2 = cst->rep1;
+		cst->rep1 = cst->rep0;
+		cst->state = cst->state < LZMA_NUM_LIT_STATES ? 0 : 3;
+		prob = p + LZMA_LEN_CODER;
+	} else {
+		rc_update_bit_1(rc, prob);
+		prob = p + LZMA_IS_REP_G0 + cst->state;
+		if (rc_is_bit_0(rc, prob)) {
+			rc_update_bit_0(rc, prob);
+			prob = (p + LZMA_IS_REP_0_LONG
+				+ (cst->state <<
+				   LZMA_NUM_POS_BITS_MAX) +
+				pos_state);
+			if (rc_is_bit_0(rc, prob)) {
+				rc_update_bit_0(rc, prob);
+
+				cst->state = cst->state < LZMA_NUM_LIT_STATES ?
+					9 : 11;
+				return copy_byte(wr, cst->rep0);
+			} else {
+				rc_update_bit_1(rc, prob);
+			}
+		} else {
+			unsigned int distance;
+
+			rc_update_bit_1(rc, prob);
+			prob = p + LZMA_IS_REP_G1 + cst->state;
+			if (rc_is_bit_0(rc, prob)) {
+				rc_update_bit_0(rc, prob);
+				distance = cst->rep1;
+			} else {
+				rc_update_bit_1(rc, prob);
+				prob = p + LZMA_IS_REP_G2 + cst->state;
+				if (rc_is_bit_0(rc, prob)) {
+					rc_update_bit_0(rc, prob);
+					distance = cst->rep2;
+				} else {
+					rc_update_bit_1(rc, prob);
+					distance = cst->rep3;
+					cst->rep3 = cst->rep2;
+				}
+				cst->rep2 = cst->rep1;
+			}
+			cst->rep1 = cst->rep0;
+			cst->rep0 = distance;
+		}
+		cst->state = cst->state < LZMA_NUM_LIT_STATES ? 8 : 11;
+		prob = p + LZMA_REP_LEN_CODER;
+	}
+
+	prob_len = prob + LZMA_LEN_CHOICE;
+	if (rc_is_bit_0(rc, prob_len)) {
+		rc_update_bit_0(rc, prob_len);
+		prob_len = (prob + LZMA_LEN_LOW
+			    + (pos_state <<
+			       LZMA_LEN_NUM_LOW_BITS));
+		offset = 0;
+		num_bits = LZMA_LEN_NUM_LOW_BITS;
+	} else {
+		rc_update_bit_1(rc, prob_len);
+		prob_len = prob + LZMA_LEN_CHOICE_2;
+		if (rc_is_bit_0(rc, prob_len)) {
+			rc_update_bit_0(rc, prob_len);
+			prob_len = (prob + LZMA_LEN_MID
+				    + (pos_state <<
+				       LZMA_LEN_NUM_MID_BITS));
+			offset = 1 << LZMA_LEN_NUM_LOW_BITS;
+			num_bits = LZMA_LEN_NUM_MID_BITS;
+		} else {
+			rc_update_bit_1(rc, prob_len);
+			prob_len = prob + LZMA_LEN_HIGH;
+			offset = ((1 << LZMA_LEN_NUM_LOW_BITS)
+				  + (1 << LZMA_LEN_NUM_MID_BITS));
+			num_bits = LZMA_LEN_NUM_HIGH_BITS;
+		}
+	}
+
+	rc_bit_tree_decode(rc, prob_len, num_bits, &len);
+	len += offset;
+
+	if (cst->state < 4) {
+		int pos_slot;
+
+		cst->state += LZMA_NUM_LIT_STATES;
+		prob =
+			p + LZMA_POS_SLOT +
+			((len <
+			  LZMA_NUM_LEN_TO_POS_STATES ? len :
+			  LZMA_NUM_LEN_TO_POS_STATES - 1)
+			 << LZMA_NUM_POS_SLOT_BITS);
+		rc_bit_tree_decode(rc, prob,
+				   LZMA_NUM_POS_SLOT_BITS,
+				   &pos_slot);
+		if (pos_slot >= LZMA_START_POS_MODEL_INDEX) {
+			int i, mi;
+			num_bits = (pos_slot >> 1) - 1;
+			cst->rep0 = 2 | (pos_slot & 1);
+			if (pos_slot < LZMA_END_POS_MODEL_INDEX) {
+				cst->rep0 <<= num_bits;
+				prob = p + LZMA_SPEC_POS +
+					cst->rep0 - pos_slot - 1;
+			} else {
+				num_bits -= LZMA_NUM_ALIGN_BITS;
+				while (num_bits--)
+					cst->rep0 = (cst->rep0 << 1) |
+						rc_direct_bit(rc);
+				prob = p + LZMA_ALIGN;
+				cst->rep0 <<= LZMA_NUM_ALIGN_BITS;
+				num_bits = LZMA_NUM_ALIGN_BITS;
+			}
+			i = 1;
+			mi = 1;
+			while (num_bits--) {
+				if (rc_get_bit(rc, prob + mi, &mi))
+					cst->rep0 |= i;
+				i <<= 1;
+			}
+		} else
+			cst->rep0 = pos_slot;
+		if (++(cst->rep0) == 0) /* rep0 wrapped to 0: unlzma() leaves its decode loop on this */
+			return 0;
+		if (cst->rep0 > wr->header->dict_size
+				|| cst->rep0 > get_pos(wr))
+			return -1;
+	}
+
+	len += LZMA_MATCH_MIN_LEN;
+
+	return copy_bytes(wr, cst->rep0, len);
+}
+
+
+
+int unlzma(unsigned char *buf, int in_len,
+	   int(*fill)(void*, unsigned int),
+	   int(*flush)(void*, unsigned int),
+	   unsigned char *output,
+	   int *posp,
+	   void(*error)(char *x)
+	)
+{
+	struct lzma_header header;
+	int lc, pb, lp;
+	unsigned int pos_state_mask;
+	unsigned int literal_pos_mask;
+	unsigned short int *p;
+	int num_probs;
+	struct rc rc;
+	int i, mi;
+	struct writer wr;
+	struct cstate cst;
+	unsigned char *inbuf;
+	int ret = -1; /* stays -1 unless decoding runs to completion */
+
+	rc.error = error;
+
+	if (buf)
+		inbuf = buf;
+	else
+		inbuf = malloc(LZMA_IOBUF_SIZE);
+	if (!inbuf) {
+		error("Could not allocate input bufer");
+		goto exit_0;
+	}
+
+	cst.state = 0;
+	cst.rep0 = cst.rep1 = cst.rep2 = cst.rep3 = 1;
+
+	wr.header = &header;
+	wr.flush = flush;
+	wr.global_pos = 0;
+	wr.previous_byte = 0;
+	wr.buffer_pos = 0;
+
+	rc_init(&rc, fill, inbuf, in_len);
+
+	for (i = 0; i < sizeof(header); i++) {
+		if (rc.ptr >= rc.buffer_end)
+			rc_read(&rc);
+		((unsigned char *)&header)[i] = *rc.ptr++;
+	}
+
+	if (header.pos >= (9 * 5 * 5)) {
+		error("bad header");
+		goto exit_1;
+	}
+
+	mi = 0;
+	lc = header.pos;
+	while (lc >= 9) {
+		mi++;
+		lc -= 9;
+	}
+	pb = 0;
+	lp = mi;
+	while (lp >= 5) {
+		pb++;
+		lp -= 5;
+	}
+	pos_state_mask = (1 << pb) - 1;
+	literal_pos_mask = (1 << lp) - 1;
+
+	ENDIAN_CONVERT(header.dict_size);
+	ENDIAN_CONVERT(header.dst_size);
+
+	if (header.dict_size == 0)
+		header.dict_size = 1;
+
+	if (output)
+		wr.buffer = output;
+	else {
+		wr.bufsize = MIN(header.dst_size, header.dict_size);
+		wr.buffer = malloc(wr.bufsize);
+	}
+	if (wr.buffer == NULL)
+		goto exit_1;
+
+	num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp));
+	p = (unsigned short int *) malloc(num_probs * sizeof(*p)); /* malloc is alloca in this file: the table sits on this stack frame */
+	if (p == 0)
+		goto exit_2;
+	num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp));
+	for (i = 0; i < num_probs; i++)
+		p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1;
+
+	rc_init_code(&rc);
+
+	while (get_pos(&wr) < header.dst_size) {
+		int pos_state = get_pos(&wr) & pos_state_mask;
+		unsigned short int *prob = p + LZMA_IS_MATCH +
+			(cst.state << LZMA_NUM_POS_BITS_MAX) + pos_state;
+		if (rc_is_bit_0(&rc, prob)) {
+			if (process_bit0(&wr, &rc, &cst, p, pos_state, prob,
+					lc, literal_pos_mask)) {
+				error("LZMA data is corrupt");
+				goto exit_3;
+			}
+		} else {
+			if (process_bit1(&wr, &rc, &cst, p, pos_state, prob)) {
+				error("LZMA data is corrupt");
+				goto exit_3;
+			}
+			if (cst.rep0 == 0)
+				break;
+		}
+		if (rc.buffer_size <= 0)
+			goto exit_3;
+	}
+
+	if (posp)
+		*posp = rc.ptr-rc.buffer;
+	if (!wr.flush || wr.flush(wr.buffer, wr.buffer_pos) == wr.buffer_pos)
+		ret = 0;
+exit_3:
+	free(p);
+exit_2:
+	if (!output)
+		free(wr.buffer);
+exit_1:
+	if (!buf)
+		free(inbuf);
+exit_0:
+	return ret;
+}
+
+int unlzma_simple(unsigned char *buf, int in_len,
+		  unsigned char *output,
+		  void(*error)(char *x))
+{
+	return unlzma(buf, in_len, NULL, NULL, output, NULL, error); /* no fill/flush callbacks: all input is in buf, all output goes to output */
+}