szl: implemented #96

SZL no longer do self-extraction for runtime binary, it would boot from
SD/ethernet depending on the boot mode settings.
This allows a larger runtime binary, so we can optimize for speed in the
runtime firmware for better performance, and allow more features to be
added later.
netboot
pca006132 2020-09-01 09:39:21 +08:00
parent 653d143784
commit ccf8ae5b5d
9 changed files with 553 additions and 794 deletions

22
src/Cargo.lock generated
View File

@ -68,22 +68,6 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f8cb7306107e4b10e64994de6d3274bd08996a7c1322a27b86482392f96be0a"
[[package]]
name = "cstr_core"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edf9ff5f1182cea3b0f4269dd3edf146b5bfedc5dc8a29b95edf855f438786b7"
dependencies = [
"cty",
"memchr",
]
[[package]]
name = "cty"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7313c0d620d0cb4dbd9d019e461a4beb501071ff46ec0ab933efb4daa76d73e3"
[[package]]
name = "dwarf"
version = "0.0.0"
@ -471,10 +455,12 @@ dependencies = [
name = "szl"
version = "0.1.0"
dependencies = [
"cc",
"cstr_core",
"byteorder",
"core_io",
"libboard_zynq",
"libconfig",
"libcortex_a9",
"libregister",
"libsupport_zynq",
"log",
]

View File

@ -10,6 +10,16 @@ members = [
"szl"
]
# Note: we are using dev profile for szl to override the opt-level only
[profile.dev]
panic = "abort"
debug = true
codegen-units = 1
opt-level = 'z'
lto = true
debug-assertions = false
overflow-checks = false
[profile.release]
panic = "abort"
debug = true
@ -17,13 +27,6 @@ codegen-units = 1
opt-level = 's'
lto = true
[profile.release.package.smoltcp]
opt-level = 2
[profile.release.package.libasync]
opt-level = 2
[profile.release.package.libboard_zynq]
opt-level = 2
[patch.crates-io]
core_io = { path = "./libcoreio" }
compiler_builtins = { git = "https://git.m-labs.hk/M-Labs/compiler-builtins-zynq.git"}

View File

@ -1,6 +1,6 @@
VARIANT := simple
all: ../build/firmware/armv7-none-eabihf/release/szl
all: ../build/firmware/armv7-none-eabihf/release/szl ../build/firmware/armv7-none-eabihf/release/runtime
.PHONY: all
@ -11,10 +11,7 @@ all: ../build/firmware/armv7-none-eabihf/release/szl
../build/firmware/armv7-none-eabihf/release/runtime: ../build/pl.rs ../build/rustc-cfg $(shell find . -path ./szl -prune -o -print)
XBUILD_SYSROOT_PATH=`pwd`/../build/sysroot cargo xbuild --release -p runtime --target-dir ../build/firmware
llvm-objcopy -O binary ../build/firmware/armv7-none-eabihf/release/runtime ../build/runtime.bin
../build/szl-payload.bin.lzma: ../build/firmware/armv7-none-eabihf/release/runtime
llvm-objcopy -O binary ../build/firmware/armv7-none-eabihf/release/runtime ../build/szl-payload.bin
lzma --keep -f ../build/szl-payload.bin
../build/firmware/armv7-none-eabihf/release/szl: .cargo/* armv7-none-eabihf.json Cargo.lock Cargo.toml szl/* szl/src/* ../build/szl-payload.bin.lzma
XBUILD_SYSROOT_PATH=`pwd`/../build/sysroot cargo xbuild --release -p szl --target-dir ../build/firmware
../build/firmware/armv7-none-eabihf/release/szl: .cargo/* armv7-none-eabihf.json Cargo.lock Cargo.toml szl/* szl/src/*
XBUILD_SYSROOT_PATH=`pwd`/../build/sysroot cargo xbuild -p szl --target-dir ../build/firmware

View File

@ -11,10 +11,12 @@ default = ["target_zc706"]
[dependencies]
log = "0.4"
cstr_core = { version = "0.2", default-features = false }
libboard_zynq = { git = "https://git.m-labs.hk/M-Labs/zynq-rs.git" }
libsupport_zynq = { git = "https://git.m-labs.hk/M-Labs/zynq-rs.git", default-features = false, features = ["dummy_irq_handler"] }
libcortex_a9 = { git = "https://git.m-labs.hk/M-Labs/zynq-rs.git" }
byteorder = { version = "1.3", default-features = false }
core_io = { version = "0.1", features = ["collections"] }
libconfig = { path = "../libconfig" }
libboard_zynq = { git = "https://git.m-labs.hk/M-Labs/zynq-rs.git" }
libsupport_zynq = { git = "https://git.m-labs.hk/M-Labs/zynq-rs.git" }
libcortex_a9 = { git = "https://git.m-labs.hk/M-Labs/zynq-rs.git" }
libregister = { git = "https://git.m-labs.hk/M-Labs/zynq-rs.git" }
[build-dependencies]
cc = { version = "1.0.1" }

View File

@ -1,14 +1,13 @@
use std::env;
use std::fs::File;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::path::PathBuf;
fn main() {
println!("cargo:rerun-if-changed=build.rs");
let out = env::var("OUT_DIR").unwrap();
let out_dir = &PathBuf::from(&out);
compile_unlzma();
// Put the linker script somewhere the linker can find it
File::create(out_dir.join("link.x"))
.unwrap()
@ -21,29 +20,3 @@ fn main() {
println!("cargo:rerun-if-changed=link.x");
}
pub fn compile_unlzma() {
let cfg = &mut cc::Build::new();
cfg.compiler("clang");
cfg.no_default_flags(true);
cfg.warnings(false);
cfg.flag("-nostdlib");
cfg.flag("-ffreestanding");
cfg.flag("-fPIC");
cfg.flag("-fno-stack-protector");
cfg.flag("--target=armv7-none-eabihf");
cfg.flag("-Oz");
cfg.flag("-flto=full");
let sources = vec![
"unlzma.c",
];
let root = Path::new("./");
for src in sources {
println!("cargo:rerun-if-changed={}", src);
cfg.file(root.join("src").join(src));
}
cfg.compile("unlzma");
}

View File

@ -4,6 +4,7 @@ MEMORY
{
/* 256 kB On-Chip Memory */
OCM : ORIGIN = 0, LENGTH = 0x30000
SDRAM : ORIGIN = 0x00100000, LENGTH = 0x1FF00000
OCM3 : ORIGIN = 0xFFFF0000, LENGTH = 0x10000
}
@ -26,6 +27,16 @@ SECTIONS
*(.data .data.*);
} > OCM
.heap (NOLOAD) : ALIGN(8)
{
__runtime_start = .;
. += 0x8000000;
__runtime_end = .;
__heap0_start = .;
. += 0x8000000;
__heap0_end = .;
} > SDRAM
.bss (NOLOAD) : ALIGN(4)
{
__bss_start = .;
@ -34,12 +45,6 @@ SECTIONS
__bss_end = .;
} > OCM3
.heap (NOLOAD) : ALIGN(8)
{
__heap0_start = .;
__heap0_end = .;
} > OCM3
.stack1 (NOLOAD) : ALIGN(8)
{
__stack1_end = .;

View File

@ -1,47 +1,66 @@
#![no_std]
#![no_main]
#![feature(panic_info_message)]
extern crate alloc;
extern crate log;
use core::mem;
use log::{debug, info, error};
use cstr_core::CStr;
mod netboot;
use libcortex_a9::{
enable_fpu,
l2c::enable_l2_cache,
cache::{dcciall, iciallu, bpiall},
asm::{dsb, isb},
};
use alloc::rc::Rc;
use core::mem;
use core_io::{Read, Seek};
use libboard_zynq::{
self as zynq, println,
clocks::Clocks, clocks::source::{ClockSource, ArmPll, IoPll},
stdio,
logger,
self as zynq,
clocks::source::{ArmPll, ClockSource, IoPll},
clocks::Clocks,
logger, println, sdio, slcr,
timer::GlobalTimer,
};
use libsupport_zynq as _;
use libconfig::{bootgen, sd_reader, Config};
use libcortex_a9::{
asm::{dsb, isb},
cache::{bpiall, dcciall, iciallu},
enable_fpu,
l2c::enable_l2_cache,
};
use libregister::RegisterR;
use libsupport_zynq::ram;
use log::info;
extern "C" {
fn unlzma_simple(buf: *const u8, in_len: i32,
output: *mut u8,
error: extern fn(*const u8)) -> i32;
static mut __runtime_start: usize;
static mut __runtime_end: usize;
}
extern fn lzma_error(message: *const u8) {
let msg = unsafe {CStr::from_ptr(message)}.to_str();
if let Ok(msg) = msg {
println!("LZMA error: {}", msg);
}
fn boot_sd<File: Read + Seek>(
file: &mut Option<File>,
runtime_start: *mut u8,
runtime_max: usize,
) -> Result<(), ()> {
if file.is_none() {
log::error!("No bootgen file");
return Err(());
}
let mut file = file.as_mut().unwrap();
info!("Loading gateware");
bootgen::load_bitstream(&mut file).map_err(|e| log::error!("Cannot load gateware: {:?}", e))?;
#[panic_handler]
fn panic(_: &core::panic::PanicInfo) -> ! {
stdio::drop_uart();
println!("panicked!");
loop {}
info!("Loading runtime");
let runtime =
bootgen::get_runtime(&mut file).map_err(|e| log::error!("Cannot load runtime: {:?}", e))?;
if runtime.len() > runtime_max {
log::error!(
"Runtime binary too large, max {} but got {}",
runtime_max,
runtime.len()
);
}
unsafe {
let target = core::slice::from_raw_parts_mut(runtime_start, runtime.len());
target.copy_from_slice(&runtime);
}
Ok(())
}
#[no_mangle]
@ -50,7 +69,8 @@ pub fn main_core0() {
enable_fpu();
logger::init().unwrap();
log::set_max_level(log::LevelFilter::Debug);
println!(r#"
println!(
r#"
__________ __
/ ___/__ / / /
@ -59,12 +79,11 @@ pub fn main_core0() {
/____/ /____/_____/
(C) 2020 M-Labs
"#);
"#
);
info!("Simple Zynq Loader starting...");
enable_l2_cache();
debug!("FPU enabled on Core0");
const CPU_FREQ: u32 = 800_000_000;
ArmPll::setup(2 * CPU_FREQ);
@ -72,15 +91,61 @@ pub fn main_core0() {
IoPll::setup(1_000_000_000);
libboard_zynq::stdio::drop_uart(); // reinitialize UART after clocking change
let mut ddr = zynq::ddr::DdrRam::ddrram();
ram::init_alloc_core0();
let payload = include_bytes!("../../../build/szl-payload.bin.lzma");
info!("decompressing payload");
let result = unsafe {
unlzma_simple(payload.as_ptr(), payload.len() as i32, ddr.ptr(), lzma_error)
};
if result < 0 {
error!("decompression failed");
let sdio0 = sdio::Sdio::sdio0(true);
let fs = if sdio0.is_card_inserted() {
info!("Card inserted. Mounting file system.");
let sd = sdio::sd_card::SdCard::from_sdio(sdio0).unwrap();
let reader = sd_reader::SdReader::new(sd);
reader
.mount_fatfs(sd_reader::PartitionEntry::Entry1)
.map(|v| Rc::new(v))
.ok()
} else {
info!("No SD card inserted.");
None
};
let fs_ref = fs.as_ref();
let root_dir = fs_ref.map(|fs| fs.root_dir());
let mut bootgen_file = root_dir.and_then(|root_dir| root_dir.open_file("/BOOT.BIN").ok());
let config = Config::from_fs(fs.clone());
unsafe {
let max_len =
&__runtime_end as *const usize as usize - &__runtime_start as *const usize as usize;
match slcr::RegisterBlock::unlocked(|slcr| slcr.boot_mode.read().boot_mode_pins()) {
slcr::BootModePins::Jtag => netboot::netboot(
&mut bootgen_file,
config,
&mut __runtime_start as *mut usize as *mut u8,
max_len,
),
slcr::BootModePins::SdCard => {
if boot_sd(
&mut bootgen_file,
&mut __runtime_start as *mut usize as *mut u8,
max_len,
)
.is_err()
{
log::error!("Error booting from SD card");
log::info!("Fall back on netboot");
netboot::netboot(
&mut bootgen_file,
config,
&mut __runtime_start as *mut usize as *mut u8,
max_len,
)
}
}
v => {
panic!("Boot mode {:?} not supported", v);
}
};
}
info!("Preparing for runtime execution");
// Flush data cache entries for all of L1 cache, including
// Memory/Instruction Synchronization Barriers
dcciall();
@ -94,7 +159,6 @@ pub fn main_core0() {
unsafe {
(mem::transmute::<*mut u8, fn()>(ddr.ptr::<u8>()))();
}
}
loop {}
}

399
src/szl/src/netboot.rs Normal file
View File

@ -0,0 +1,399 @@
use alloc::vec;
use alloc::vec::Vec;
use byteorder::{ByteOrder, NetworkEndian};
use core_io::{Read, Seek};
use libboard_zynq::{
devc,
eth::Eth,
smoltcp::{
self,
iface::{EthernetInterfaceBuilder, NeighborCache},
time::Instant,
wire::IpCidr,
},
timer::GlobalTimer,
};
use libconfig::{bootgen, net_settings, Config};
enum NetConnState {
WaitCommand,
FirmwareLength(usize, u8),
FirmwareDownload(usize, usize),
FirmwareWaitO,
FirmwareWaitK,
GatewareLength(usize, u8),
GatewareDownload(usize, usize),
GatewareWaitO,
GatewareWaitK,
}
struct NetConn {
state: NetConnState,
firmware_downloaded: bool,
gateware_downloaded: bool,
}
impl NetConn {
pub fn new() -> NetConn {
NetConn {
state: NetConnState::WaitCommand,
firmware_downloaded: false,
gateware_downloaded: false,
}
}
pub fn reset(&mut self) {
self.state = NetConnState::WaitCommand;
self.firmware_downloaded = false;
self.gateware_downloaded = false;
}
fn input_partial<File: Read + Seek>(
&mut self,
bootgen_file: &mut Option<File>,
runtime_start: *mut u8,
runtime_max_len: usize,
buf: &[u8],
storage: &mut Vec<u8>,
mut boot_callback: impl FnMut(),
) -> Result<usize, ()> {
match self.state {
NetConnState::WaitCommand => match buf[0] {
b'F' => {
log::info!("Received firmware load command");
self.state = NetConnState::FirmwareLength(0, 0);
Ok(1)
}
b'G' => {
log::info!("Received gateware load command");
self.state = NetConnState::GatewareLength(0, 0);
storage.clear();
Ok(1)
}
b'B' => {
if !self.gateware_downloaded {
log::info!("Gateware not loaded via netboot");
if bootgen_file.is_none() {
log::error!("No bootgen file to load gateware");
return Err(());
}
log::info!("Attempting to load from SD card");
if let Err(e) = bootgen::load_bitstream(bootgen_file.as_mut().unwrap()) {
log::error!("Gateware load failed: {:?}", e);
return Err(());
}
}
if self.firmware_downloaded {
log::info!("Received boot command");
boot_callback();
self.state = NetConnState::WaitCommand;
Ok(1)
} else {
log::error!("Received boot command, but no firmware downloaded");
Err(())
}
}
_ => {
log::error!("Received unknown netboot command: 0x{:02x}", buf[0]);
Err(())
}
},
NetConnState::FirmwareLength(firmware_length, recv_bytes) => {
let firmware_length = (firmware_length << 8) | (buf[0] as usize);
let recv_bytes = recv_bytes + 1;
if recv_bytes == 4 {
if firmware_length > runtime_max_len {
log::error!(
"Runtime too large, maximum {} but requested {}",
runtime_max_len,
firmware_length
);
return Err(());
}
self.state = NetConnState::FirmwareDownload(firmware_length, 0);
storage.clear();
storage.reserve(firmware_length);
} else {
self.state = NetConnState::FirmwareLength(firmware_length, recv_bytes);
}
Ok(1)
}
NetConnState::FirmwareDownload(firmware_length, recv_bytes) => {
let max_length = firmware_length - recv_bytes;
let buf = if buf.len() > max_length {
&buf[..max_length]
} else {
&buf[..]
};
let length = buf.len();
storage.extend_from_slice(buf);
let recv_bytes = recv_bytes + length;
if recv_bytes == firmware_length {
self.state = NetConnState::FirmwareWaitO;
Ok(length)
} else {
self.state = NetConnState::FirmwareDownload(firmware_length, recv_bytes);
Ok(length)
}
}
NetConnState::FirmwareWaitO => {
if buf[0] == b'O' {
self.state = NetConnState::FirmwareWaitK;
Ok(1)
} else {
log::error!("End-of-firmware confirmation failed");
Err(())
}
}
NetConnState::FirmwareWaitK => {
if buf[0] == b'K' {
log::info!("Firmware successfully downloaded");
self.state = NetConnState::WaitCommand;
self.firmware_downloaded = true;
{
let dest = unsafe {
core::slice::from_raw_parts_mut(runtime_start, storage.len())
};
dest.copy_from_slice(storage);
}
Ok(1)
} else {
log::error!("End-of-firmware confirmation failed");
Err(())
}
}
NetConnState::GatewareLength(gateware_length, recv_bytes) => {
let gateware_length = (gateware_length << 8) | (buf[0] as usize);
let recv_bytes = recv_bytes + 1;
if recv_bytes == 4 {
self.state = NetConnState::GatewareDownload(gateware_length, 0);
storage.clear();
storage.reserve_exact(gateware_length);
} else {
self.state = NetConnState::GatewareLength(gateware_length, recv_bytes);
}
Ok(1)
}
NetConnState::GatewareDownload(gateware_length, recv_bytes) => {
let max_length = gateware_length - recv_bytes;
let buf = if buf.len() > max_length {
&buf[..max_length]
} else {
&buf[..]
};
let length = buf.len();
storage.extend_from_slice(buf);
let recv_bytes = recv_bytes + length;
if recv_bytes == gateware_length {
self.state = NetConnState::GatewareWaitO;
Ok(length)
} else {
self.state = NetConnState::GatewareDownload(gateware_length, recv_bytes);
Ok(length)
}
}
NetConnState::GatewareWaitO => {
if buf[0] == b'O' {
self.state = NetConnState::GatewareWaitK;
Ok(1)
} else {
log::error!("End-of-gateware confirmation failed");
Err(())
}
}
NetConnState::GatewareWaitK => {
if buf[0] == b'K' {
log::info!("Preprocessing bitstream...");
// find sync word 0xFFFFFFFF AA995566
let sync_word: [u8; 8] = [0xFF, 0xFF, 0xFF, 0xFF, 0xAA, 0x99, 0x55, 0x66];
let mut i = 0;
let mut state = 0;
while i < storage.len() {
if storage[i] == sync_word[state] {
state += 1;
if state == sync_word.len() {
break;
}
} else {
// backtrack
// not very efficient but we only have 8 elements
'outer: while state > 0 {
state -= 1;
for j in 0..state {
if storage[i - j] != sync_word[state - j] {
continue 'outer;
}
}
break;
}
}
i += 1;
}
if state != sync_word.len() {
log::error!("Sync word not found in bitstream (corrupted?)");
return Err(());
}
// we need the sync word
// i was pointing to the last element in the sync sequence
i -= sync_word.len() - 1;
// // append no-op
// storage.extend_from_slice(&[0x20, 0, 0, 0]);
let bitstream = &mut storage[i..];
{
// swap endian
let swap = unsafe {
core::slice::from_raw_parts_mut(
bitstream.as_mut_ptr() as usize as *mut u32,
bitstream.len() / 4,
)
};
NetworkEndian::from_slice_u32(swap);
}
unsafe {
// align to 64 bytes
let ptr = alloc::alloc::alloc(
alloc::alloc::Layout::from_size_align(bitstream.len(), 64).unwrap(),
);
let buffer = core::slice::from_raw_parts_mut(ptr, bitstream.len());
buffer.copy_from_slice(bitstream);
let mut devcfg = devc::DevC::new();
devcfg.enable();
let result = devcfg.program(&buffer);
core::ptr::drop_in_place(ptr);
if let Err(e) = result {
log::error!("Error during FPGA startup: {}", e);
return Err(());
}
}
log::info!("Gateware successfully downloaded");
self.state = NetConnState::WaitCommand;
self.gateware_downloaded = true;
Ok(1)
} else {
log::info!("End-of-gateware confirmation failed");
Err(())
}
}
}
}
fn input<File: Read + Seek>(
&mut self,
bootgen_file: &mut Option<File>,
runtime_start: *mut u8,
runtime_max_len: usize,
buf: &[u8],
storage: &mut Vec<u8>,
mut boot_callback: impl FnMut(),
) -> Result<(), ()> {
let mut remaining = &buf[..];
while !remaining.is_empty() {
let read_cnt = self.input_partial(
bootgen_file,
runtime_start,
runtime_max_len,
remaining,
storage,
&mut boot_callback,
)?;
remaining = &remaining[read_cnt..];
}
Ok(())
}
}
pub fn netboot<File: Read + Seek>(
bootgen_file: &mut Option<File>,
cfg: Config,
runtime_start: *mut u8,
runtime_max_len: usize,
) {
log::info!("Preparing network for netboot");
let net_addresses = net_settings::get_adresses(&cfg);
log::info!("Network addresses: {}", net_addresses);
let eth = Eth::eth0(net_addresses.hardware_addr.0.clone());
let eth = eth.start_rx(8);
let mut eth = eth.start_tx(8);
let mut neighbor_map = [None; 2];
let neighbor_cache = NeighborCache::new(&mut neighbor_map[..]);
let mut ip_addrs = [IpCidr::new(net_addresses.ipv4_addr, 0)];
let mut interface = EthernetInterfaceBuilder::new(&mut eth)
.ethernet_addr(net_addresses.hardware_addr)
.ip_addrs(&mut ip_addrs[..])
.neighbor_cache(neighbor_cache)
.finalize();
let mut rx_storage = vec![0; 4096];
let mut tx_storage = vec![0; 128];
let mut socket_set_entries: [_; 1] = Default::default();
let mut sockets = smoltcp::socket::SocketSet::new(&mut socket_set_entries[..]);
let tcp_rx_buffer = smoltcp::socket::TcpSocketBuffer::new(&mut rx_storage[..]);
let tcp_tx_buffer = smoltcp::socket::TcpSocketBuffer::new(&mut tx_storage[..]);
let tcp_socket = smoltcp::socket::TcpSocket::new(tcp_rx_buffer, tcp_tx_buffer);
let tcp_handle = sockets.add(tcp_socket);
let mut net_conn = NetConn::new();
let mut storage = Vec::new();
let mut boot_flag = false;
let timer = unsafe { GlobalTimer::get() };
log::info!("Waiting for connections...");
loop {
let timestamp = Instant::from_millis(timer.get_time().0 as i64);
{
let socket = &mut *sockets.get::<smoltcp::socket::TcpSocket>(tcp_handle);
if boot_flag {
return;
}
if !socket.is_open() {
socket.listen(4269).unwrap() // 0x10ad
}
if socket.may_recv() {
if socket
.recv(|data| {
(
data.len(),
net_conn
.input(
bootgen_file,
runtime_start,
runtime_max_len,
data,
&mut storage,
|| {
boot_flag = true;
},
)
.is_err(),
)
})
.unwrap()
{
net_conn.reset();
socket.close();
}
} else if socket.may_send() {
net_conn.reset();
socket.close();
}
}
match interface.poll(&mut sockets, timestamp) {
Ok(_) => (),
Err(smoltcp::Error::Unrecognized) => (),
Err(err) => log::error!("Network error: {}", err),
}
}
}

View File

@ -1,670 +0,0 @@
/*
*Taken from: Lzma decompressor for Linux kernel. Shamelessly snarfed
*from busybox 1.1.1
*
*Linux kernel adaptation
*Copyright (C) 2006 Alain < alain@knaff.lu >
*
*Based on small lzma deflate implementation/Small range coder
*implementation for lzma.
*Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org >
*
*Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
*Copyright (C) 1999-2005 Igor Pavlov
*
*Copyrights of the parts, see headers below.
*
*
*This program is free software; you can redistribute it and/or
*modify it under the terms of the GNU Lesser General Public
*License as published by the Free Software Foundation; either
*version 2.1 of the License, or (at your option) any later version.
*
*This program is distributed in the hope that it will be useful,
*but WITHOUT ANY WARRANTY; without even the implied warranty of
*MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
*Lesser General Public License for more details.
*
*You should have received a copy of the GNU Lesser General Public
*License along with this library; if not, write to the Free Software
*Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define NULL ((void *)0)
#define alloca(size) __builtin_alloca(size)
#define malloc alloca
static inline void free(void *p) {}
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
static long long read_int(unsigned char *ptr, int size)
{
int i;
long long ret = 0;
for (i = 0; i < size; i++)
ret = (ret << 8) | ptr[size-i-1];
return ret;
}
#define ENDIAN_CONVERT(x) \
x = (typeof(x))read_int((unsigned char *)&x, sizeof(x))
/* Small range coder implementation for lzma.
*Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org >
*
*Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
*Copyright (c) 1999-2005 Igor Pavlov
*/
#define LZMA_IOBUF_SIZE 0x10000
struct rc {
int (*fill)(void*, unsigned int);
unsigned char *ptr;
unsigned char *buffer;
unsigned char *buffer_end;
int buffer_size;
unsigned int code;
unsigned int range;
unsigned int bound;
void (*error)(char *);
};
#define RC_TOP_BITS 24
#define RC_MOVE_BITS 5
#define RC_MODEL_TOTAL_BITS 11
static int nofill(void *buffer, unsigned int len)
{
return -1;
}
/* Called twice: once at startup and once in rc_normalize() */
static void rc_read(struct rc *rc)
{
rc->buffer_size = rc->fill((char *)rc->buffer, LZMA_IOBUF_SIZE);
if (rc->buffer_size <= 0)
rc->error("unexpected EOF");
rc->ptr = rc->buffer;
rc->buffer_end = rc->buffer + rc->buffer_size;
}
/* Called once */
static inline void rc_init(struct rc *rc,
int (*fill)(void*, unsigned int),
unsigned char *buffer, int buffer_size)
{
if (fill)
rc->fill = fill;
else
rc->fill = nofill;
rc->buffer = buffer;
rc->buffer_size = buffer_size;
rc->buffer_end = rc->buffer + rc->buffer_size;
rc->ptr = rc->buffer;
rc->code = 0;
rc->range = 0xFFFFFFFF;
}
static inline void rc_init_code(struct rc *rc)
{
int i;
for (i = 0; i < 5; i++) {
if (rc->ptr >= rc->buffer_end)
rc_read(rc);
rc->code = (rc->code << 8) | *rc->ptr++;
}
}
/* Called twice, but one callsite is in inline'd rc_is_bit_0_helper() */
static void rc_do_normalize(struct rc *rc)
{
if (rc->ptr >= rc->buffer_end)
rc_read(rc);
rc->range <<= 8;
rc->code = (rc->code << 8) | *rc->ptr++;
}
static inline void rc_normalize(struct rc *rc)
{
if (rc->range < (1 << RC_TOP_BITS))
rc_do_normalize(rc);
}
/* Called 9 times */
/* Why rc_is_bit_0_helper exists?
*Because we want to always expose (rc->code < rc->bound) to optimizer
*/
static inline unsigned int rc_is_bit_0_helper(struct rc *rc, unsigned short int *p)
{
rc_normalize(rc);
rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS);
return rc->bound;
}
static inline int rc_is_bit_0(struct rc *rc, unsigned short int *p)
{
unsigned int t = rc_is_bit_0_helper(rc, p);
return rc->code < t;
}
/* Called ~10 times, but very small, thus inlined */
static inline void rc_update_bit_0(struct rc *rc, unsigned short int *p)
{
rc->range = rc->bound;
*p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS;
}
static inline void rc_update_bit_1(struct rc *rc, unsigned short int *p)
{
rc->range -= rc->bound;
rc->code -= rc->bound;
*p -= *p >> RC_MOVE_BITS;
}
/* Called 4 times in unlzma loop */
static int rc_get_bit(struct rc *rc, unsigned short int *p, int *symbol)
{
if (rc_is_bit_0(rc, p)) {
rc_update_bit_0(rc, p);
*symbol *= 2;
return 0;
} else {
rc_update_bit_1(rc, p);
*symbol = *symbol * 2 + 1;
return 1;
}
}
/* Called once */
static inline int rc_direct_bit(struct rc *rc)
{
rc_normalize(rc);
rc->range >>= 1;
if (rc->code >= rc->range) {
rc->code -= rc->range;
return 1;
}
return 0;
}
/* Called twice */
static inline void
rc_bit_tree_decode(struct rc *rc, unsigned short int *p, int num_levels, int *symbol)
{
int i = num_levels;
*symbol = 1;
while (i--)
rc_get_bit(rc, p + *symbol, symbol);
*symbol -= 1 << num_levels;
}
/*
* Small lzma deflate implementation.
* Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org >
*
* Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
* Copyright (C) 1999-2005 Igor Pavlov
*/
struct lzma_header {
unsigned char pos;
unsigned int dict_size;
unsigned long long int dst_size;
} __attribute__ ((packed)) ;
#define LZMA_BASE_SIZE 1846
#define LZMA_LIT_SIZE 768
#define LZMA_NUM_POS_BITS_MAX 4
#define LZMA_LEN_NUM_LOW_BITS 3
#define LZMA_LEN_NUM_MID_BITS 3
#define LZMA_LEN_NUM_HIGH_BITS 8
#define LZMA_LEN_CHOICE 0
#define LZMA_LEN_CHOICE_2 (LZMA_LEN_CHOICE + 1)
#define LZMA_LEN_LOW (LZMA_LEN_CHOICE_2 + 1)
#define LZMA_LEN_MID (LZMA_LEN_LOW \
+ (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS)))
#define LZMA_LEN_HIGH (LZMA_LEN_MID \
+(1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS)))
#define LZMA_NUM_LEN_PROBS (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS))
#define LZMA_NUM_STATES 12
#define LZMA_NUM_LIT_STATES 7
#define LZMA_START_POS_MODEL_INDEX 4
#define LZMA_END_POS_MODEL_INDEX 14
#define LZMA_NUM_FULL_DISTANCES (1 << (LZMA_END_POS_MODEL_INDEX >> 1))
#define LZMA_NUM_POS_SLOT_BITS 6
#define LZMA_NUM_LEN_TO_POS_STATES 4
#define LZMA_NUM_ALIGN_BITS 4
#define LZMA_MATCH_MIN_LEN 2
#define LZMA_IS_MATCH 0
#define LZMA_IS_REP (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX))
#define LZMA_IS_REP_G0 (LZMA_IS_REP + LZMA_NUM_STATES)
#define LZMA_IS_REP_G1 (LZMA_IS_REP_G0 + LZMA_NUM_STATES)
#define LZMA_IS_REP_G2 (LZMA_IS_REP_G1 + LZMA_NUM_STATES)
#define LZMA_IS_REP_0_LONG (LZMA_IS_REP_G2 + LZMA_NUM_STATES)
#define LZMA_POS_SLOT (LZMA_IS_REP_0_LONG \
+ (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX))
#define LZMA_SPEC_POS (LZMA_POS_SLOT \
+(LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS))
#define LZMA_ALIGN (LZMA_SPEC_POS \
+ LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX)
#define LZMA_LEN_CODER (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS))
#define LZMA_REP_LEN_CODER (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS)
#define LZMA_LITERAL (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS)
struct writer {
unsigned char *buffer;
unsigned char previous_byte;
int buffer_pos;
int bufsize;
int global_pos;
int(*flush)(void*, unsigned int);
struct lzma_header *header;
};
struct cstate {
int state;
unsigned int rep0, rep1, rep2, rep3;
};
static inline int get_pos(struct writer *wr)
{
return
wr->global_pos + wr->buffer_pos;
}
static inline unsigned char peek_old_byte(struct writer *wr,
unsigned int offs)
{
if (!wr->flush) {
int pos;
while (offs > wr->header->dict_size)
offs -= wr->header->dict_size;
pos = wr->buffer_pos - offs;
return wr->buffer[pos];
} else {
unsigned int pos = wr->buffer_pos - offs;
while (pos >= wr->header->dict_size)
pos += wr->header->dict_size;
return wr->buffer[pos];
}
}
static inline int write_byte(struct writer *wr, unsigned char byte)
{
wr->buffer[wr->buffer_pos++] = wr->previous_byte = byte;
if (wr->flush && wr->buffer_pos == wr->header->dict_size) {
wr->buffer_pos = 0;
wr->global_pos += wr->header->dict_size;
if (wr->flush((char *)wr->buffer, wr->header->dict_size)
!= wr->header->dict_size)
return -1;
}
return 0;
}
static inline int copy_byte(struct writer *wr, unsigned int offs)
{
return write_byte(wr, peek_old_byte(wr, offs));
}
static inline int copy_bytes(struct writer *wr,
unsigned int rep0, int len)
{
do {
if (copy_byte(wr, rep0))
return -1;
len--;
} while (len != 0 && wr->buffer_pos < wr->header->dst_size);
return len;
}
static inline int process_bit0(struct writer *wr, struct rc *rc,
struct cstate *cst, unsigned short int *p,
int pos_state, unsigned short int *prob,
int lc, unsigned int literal_pos_mask) {
int mi = 1;
rc_update_bit_0(rc, prob);
prob = (p + LZMA_LITERAL +
(LZMA_LIT_SIZE
* (((get_pos(wr) & literal_pos_mask) << lc)
+ (wr->previous_byte >> (8 - lc))))
);
if (cst->state >= LZMA_NUM_LIT_STATES) {
int match_byte = peek_old_byte(wr, cst->rep0);
do {
int bit;
unsigned short int *prob_lit;
match_byte <<= 1;
bit = match_byte & 0x100;
prob_lit = prob + 0x100 + bit + mi;
if (rc_get_bit(rc, prob_lit, &mi)) {
if (!bit)
break;
} else {
if (bit)
break;
}
} while (mi < 0x100);
}
while (mi < 0x100) {
unsigned short int *prob_lit = prob + mi;
rc_get_bit(rc, prob_lit, &mi);
}
if (cst->state < 4)
cst->state = 0;
else if (cst->state < 10)
cst->state -= 3;
else
cst->state -= 6;
return write_byte(wr, mi);
}
static inline int process_bit1(struct writer *wr, struct rc *rc,
struct cstate *cst, unsigned short int *p,
int pos_state, unsigned short int *prob) {
int offset;
unsigned short int *prob_len;
int num_bits;
int len;
rc_update_bit_1(rc, prob);
prob = p + LZMA_IS_REP + cst->state;
if (rc_is_bit_0(rc, prob)) {
rc_update_bit_0(rc, prob);
cst->rep3 = cst->rep2;
cst->rep2 = cst->rep1;
cst->rep1 = cst->rep0;
cst->state = cst->state < LZMA_NUM_LIT_STATES ? 0 : 3;
prob = p + LZMA_LEN_CODER;
} else {
rc_update_bit_1(rc, prob);
prob = p + LZMA_IS_REP_G0 + cst->state;
if (rc_is_bit_0(rc, prob)) {
rc_update_bit_0(rc, prob);
prob = (p + LZMA_IS_REP_0_LONG
+ (cst->state <<
LZMA_NUM_POS_BITS_MAX) +
pos_state);
if (rc_is_bit_0(rc, prob)) {
rc_update_bit_0(rc, prob);
cst->state = cst->state < LZMA_NUM_LIT_STATES ?
9 : 11;
return copy_byte(wr, cst->rep0);
} else {
rc_update_bit_1(rc, prob);
}
} else {
unsigned int distance;
rc_update_bit_1(rc, prob);
prob = p + LZMA_IS_REP_G1 + cst->state;
if (rc_is_bit_0(rc, prob)) {
rc_update_bit_0(rc, prob);
distance = cst->rep1;
} else {
rc_update_bit_1(rc, prob);
prob = p + LZMA_IS_REP_G2 + cst->state;
if (rc_is_bit_0(rc, prob)) {
rc_update_bit_0(rc, prob);
distance = cst->rep2;
} else {
rc_update_bit_1(rc, prob);
distance = cst->rep3;
cst->rep3 = cst->rep2;
}
cst->rep2 = cst->rep1;
}
cst->rep1 = cst->rep0;
cst->rep0 = distance;
}
cst->state = cst->state < LZMA_NUM_LIT_STATES ? 8 : 11;
prob = p + LZMA_REP_LEN_CODER;
}
prob_len = prob + LZMA_LEN_CHOICE;
if (rc_is_bit_0(rc, prob_len)) {
rc_update_bit_0(rc, prob_len);
prob_len = (prob + LZMA_LEN_LOW
+ (pos_state <<
LZMA_LEN_NUM_LOW_BITS));
offset = 0;
num_bits = LZMA_LEN_NUM_LOW_BITS;
} else {
rc_update_bit_1(rc, prob_len);
prob_len = prob + LZMA_LEN_CHOICE_2;
if (rc_is_bit_0(rc, prob_len)) {
rc_update_bit_0(rc, prob_len);
prob_len = (prob + LZMA_LEN_MID
+ (pos_state <<
LZMA_LEN_NUM_MID_BITS));
offset = 1 << LZMA_LEN_NUM_LOW_BITS;
num_bits = LZMA_LEN_NUM_MID_BITS;
} else {
rc_update_bit_1(rc, prob_len);
prob_len = prob + LZMA_LEN_HIGH;
offset = ((1 << LZMA_LEN_NUM_LOW_BITS)
+ (1 << LZMA_LEN_NUM_MID_BITS));
num_bits = LZMA_LEN_NUM_HIGH_BITS;
}
}
rc_bit_tree_decode(rc, prob_len, num_bits, &len);
len += offset;
if (cst->state < 4) {
int pos_slot;
cst->state += LZMA_NUM_LIT_STATES;
prob =
p + LZMA_POS_SLOT +
((len <
LZMA_NUM_LEN_TO_POS_STATES ? len :
LZMA_NUM_LEN_TO_POS_STATES - 1)
<< LZMA_NUM_POS_SLOT_BITS);
rc_bit_tree_decode(rc, prob,
LZMA_NUM_POS_SLOT_BITS,
&pos_slot);
if (pos_slot >= LZMA_START_POS_MODEL_INDEX) {
int i, mi;
num_bits = (pos_slot >> 1) - 1;
cst->rep0 = 2 | (pos_slot & 1);
if (pos_slot < LZMA_END_POS_MODEL_INDEX) {
cst->rep0 <<= num_bits;
prob = p + LZMA_SPEC_POS +
cst->rep0 - pos_slot - 1;
} else {
num_bits -= LZMA_NUM_ALIGN_BITS;
while (num_bits--)
cst->rep0 = (cst->rep0 << 1) |
rc_direct_bit(rc);
prob = p + LZMA_ALIGN;
cst->rep0 <<= LZMA_NUM_ALIGN_BITS;
num_bits = LZMA_NUM_ALIGN_BITS;
}
i = 1;
mi = 1;
while (num_bits--) {
if (rc_get_bit(rc, prob + mi, &mi))
cst->rep0 |= i;
i <<= 1;
}
} else
cst->rep0 = pos_slot;
if (++(cst->rep0) == 0)
return 0;
if (cst->rep0 > wr->header->dict_size
|| cst->rep0 > get_pos(wr))
return -1;
}
len += LZMA_MATCH_MIN_LEN;
return copy_bytes(wr, cst->rep0, len);
}
int unlzma(unsigned char *buf, int in_len,
int(*fill)(void*, unsigned int),
int(*flush)(void*, unsigned int),
unsigned char *output,
int *posp,
void(*error)(char *x)
)
{
struct lzma_header header;
int lc, pb, lp;
unsigned int pos_state_mask;
unsigned int literal_pos_mask;
unsigned short int *p;
int num_probs;
struct rc rc;
int i, mi;
struct writer wr;
struct cstate cst;
unsigned char *inbuf;
int ret = -1;
rc.error = error;
if (buf)
inbuf = buf;
else
inbuf = malloc(LZMA_IOBUF_SIZE);
if (!inbuf) {
error("Could not allocate input bufer");
goto exit_0;
}
cst.state = 0;
cst.rep0 = cst.rep1 = cst.rep2 = cst.rep3 = 1;
wr.header = &header;
wr.flush = flush;
wr.global_pos = 0;
wr.previous_byte = 0;
wr.buffer_pos = 0;
rc_init(&rc, fill, inbuf, in_len);
for (i = 0; i < sizeof(header); i++) {
if (rc.ptr >= rc.buffer_end)
rc_read(&rc);
((unsigned char *)&header)[i] = *rc.ptr++;
}
if (header.pos >= (9 * 5 * 5)) {
error("bad header");
goto exit_1;
}
mi = 0;
lc = header.pos;
while (lc >= 9) {
mi++;
lc -= 9;
}
pb = 0;
lp = mi;
while (lp >= 5) {
pb++;
lp -= 5;
}
pos_state_mask = (1 << pb) - 1;
literal_pos_mask = (1 << lp) - 1;
ENDIAN_CONVERT(header.dict_size);
ENDIAN_CONVERT(header.dst_size);
if (header.dict_size == 0)
header.dict_size = 1;
if (output)
wr.buffer = output;
else {
wr.bufsize = MIN(header.dst_size, header.dict_size);
wr.buffer = malloc(wr.bufsize);
}
if (wr.buffer == NULL)
goto exit_1;
num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp));
p = (unsigned short int *) malloc(num_probs * sizeof(*p));
if (p == 0)
goto exit_2;
num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp));
for (i = 0; i < num_probs; i++)
p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1;
rc_init_code(&rc);
while (get_pos(&wr) < header.dst_size) {
int pos_state = get_pos(&wr) & pos_state_mask;
unsigned short int *prob = p + LZMA_IS_MATCH +
(cst.state << LZMA_NUM_POS_BITS_MAX) + pos_state;
if (rc_is_bit_0(&rc, prob)) {
if (process_bit0(&wr, &rc, &cst, p, pos_state, prob,
lc, literal_pos_mask)) {
error("LZMA data is corrupt");
goto exit_3;
}
} else {
if (process_bit1(&wr, &rc, &cst, p, pos_state, prob)) {
error("LZMA data is corrupt");
goto exit_3;
}
if (cst.rep0 == 0)
break;
}
if (rc.buffer_size <= 0)
goto exit_3;
}
if (posp)
*posp = rc.ptr-rc.buffer;
if (!wr.flush || wr.flush(wr.buffer, wr.buffer_pos) == wr.buffer_pos)
ret = 0;
exit_3:
free(p);
exit_2:
if (!output)
free(wr.buffer);
exit_1:
if (!buf)
free(inbuf);
exit_0:
return ret;
}
int unlzma_simple(unsigned char *buf, int in_len,
unsigned char *output,
void(*error)(char *x))
{
return unlzma(buf, in_len, NULL, NULL, output, NULL, error);
}