488 lines
15 KiB
PHP
488 lines
15 KiB
PHP
|
<?php
|
||
|
// Information needed to parse an element type
|
||
|
/**
 * Plain record describing one EBML element type.
 *
 * All fields start out as NULL and are filled in by whoever builds the
 * record (EBMLElementTypeList does so while reading its definition file).
 */
class EBMLElementType {
    /** Human-readable element name. */
    public $name = NULL;

    /** Datatype keyword used to pick a decoder (e.g. 'uint', 'container'). */
    public $datatype = NULL;

    /** List of allowed parents: numeric ids, 'root', or the wildcard '*'. */
    public $validParents = NULL;
}
|
||
|
|
||
|
// Information needed to parse all possible element types in a document
|
||
|
/**
 * Registry of every element type that may occur in a document.
 *
 * The registry is loaded from a text file with one definition per line:
 *
 *     <hex id> <datatype> <name> [<parent> ...]
 *
 * where each <parent> is a hex id, the literal 'root', or the wildcard '*'.
 * Fields are separated by single spaces.
 */
class EBMLElementTypeList {
    /** Map of numeric element id => EBMLElementType. */
    private $_els = array();

    /**
     * Load the element definitions.
     *
     * @param string $filename Path of the definition file.
     * @throws Exception If the file cannot be read.
     */
    public function __construct($filename) {
        $lines = file($filename);
        if ($lines === FALSE) {
            throw new Exception('cannot read element type list');
        }
        foreach ($lines as $line) {
            $fields = explode(' ', trim($line));
            // Blank or truncated lines carry no usable definition; without
            // this check they would register a bogus entry under id 0.
            if (count($fields) < 3) {
                continue;
            }
            $t = new EBMLElementType;
            $id = hexdec($fields[0]);
            $t->datatype = $fields[1];
            $t->name = $fields[2];
            $t->validParents = array();
            foreach (array_slice($fields, 3) as $parent) {
                if ($parent === '') {
                    continue; // stray double space
                }
                if ($parent === '*' || $parent === 'root') {
                    $t->validParents[] = $parent;
                } else {
                    $t->validParents[] = hexdec($parent);
                }
            }
            $this->_els[$id] = $t;
        }
    }

    /**
     * Tell whether an element id is defined.
     *
     * @param int $id Element id.
     * @return bool
     */
    public function exists($id) {
        return isset($this->_els[$id]);
    }

    /**
     * Look up the name of an element.
     *
     * @param int $id Element id.
     * @return string|null Element name, or NULL for an unknown id.
     */
    public function name($id) {
        if (!isset($this->_els[$id])) return NULL;
        return $this->_els[$id]->name;
    }

    /**
     * Look up the datatype of an element.
     *
     * @param int|string $id Element id, or the literal 'root'.
     * @return string 'container' for 'root', 'binary' for unknown ids,
     *                otherwise the datatype from the definition file.
     */
    public function datatype($id) {
        // Strict comparison: on PHP < 8 a loose 0 == 'root' is TRUE, which
        // would turn a (malformed) element with id 0 into a container.
        if ($id === 'root') return 'container';
        if (!isset($this->_els[$id])) return 'binary';
        return $this->_els[$id]->datatype;
    }

    /**
     * Tell whether element $id2 may appear directly inside element $id1.
     *
     * Unknown children are always accepted.
     *
     * @param int|string $id1 Parent element id, or 'root'.
     * @param int        $id2 Child element id.
     * @return bool
     */
    public function validChild($id1, $id2) {
        if (!isset($this->_els[$id2])) return TRUE;
        foreach ($this->_els[$id2]->validParents as $parent) {
            if ($parent === '*') return TRUE;
            if (is_string($parent) || is_string($id1)) {
                // Keywords only ever match themselves, never a numeric id.
                if ($parent === $id1) return TRUE;
            } elseif ($parent == $id1) {
                // Numeric ids may be int or float depending on magnitude.
                return TRUE;
            }
        }
        return FALSE;
    }
}
|
||
|
|
||
|
// Decode big-endian integer
|
||
|
/**
 * Decode a big-endian integer of at most eight bytes.
 *
 * The result is an int while it fits; once another byte could overflow the
 * native integer range the accumulator is switched to float.
 *
 * @param string    $data    Raw bytes, most significant first.
 * @param bool      $signed  Interpret the first byte as two's complement.
 * @param int|float $carryIn Value of more significant bits decoded earlier.
 * @return int|float
 * @throws Exception If $data is longer than eight bytes.
 */
function ebmlDecodeInt($data, $signed=FALSE, $carryIn=0) {
    $length = strlen($data);
    if ($length > 8) {
        throw new Exception('not supported: integer too long');
    }

    // Largest/smallest accumulator values that survive one more byte shift.
    $upperLimit = PHP_INT_MAX >> 8;
    $lowerLimit = (-PHP_INT_MAX - 1) >> 8;

    $value = $carryIn;
    $offset = 0;
    while ($offset < $length) {
        if ($value > $upperLimit || $value < $lowerLimit) {
            $value = floatval($value);
        }
        $value = $value * 0x100 + ord($data[$offset]);
        // A set top bit in the leading byte of a signed value is negative.
        if ($offset == 0 && $signed && ($value & 0x80) != 0) {
            $value -= 0x100;
        }
        $offset++;
    }
    return $value;
}
|
||
|
|
||
|
// Decode big-endian IEEE float
|
||
|
/**
 * Decode a big-endian IEEE float of four or eight bytes.
 *
 * The host byte order is detected by packing 1e9 in machine format and
 * comparing the result with the known little- and big-endian encodings.
 *
 * @param string $data Raw bytes.
 * @return int|float|null 0 for empty input, the decoded float, or NULL
 *                        (after logging) when the length or the host float
 *                        layout is unsupported.
 */
function ebmlDecodeFloat($data) {
    $length = strlen($data);
    if ($length == 0) {
        return 0;
    }

    if ($length == 4) {
        $format = 'f';
        $littleEndianProbe = '(knN';
        $bigEndianProbe = 'Nnk(';
    } elseif ($length == 8) {
        $format = 'd';
        $littleEndianProbe = "\x00\x00\x00\x00\x65\xcd\xcd\x41";
        $bigEndianProbe = "\x41\xcd\xcd\x65\x00\x00\x00\x00";
    } else {
        error_log('unsupported float length');
        return NULL;
    }

    $probe = pack($format, 1e9);
    if ($probe == $littleEndianProbe) {
        // Host is little-endian: flip the big-endian input first.
        $bytes = strrev($data);
    } elseif ($probe == $bigEndianProbe) {
        $bytes = $data;
    } else {
        error_log('cannot decode floats');
        return NULL;
    }

    $unpacked = unpack($format, $bytes);
    return $unpacked[1];
}
|
||
|
|
||
|
// Decode big-endian signed offset from Jan 01, 2000 in nanoseconds
|
||
|
// Convert to offset from Jan 01, 1970 in seconds
|
||
|
/**
 * Decode an EBML date into a Unix timestamp.
 *
 * The input is a signed big-endian count of nanoseconds since
 * Jan 01, 2000; the result is seconds since Jan 01, 1970.
 *
 * @param string $data Raw bytes.
 * @return float Seconds since the Unix epoch.
 */
function ebmlDecodeDate($data) {
    $nanosecondsSince2000 = ebmlDecodeInt($data, TRUE);
    $secondsSince2000 = $nanosecondsSince2000 * 1e-9;
    // 946684800 is the number of seconds between the two epochs.
    return $secondsSince2000 + 946684800;
}
|
||
|
|
||
|
// Decode data of specified datatype
|
||
|
/**
 * Decode raw element data according to its datatype keyword.
 *
 * @param string $data     Raw bytes.
 * @param string $datatype One of 'int', 'uint', 'float', 'string', 'date'
 *                         or 'binary'.
 * @return mixed Decoded value; strings lose trailing NUL padding and
 *               binary data is returned unchanged.
 * @throws Exception For any other datatype.
 */
function ebmlDecode($data, $datatype) {
    if ($datatype == 'int') {
        return ebmlDecodeInt($data, TRUE);
    }
    if ($datatype == 'uint') {
        return ebmlDecodeInt($data, FALSE);
    }
    if ($datatype == 'float') {
        return ebmlDecodeFloat($data);
    }
    if ($datatype == 'string') {
        return rtrim($data, "\0");
    }
    if ($datatype == 'date') {
        return ebmlDecodeDate($data);
    }
    if ($datatype == 'binary') {
        return $data;
    }
    throw new Exception('unknown datatype');
}
|
||
|
|
||
|
// Methods for reading data from section of EBML file
|
||
|
/**
 * Cursor over a byte range ("data section") of an open, seekable file.
 *
 * A reader covers $size bytes starting at absolute file offset $offset and
 * keeps its own position relative to that offset. The size may be NULL
 * (unknown); it is then fixed later by setSize() or by endOfData() once the
 * end of the file is reached.
 */
class EBMLReader {
    private $_fileHandle;
    private $_offset;
    private $_size;
    private $_position;

    /**
     * @param resource $fileHandle Open file handle.
     * @param int      $offset     Absolute file offset of the section start.
     * @param int|null $size       Section length in bytes, NULL if unknown.
     */
    public function __construct($fileHandle, $offset=0, $size=NULL) {
        $this->_fileHandle = $fileHandle;
        $this->_offset = $offset;
        $this->_size = $size;
        $this->_position = 0;
    }

    // Tell position within data section
    public function position() {
        return $this->_position;
    }

    // Set position within data section
    public function setPosition($position) {
        $this->_position = $position;
    }

    // Total size of data section (NULL if unknown)
    public function size() {
        return $this->_size;
    }

    /**
     * Set end of data section. Only allowed while the size is unknown.
     *
     * @param int $size Section length in bytes.
     * @throws Exception If the size is already known.
     */
    public function setSize($size) {
        if ($this->_size === NULL) {
            $this->_size = $size;
        } else {
            throw new Exception('size already set');
        }
    }

    /**
     * Determine whether the current position is at (or past) the end of data.
     *
     * For a section of unknown size the file is probed one byte ahead; when
     * that hits end-of-file the current position is recorded as the size.
     *
     * @return bool
     */
    public function endOfData() {
        if ($this->_size === NULL) {
            fseek($this->_fileHandle, $this->_offset + $this->_position);
            fread($this->_fileHandle, 1);
            if (feof($this->_fileHandle)) {
                $this->_size = $this->_position;
                return TRUE;
            } else {
                return FALSE;
            }
        } else {
            return $this->_position >= $this->_size;
        }
    }

    /**
     * Create a reader for the next $size bytes and advance past them.
     *
     * With $size NULL the slice has unknown size and the position is left
     * unchanged.
     *
     * @param int|null $size Slice length in bytes, NULL if unknown.
     * @return EBMLReader
     * @throws Exception If the slice would extend past a known section end.
     */
    public function nextSlice($size) {
        $slice = new EBMLReader($this->_fileHandle, $this->_offset + $this->_position, $size);
        if ($size !== NULL) {
            $this->_position += $size;
            if ($this->_size !== NULL && $this->_position > $this->_size) {
                throw new Exception('unexpected end of data');
            }
        }
        return $slice;
    }

    /**
     * Read the entire data section, independent of the current position.
     *
     * @return string
     * @throws Exception If the size is unknown or the bytes cannot be read.
     */
    public function readAll() {
        // The NULL test must come first: NULL == 0 is TRUE in PHP, so a
        // loose zero test would hide the unknown-length case.
        if ($this->_size === NULL) throw new Exception('unknown length');
        if ($this->_size == 0) return '';
        if (fseek($this->_fileHandle, $this->_offset) !== 0) {
            throw new Exception('error reading from file');
        }
        // fread() may return fewer bytes than requested on streams that are
        // not plain files, so keep reading until the section is complete.
        $data = '';
        $remaining = $this->_size;
        while ($remaining > 0) {
            $chunk = fread($this->_fileHandle, $remaining);
            if ($chunk === FALSE || $chunk === '') {
                break;
            }
            $data .= $chunk;
            $remaining -= strlen($chunk);
        }
        if ($remaining != 0) {
            throw new Exception('error reading from file');
        }
        return $data;
    }

    /**
     * Read $size bytes at the current position and advance past them.
     *
     * @param int $size Number of bytes.
     * @return string
     * @throws Exception On a short read or when reading past the section end.
     */
    public function read($size) {
        return $this->nextSlice($size)->readAll();
    }

    /**
     * Read a variable-length integer at the current position.
     *
     * The number of leading zero bits in the first byte gives the number of
     * additional bytes; the marker bit that follows is removed from the
     * value. With $signed the value is shifted so that the encodable range
     * is centred on zero.
     *
     * @param bool $signed Apply the signed range shift.
     * @return int|float|null Decoded value, or NULL when all value bits are
     *                        ones (the reserved encoding).
     * @throws Exception If the integer is longer than eight bytes.
     */
    public function readVarInt($signed=FALSE) {
        // Read size and remove flag
        $n = ord($this->read(1));
        $size = 0;
        if ($n == 0) {
            throw new Exception('not supported: variable-length integer too long');
        }
        $flag = 0x80;
        while (($n & $flag) == 0) {
            $flag = $flag >> 1;
            $size++;
        }
        $n -= $flag;

        // Read remaining data
        $rawInt = $this->read($size);

        // Check for all ones
        if ($n == $flag - 1 && $rawInt === str_repeat("\xFF", $size)) {
            return NULL;
        }

        // Range shift for signed integers
        if ($signed) {
            if ($flag == 0x01) {
                // The first byte holds no value bits, so the half-range is
                // subtracted from the next byte, which is then consumed.
                $n = ord($rawInt[0]) - 0x80;
                $rawInt = substr($rawInt, 1);
            } else {
                $n -= ($flag >> 1);
            }
        }

        // Convert to integer
        $n = ebmlDecodeInt($rawInt, FALSE, $n);

        // Range shift for signed integers
        if ($signed) {
            if ($n == PHP_INT_MAX) {
                $n = floatval($n); // avoid integer overflow on increment
            }
            $n++;
        }

        return $n;
    }
}
|
||
|
|
||
|
// EBML element
|
||
|
/**
 * A single EBML element: its id, the type information looked up for that
 * id, and a reader positioned on the element's content.
 */
class EBMLElement {
    private $_id;
    private $_name;
    private $_datatype;
    private $_content;
    private $_headSize;
    protected $_elementTypeList;

    /**
     * @param int|string $id              Element id.
     * @param object     $content         Reader covering the element content.
     * @param int        $headSize        Bytes taken by the id and size fields.
     * @param object     $elementTypeList Registry used to resolve name and datatype.
     */
    public function __construct($id, $content, $headSize, $elementTypeList) {
        $this->_elementTypeList = $elementTypeList;
        $this->_id = $id;
        $this->_content = $content;
        $this->_headSize = $headSize;
        $this->_name = $elementTypeList->name($id);
        $this->_datatype = $elementTypeList->datatype($id);
    }

    /** Element id. */
    public function id() {
        return $this->_id;
    }

    /** Element name as resolved by the type registry. */
    public function name() {
        return $this->_name;
    }

    /** Datatype keyword as resolved by the type registry. */
    public function datatype() {
        return $this->_datatype;
    }

    /** Reader covering the element content. */
    public function content() {
        return $this->_content;
    }

    /** Number of bytes taken by the id and size fields. */
    public function headSize() {
        return $this->_headSize;
    }

    // Total size of element (including ID and datasize)
    public function size() {
        $contentSize = $this->_content->size();
        return $this->_headSize + $contentSize;
    }

    /**
     * Read and interpret content.
     *
     * @return mixed The content reader itself for 'binary' elements,
     *               otherwise the decoded value.
     */
    public function value() {
        if ($this->_datatype != 'binary') {
            $raw = $this->_content->readAll();
            return ebmlDecode($raw, $this->_datatype);
        }
        return $this->_content;
    }
}
|
||
|
|
||
|
// Iterate over EBML elements in data
|
||
|
/**
 * Container element that iterates over the child elements in its content.
 *
 * Children are parsed lazily by valid() and cached by their byte offset
 * within the content; that offset is also the iteration key.
 */
class EBMLElementList extends EBMLElement implements Iterator {
    /** Parsed children, keyed by byte offset within the content. */
    private $_cache;

    /** Byte offset of the current child within the content. */
    private $_position;

    /** Upper bound on children visited by size() and get(). */
    private static $MAX_ELEMENTS = 10000;

    public function __construct($id, $content, $headSize, $elementTypeList) {
        parent::__construct($id, $content, $headSize, $elementTypeList);
        $this->_cache = array();
        $this->_position = 0;
    }

    // The attribute below silences the PHP 8.1+ return-type deprecation for
    // Iterator methods; older PHP versions parse the line as a comment.
    #[\ReturnTypeWillChange]
    public function rewind() {
        $this->_position = 0;
    }

    /**
     * @return EBMLElement|null Child at the current offset, NULL at the end.
     */
    #[\ReturnTypeWillChange]
    public function current() {
        if ($this->valid()) {
            return $this->_cache[$this->_position];
        } else {
            return NULL;
        }
    }

    #[\ReturnTypeWillChange]
    public function key() {
        return $this->_position;
    }

    /**
     * Advance past the current child.
     *
     * @throws Exception If the child extends past the end of the content.
     */
    #[\ReturnTypeWillChange]
    public function next() {
        $element = $this->current();
        if ($element === NULL) {
            return; // already at the end: nothing to step over
        }
        $this->_position += $element->size();
        if ($this->content()->size() !== NULL && $this->_position > $this->content()->size()) {
            throw new Exception('unexpected end of data');
        }
    }

    /**
     * Parse (or fetch from cache) the child at the current offset.
     *
     * When the content size is unknown, the first element that is not a
     * valid child of this container marks the end of the content.
     *
     * @return bool TRUE if a child exists at the current offset.
     * @throws Exception On a reserved id or a non-container child of
     *                   unknown size.
     */
    #[\ReturnTypeWillChange]
    public function valid() {
        if (isset($this->_cache[$this->_position])) return TRUE;
        $this->content()->setPosition($this->_position);
        if ($this->content()->endOfData()) return FALSE;
        $id = $this->content()->readVarInt();
        if ($id === NULL) throw new Exception('invalid ID');
        if ($this->content()->size() === NULL && !$this->_elementTypeList->validChild($this->id(), $id)) {
            $this->content()->setSize($this->_position);
            return FALSE;
        }
        $size = $this->content()->readVarInt();
        $headSize = $this->content()->position() - $this->_position;
        $content = $this->content()->nextSlice($size);
        if ($this->_elementTypeList->datatype($id) == 'container') {
            $element = new EBMLElementList($id, $content, $headSize, $this->_elementTypeList);
        } else {
            if ($size === NULL) {
                throw new Exception('non-container element of unknown size');
            }
            $element = new EBMLElement($id, $content, $headSize, $this->_elementTypeList);
        }
        $this->_cache[$this->_position] = $element;
        return TRUE;
    }

    /**
     * Total size of element (including ID and size).
     *
     * If the content size is unknown the children are walked to find the
     * end. The iteration cursor is restored afterwards so that calling this
     * during an outer foreach over the same list does not disturb it.
     *
     * @return int
     * @throws Exception If more than MAX_ELEMENTS children are encountered.
     */
    public function size() {
        if ($this->content()->size() === NULL) {
            $savedPosition = $this->_position;
            $iElement = 0;
            foreach ($this as $element) { // iterate over elements to find end
                $iElement++;
                if ($iElement > self::$MAX_ELEMENTS) throw new Exception('not supported: too many elements');
            }
            $this->_position = $savedPosition;
        }
        return $this->headSize() + $this->content()->size();
    }

    // Read and interpret content
    public function value() {
        return $this;
    }

    /**
     * Get element value by name.
     *
     * Returns the value of the first direct child with the given name. The
     * iteration cursor is restored afterwards, also when an exception is
     * thrown, so the call is safe inside a foreach over the same list.
     *
     * @param string $name         Child element name.
     * @param mixed  $defaultValue Returned when no child has that name.
     * @return mixed
     * @throws Exception If more than MAX_ELEMENTS children are encountered.
     */
    public function get($name, $defaultValue=NULL) {
        $savedPosition = $this->_position;
        $result = $defaultValue;
        try {
            $iElement = 0;
            foreach ($this as $element) {
                $iElement++;
                if ($iElement > self::$MAX_ELEMENTS) throw new Exception('not supported: too many elements');
                if ($element->name() === $name) {
                    $result = $element->value();
                    break;
                }
            }
        } catch (Exception $e) {
            $this->_position = $savedPosition;
            throw $e;
        }
        $this->_position = $savedPosition;
        return $result;
    }
}
|
||
|
|
||
|
// Parse block
|
||
|
/**
 * Parsed block: header fields plus one reader per laced frame.
 *
 * The flags byte is interpreted as: 0x80 keyframe, 0x70 reserved (must be
 * zero), 0x08 invisible, 0x06 lacing mode, 0x01 discardable.
 */
class MatroskaBlock {
    const LACING_NONE = 0;
    const LACING_XIPH = 1;
    const LACING_EBML = 3;
    const LACING_FIXED = 2;

    public $trackNumber;
    public $timecode;
    public $keyframe;
    public $invisible;
    public $lacing;
    public $discardable;

    /** List of readers, one per frame, in stream order. */
    public $frames;

    /**
     * Parse a block.
     *
     * @param object $reader Reader covering exactly the block data; its
     *                       size must be known because the last frame takes
     *                       whatever remains after the other frames.
     * @throws Exception On unknown reader size, reserved flags, or
     *                   inconsistent lacing sizes.
     */
    public function __construct($reader) {
        // Without a known size the remaining length below would come out
        // negative and yield corrupt frame slices.
        if ($reader->size() === NULL) {
            throw new Exception('block of unknown size');
        }

        # Header
        $this->trackNumber = $reader->readVarInt();
        $this->timecode = ebmlDecodeInt($reader->read(2), TRUE);
        $flags = ord($reader->read(1));
        if (($flags & 0x70) != 0) {
            throw new Exception('reserved flags set');
        }
        $this->keyframe = (($flags & 0x80) != 0);
        $this->invisible = (($flags & 0x08) != 0);
        $this->lacing = ($flags >> 1) & 0x03;
        $this->discardable = (($flags & 0x01) != 0);

        # Lacing sizes
        // $nsizes counts the frames with an explicit size; one more frame
        // always follows and takes the rest of the data.
        if ($this->lacing == self::LACING_NONE) {
            $nsizes = 0;
        } else {
            $nsizes = ord($reader->read(1));
        }
        $sizes = array();
        switch ($this->lacing) {
            case self::LACING_XIPH:
                // Each size is a run of bytes summed up; a byte below 255
                // ends the run.
                for ($i = 0; $i < $nsizes; $i++) {
                    $size = 0;
                    $x = 255;
                    while ($x == 255) {
                        $x = ord($reader->read(1));
                        $size += $x;
                        if ($size > 65536) throw new Exception('not supported: laced frame too long');
                    }
                    $sizes[$i] = $size;
                }
                break;
            case self::LACING_EBML:
                // First size is absolute, the following ones are signed
                // differences from the previous size.
                $size = 0;
                for ($i = 0; $i < $nsizes; $i++) {
                    $dsize = $reader->readVarInt($i != 0);
                    if ($dsize === NULL || $size + $dsize < 0) {
                        throw new Exception('invalid frame size');
                    }
                    $size += $dsize;
                    $sizes[$i] = $size;
                }
                break;
            case self::LACING_FIXED:
                // All frames share the remaining data equally.
                $lenRemaining = $reader->size() - $reader->position();
                if ($lenRemaining % ($nsizes + 1) != 0) {
                    throw new Exception('data size not divisible by frame count');
                }
                $size = (int) ($lenRemaining / ($nsizes + 1));
                for ($i = 0; $i < $nsizes; $i++) {
                    $sizes[$i] = $size;
                }
                break;
        }

        # Frames
        $this->frames = array();
        for ($i = 0; $i < $nsizes; $i++) {
            $this->frames[$i] = $reader->nextSlice($sizes[$i]);
        }
        $this->frames[$nsizes] = $reader->nextSlice($reader->size() - $reader->position());
    }
}
|
||
|
|
||
|
// Create element list from $fileHandle
|
||
|
/**
 * Create the root element list for a Matroska/WebM file.
 *
 * Checks the four-byte EBML magic, loads the element definitions from
 * matroska-elements.txt next to this file and validates the EBML header:
 * EBMLReadVersion must be 1, DocType 'matroska' or 'webm', and
 * DocTypeReadVersion between 1 and 4 (both versions default to 1).
 *
 * @param resource $fileHandle Open, seekable file handle.
 * @return EBMLElementList Root list iterating over the top-level elements.
 * @throws Exception If the file is not EBML or is of an unsupported kind.
 */
function readMatroska($fileHandle) {
    $reader = new EBMLReader($fileHandle);
    if ($reader->read(4) != "\x1a\x45\xdf\xa3") {
        throw new Exception('not an EBML file');
    }
    $matroskaElementTypeList = new EBMLElementTypeList(dirname(__FILE__) . '/matroska-elements.txt');
    $root = new EBMLElementList('root', $reader, 0, $matroskaElementTypeList);
    $header = $root->get('EBML');
    // get() yields NULL when no child is named 'EBML' and a non-list value
    // when the definition file does not type it as a container; calling
    // get() on either would be a fatal error rather than an exception.
    if (!($header instanceof EBMLElementList)) {
        throw new Exception('EBML header not found');
    }
    $ebmlVersion = $header->get('EBMLReadVersion', 1);
    $docType = $header->get('DocType');
    $docTypeVersion = $header->get('DocTypeReadVersion', 1);
    if ($ebmlVersion != 1) {
        throw new Exception('unsupported EBML version');
    }
    if ($docType != 'matroska' && $docType != 'webm') {
        throw new Exception ('unsupported document type');
    }
    if ($docTypeVersion < 1 || $docTypeVersion > 4) {
        throw new Exception ('unsupported document type version');
    }
    return $root;
}
|