PHP 處理 mht 文件

<?php
/**
 * Created by PhpStorm.
 * User: kungyu
 * Date: 2015/12/3
 * Time: 15:30
 */
/**
 * Minimal reader for MHT / MHTML (MIME multipart) web archives.
 *
 * Typical use:
 *   $mht = new mhtparse();
 *   $mht->set_file('page.mht');
 *   if ($mht->extract()) {
 *       $html = $mht->get_part_to_file(0);
 *   }
 *
 * The class only splits the archive on its top-level boundary and decodes
 * the transfer encoding of a single part; nested multiparts and folded
 * part headers are not interpreted.
 */
class mhtparse {

    /** @var string Path of the archive to read. */
    public $file = '';

    /** @var string Part delimiter ("--" followed by the boundary parameter); '' when none was found. */
    public $boundary = '';

    /** @var string|array Raw file contents after read_filedata(); list of raw parts after file_parts(). */
    public $filedata = '';

    /** @var int Number of entries in $filedata after file_parts() (the closing "--" remainder counts as one). */
    public $countparts = 1;

    /** @var string Error messages collected so far, one per line. */
    public $log = '';

    /**
     * Reads the configured file and splits it into parts.
     *
     * @return int 1 on success, 0 when the file could not be read (details in get_log()).
     */
    public function extract() {
        if (! $this->read_filedata()) {
            return 0;
        }
        $this->file_parts();

        return 1;
    }

    /**
     * Sets the path of the archive to read.
     *
     * @param string $p File path.
     */
    public function set_file($p) {
        $this->file = $p;
    }

    /**
     * Returns the error messages collected so far.
     *
     * @return string
     */
    public function get_log() {
        return $this->log;
    }

    /**
     * Detects the multipart boundary and splits $filedata into raw parts.
     *
     * The boundary parameter is searched for in the first 8192 bytes only.
     * Both the quoted form (boundary="abc") and the unquoted form
     * (boundary=abc) are accepted. Everything before the first delimiter
     * (top-level headers and preamble) is discarded. When no boundary is
     * found the whole file becomes the single part 0.
     */
    public function file_parts() {
        if (! is_string($this->filedata)) {
            // Already split by an earlier call; splitting again would fail on the array.
            return;
        }

        // Reset so a reused instance never keeps the previous file's boundary.
        $this->boundary = '';
        $head = (string) substr($this->filedata, 0, 8192);
        if (preg_match('/boundary\s*=\s*(?:"([^"]+)"|([^\s;"]+))/i', $head, $m)) {
            // Group 1 is the quoted value, group 2 the bare token.
            $value = ($m[1] !== '') ? $m[1] : $m[2];
            $this->boundary = '--' . $value;
        }

        if ($this->boundary !== '') {
            // Index 0 holds the archive headers/preamble, which is not a part.
            $this->filedata = array_slice(explode($this->boundary, $this->filedata), 1);
            $this->countparts = count($this->filedata);
        } else {
            $this->filedata = array($this->filedata);
            $this->countparts = 1;
        }
    }

    /**
     * Returns the data produced by the last read/split step.
     *
     * @return string|array List of raw parts after file_parts(), raw file contents before it.
     */
    public function get_all_part_file() {
        return $this->filedata;
    }

    /**
     * Returns the decoded body of one part.
     *
     * The part's header block ends at the first blank line. Only the
     * Content-Transfer-Encoding header is interpreted: "base64" and
     * "quoted-printable" (compared case-insensitively) are decoded, any
     * other encoding is returned unchanged. Line breaks inside the body are
     * preserved; the single line break that precedes the next delimiter is
     * removed because it belongs to the delimiter, not to the content.
     *
     * @param int $i Zero-based part index.
     * @return string|int Decoded body; 1 when the entry is the remainder of
     *                    the closing delimiter ("--"); '' when the index does
     *                    not exist.
     */
    public function get_part_to_file($i) {
        if (! is_array($this->filedata) || ! isset($this->filedata[$i])) {
            return '';
        }

        $lines = explode("\n", ltrim($this->filedata[$i]));

        // "--boundary--" leaves "--" behind after the split: end-of-archive marker.
        if (trim($lines[0]) === '--') {
            return 1;
        }

        $encoding = '';
        // Without a blank separator only the first line is dropped (legacy behaviour).
        $body_start = 1;
        foreach ($lines as $line_id => $line) {
            $line = trim($line);
            if ($line === '') {
                $body_start = $line_id + 1;
                break;
            }
            $pos = strpos($line, ':');
            if ($pos === false) {
                continue;
            }
            $name = strtolower(trim(substr($line, 0, $pos)));
            if ($name === 'content-transfer-encoding') {
                $encoding = strtolower(trim(substr($line, $pos + 1)));
            }
        }

        $body = implode("\n", array_slice($lines, $body_start));
        $body = preg_replace('/\r?\n\z/', '', $body);

        if ($encoding === 'base64') {
            // Non-strict decoding skips the line breaks inside the payload.
            return base64_decode($body);
        }
        if ($encoding === 'quoted-printable') {
            return quoted_printable_decode($body);
        }

        return $body;
    }

    /**
     * Loads the whole file named by $file into $filedata.
     *
     * On failure $filedata is set to '' and a message is appended to $log.
     *
     * @return bool TRUE when the file was read (an empty file is a success).
     */
    public function read_filedata() {
        $data = false;
        // Checked up front so that a missing path does not raise a warning.
        if (is_string($this->file) && $this->file !== '' && is_file($this->file) && is_readable($this->file)) {
            $data = file_get_contents($this->file);
        }
        if ($data === false) {
            $this->filedata = '';
            $this->log .= 'Cannot read file: ' . $this->file . "\n";

            return false;
        }
        $this->filedata = $data;

        return true;
    }

    /**
     * Extracts the text enclosed by a begin mark and its matching end mark.
     *
     * Nested pairs are honoured: every further begin mark found before the
     * current end mark pushes the end to the next end mark. With identical
     * begin and end marks (for example '"') the text between the first two
     * marks is returned.
     *
     * Example: for "sssss { x { xx } {xx{xx } x} x} sssss" with '{' and '}'
     * the result is range " x { xx } {xx{xx } x} x", begin 7, end 31.
     *
     * Based on getRange v1.1 (2004-2006) by Uku-Kaarel Jõesaar.
     *
     * @param string $subject       Text to search.
     * @param string $Beginmark_str Opening mark; an empty value falls back to '{'.
     * @param string $Endmark_str   Closing mark; an empty value falls back to '}'.
     * @param int    $Start_pos     Offset at which the search starts.
     * @return array|false array('range' => enclosed text,
     *                           'begin' => offset of the first enclosed character,
     *                           'end'   => offset just behind the closing mark),
     *                     or FALSE when no begin mark or no matching end mark exists.
     */
    public function getrange(&$subject, $Beginmark_str = '{', $Endmark_str = '}', $Start_pos = 0) {
        if (empty($Beginmark_str)) {
            $Beginmark_str = '{';
        }
        if (empty($Endmark_str)) {
            $Endmark_str = '}';
        }
        $begin_len = strlen($Beginmark_str);
        $end_len = strlen($Endmark_str);

        // An offset outside the string can never match (strpos() throws on PHP 8).
        if (abs($Start_pos) > strlen($subject)) {
            return false;
        }

        $first_begin = null;    // offset of the first enclosed character
        $begin_cursor = $Start_pos; // where the next begin-mark search starts
        $end_cursor = null;     // where the next end-mark search starts
        $range_end = null;      // offset of the end mark currently closing the range
        $range_len = null;

        while (true) {
            $found = strpos($subject, $Beginmark_str, $begin_cursor);
            if ($found === false) {
                if ($first_begin === null) {
                    return false; // no begin mark at all
                }
                break; // no further nesting: the current end mark is final
            }
            $begin_cursor = $found + $begin_len;

            if ($first_begin === null) {
                $first_begin = $begin_cursor;
                $end_cursor = $begin_cursor;
            } elseif ($begin_cursor > $range_end) {
                break; // this begin mark lies behind the range, so the range is closed
            }

            // One more begin mark inside the range needs one more end mark.
            $end_pos = strpos($subject, $Endmark_str, $end_cursor);
            if ($end_pos === false) {
                return false; // unbalanced: end mark missing
            }
            $range_len = $end_pos - $first_begin;
            $end_cursor = $end_pos + $end_len;
            $range_end = $end_pos;
        }

        return array(
            'range' => (string) substr($subject, $first_begin, $range_len),
            'begin' => $first_begin,
            'end' => $end_cursor
        );
    } // end getrange()
} // class
/*$filename = './test.mht';
if (file_exists ( $filename )) {
    if (is_dir ( $filename )) return false;

    $filename = strtolower ( $filename );
    if (strpos ( $filename, '.mht', 1 ) == FALSE) return false;


    $o_mhtml = new mhtparse ();
    $o_mhtml->set_file ( $filename );
    $o_mhtml->extract ();
    $res =  $o_mhtml->get_part_to_file(0);
    var_dump($res);
}*/
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章