Dotclear

source: build-tools/Minifier.php @ 3874:ab8368569446

Revision 3874:ab8368569446, 17.5 KB checked in by franck <carnet.franck.paul@…>, 7 years ago (diff)

short notation for array (array() → [])

Line 
1<?php
2/*
3 * This file is part of the JShrink package.
4 *
5 * (c) Robert Hafner <tedivm@tedivm.com>
6 *
7 * For the full copyright and license information, please view the LICENSE
8 * file that was distributed with this source code.
9 */
10
11/**
12 * JShrink
13 *
14 *
15 * @package    JShrink
16 * @author     Robert Hafner <tedivm@tedivm.com>
17 */
18
19namespace JShrink;
20
21/**
22 * Minifier
23 *
24 * Usage - Minifier::minify($js);
25 * Usage - Minifier::minify($js, $options);
26 * Usage - Minifier::minify($js, ['flaggedComments' => false]);
27 *
28 * @package JShrink
29 * @author Robert Hafner <tedivm@tedivm.com>
30 * @license http://www.opensource.org/licenses/bsd-license.php  BSD License
31 */
class Minifier
{
    /**
     * The input javascript to be minified.
     *
     * @var string
     */
    protected $input;

    /**
     * The location of the character (in the input string) that is next to be
     * processed.
     *
     * @var int
     */
    protected $index = 0;

    /**
     * The first of the characters currently being looked at.
     *
     * @var string|false
     */
    protected $a = '';

    /**
     * The next character being looked at (after a).
     *
     * @var string|false
     */
    protected $b = '';

    /**
     * Look ahead buffer. It is only set while getReal() has peeked one
     * character past a "/" and is consumed by the next getChar() call.
     *
     * @var string|false
     */
    protected $c;

    /**
     * Contains the options for the current minification process.
     *
     * @var array
     */
    protected $options;

    /**
     * Contains the default options for minification. This array is merged with
     * the one passed in by the user to create the request specific set of
     * options (stored in the $options attribute).
     *
     * @var array
     */
    protected static $defaultOptions = ['flaggedComments' => true];

    /**
     * Maps lock placeholders (inserted by lock()) to the text they stand for
     * so that unlock() can restore it after minification.
     *
     * @var array
     */
    protected $locks = [];

    /**
     * Takes a string containing javascript and removes unneeded characters in
     * order to shrink the code without altering its functionality.
     *
     * The minified code is written with echo into an output buffer that is
     * opened and closed by this method, so nothing leaks to the real output.
     *
     * @param  string      $js      The raw javascript to be minified
     * @param  array       $options Various runtime options in an associative array
     * @throws \Exception  Rethrows whatever the minification raised, after
     *                     discarding the partial output.
     * @return bool|string
     */
    public static function minify($js, $options = [])
    {
        try {
            ob_start();

            $jshrink = new Minifier();
            $js = $jshrink->lock($js);
            $jshrink->minifyDirectToOutput($js, $options);

            // Sometimes there's a leading new line, so we trim that out here.
            $js = ltrim(ob_get_clean());
            $js = $jshrink->unlock($js);
            unset($jshrink);

            return $js;
        } catch (\Exception $e) {
            if (isset($jshrink)) {
                // The run was interrupted half way, so reset the instance
                // state before discarding it.
                $jshrink->clean();
                unset($jshrink);
            }

            // Without this call the partially written javascript would stay
            // in the buffer and end up in the caller's output.
            ob_end_clean();

            throw $e;
        }
    }

    /**
     * Processes a javascript string and outputs only the required characters,
     * stripping out all unneeded characters.
     *
     * @param string $js      The raw javascript to be minified
     * @param array  $options Various runtime options in an associative array
     */
    protected function minifyDirectToOutput($js, $options)
    {
        $this->initialize($js, $options);
        $this->loop();
        $this->clean();
    }

    /**
     * Initializes internal variables and normalizes new lines.
     *
     * @param string $js      The raw javascript to be minified
     * @param array  $options Various runtime options in an associative array
     */
    protected function initialize($js, $options)
    {
        $this->options = array_merge(static::$defaultOptions, $options);

        // Normalize every line ending to "\n" and drop empty block comments,
        // which processMultiLineComments() cannot recognise as closed.
        $js = str_replace("\r\n", "\n", $js);
        $js = str_replace('/**/', '', $js);
        $this->input = str_replace("\r", "\n", $js);

        // We add a newline to the end of the script to make it easier to deal
        // with comments at the bottom of the script - this prevents the
        // unclosed comment error that can otherwise occur. A literal "\n" is
        // used (not PHP_EOL) so the already normalized input stays free of
        // "\r" on every platform.
        $this->input .= "\n";

        // Populate "a" with a new line, "b" with the first character, before
        // entering the loop.
        $this->a = "\n";
        $this->b = $this->getReal();
    }

    /**
     * The primary action occurs here. This function loops through the input
     * string, outputting anything that's relevant and discarding anything
     * that is not.
     */
    protected function loop()
    {
        while ($this->a !== false && !is_null($this->a) && $this->a !== '') {
            switch ($this->a) {
                // new lines
                case "\n":
                    // If the next line starts with something that can't stand
                    // alone, preserve the newline.
                    if (static::isOneOf($this->b, '(-+{[@')) {
                        echo $this->a;
                        $this->saveString();
                        break;
                    }

                    // If b is a space we skip the rest of the switch block and
                    // go down to the string/regex check below, resetting b
                    // with getReal.
                    if ($this->b === ' ') {
                        break;
                    }

                    // Otherwise we treat the newline like a space.
                    // no break
                case ' ':
                    if (static::isAlphaNumeric($this->b)) {
                        echo $this->a;
                    }

                    $this->saveString();
                    break;

                default:
                    switch ($this->b) {
                        case "\n":
                            // Keep a only when dropping the newline after it
                            // could merge two statements.
                            if (static::isOneOf($this->a, '}])+-"\'')
                                || static::isAlphaNumeric($this->a)) {
                                echo $this->a;
                                $this->saveString();
                            }
                            break;

                        case ' ':
                            if (!static::isAlphaNumeric($this->a)) {
                                break;
                            }

                            // no break
                        default:
                            // A regex that starts with a quote would otherwise
                            // be mistaken for the start of a string.
                            if ($this->a === '/' && ($this->b === '\'' || $this->b === '"')) {
                                $this->saveRegex();

                                // saveRegex() already loaded the next real
                                // character into b, so restart the while loop
                                // directly. A bare "continue" would only leave
                                // the inner switch and the getReal() call
                                // below would swallow that character.
                                continue 3;
                            }

                            echo $this->a;
                            $this->saveString();
                            break;
                    }
            }

            // do reg check of doom
            $this->b = $this->getReal();

            if ($this->b === '/' && static::isOneOf($this->a, '(,=:[!&|?')) {
                $this->saveRegex();
            }
        }
    }

    /**
     * Resets attributes that do not need to be stored between requests so that
     * the next request is ready to go. Another reason for this is to make sure
     * the variables are cleared and are not taking up memory.
     */
    protected function clean()
    {
        unset($this->input);
        $this->index = 0;
        $this->a = $this->b = '';
        unset($this->c);
        unset($this->options);
    }

    /**
     * Returns the next character for processing based off of the current
     * index, or the look ahead character when one is buffered.
     *
     * @return string|false The character, or false once the input is exhausted.
     */
    protected function getChar()
    {
        if (isset($this->c)) {
            // Check to see if we had anything in the look ahead buffer and
            // use that.
            $char = $this->c;
            unset($this->c);

            // The look ahead itself may have hit the end of the input.
            if ($char === false) {
                return false;
            }
        } else {
            // Otherwise we start pulling from the input. The bounds are tested
            // explicitly because substr() reports "past the end" as '' on
            // PHP 8 and as false on older versions.
            if (!isset($this->input[$this->index])) {
                return false;
            }

            $char = $this->input[$this->index];
            $this->index++;
        }

        // Normalize all whitespace except for the newline character into a
        // standard space.
        if ($char !== "\n" && ord($char) < 32) {
            return ' ';
        }

        return $char;
    }

    /**
     * This function gets the next "real" character. It is essentially a
     * wrapper around the getChar function that skips comments. This has
     * significant performance benefits as the skipping is done using native
     * functions (ie, c code) rather than in script php.
     *
     * @return string|false      Next 'real' character to be processed.
     * @throws \RuntimeException
     */
    protected function getReal()
    {
        $startIndex = $this->index;
        $char = $this->getChar();

        // Check to see if we're potentially in a comment
        if ($char !== '/') {
            return $char;
        }

        // Peek at the following character; it stays in the look ahead buffer
        // unless one of the comment handlers consumes it.
        $this->c = $this->getChar();

        if ($this->c === '/') {
            return $this->processOneLineComments($startIndex);
        }

        if ($this->c === '*') {
            return $this->processMultiLineComments($startIndex);
        }

        return $char;
    }

    /**
     * Removes one line comments, with the exception of conditional comments
     * (those starting with "//@"), which are returned verbatim.
     *
     * @param  int          $startIndex The index point where "getReal" function started
     * @return string|false The next character, or a newline followed by the
     *                      whole comment when it is a conditional comment.
     */
    protected function processOneLineComments($startIndex)
    {
        $thirdCommentString = substr($this->input, $this->index, 1);

        // kill rest of line
        $this->getNext("\n");

        if ($thirdCommentString === '@') {
            $endPoint = $this->index - $startIndex;
            unset($this->c);

            return "\n" . substr($this->input, $startIndex, $endPoint);
        }

        // The first call only drains the look ahead buffer ($this->c); the
        // second one reads the character at the new index.
        $this->getChar();

        return $this->getChar();
    }

    /**
     * Skips multiline comments where appropriate, and includes them where
     * needed. Conditional comments ("/*@") are preserved, as are flagged
     * comments ("/*!") when the 'flaggedComments' option is enabled.
     *
     * @param  int               $startIndex The index point where "getReal" function started
     * @return bool|string       The character following the comment
     * @throws \RuntimeException Unclosed comments will throw an error
     */
    protected function processMultiLineComments($startIndex)
    {
        $this->getChar(); // current C
        $thirdCommentString = $this->getChar();

        // kill everything up to the next */ if it's there
        if ($this->getNext('*/')) {
            $this->getChar(); // get *
            $this->getChar(); // get /
            $char = $this->getChar(); // get the character after the comment

            // Now we reinsert conditional comments and YUI-style licensing comments
            if (($this->options['flaggedComments'] && $thirdCommentString === '!')
                || ($thirdCommentString === '@')) {
                // If conditional comments or flagged comments are not the
                // first thing in the script we need to echo a and fill it
                // with a space before moving on.
                if ($startIndex > 0) {
                    echo $this->a;
                    $this->a = ' ';

                    // If the comment started on a new line we let it stay on
                    // the new line.
                    if ($this->input[($startIndex - 1)] === "\n") {
                        echo "\n";
                    }
                }

                // index already points one past $char, hence the "- 1".
                $endPoint = ($this->index - 1) - $startIndex;
                echo substr($this->input, $startIndex, $endPoint);

                return $char;
            }
        } else {
            $char = false;
        }

        if ($char === false) {
            throw new \RuntimeException('Unclosed multiline comment at position: ' . ($this->index - 2));
        }

        // if we're here c is part of the comment and therefore tossed
        if (isset($this->c)) {
            unset($this->c);
        }

        return $char;
    }

    /**
     * Pushes the index ahead to the next instance of the supplied string. If
     * it is found the first character of the string is returned and the index
     * is set to its position. The index is left untouched otherwise.
     *
     * @param  string       $string
     * @return string|false Returns the first character of the string or false.
     */
    protected function getNext($string)
    {
        // Find the next occurrence of "string" after the current position.
        $pos = strpos($this->input, $string, $this->index);

        // If it's not there return false.
        if ($pos === false) {
            return false;
        }

        // Adjust position of index to jump ahead to the asked for string
        $this->index = $pos;

        // Return the first character of that string.
        return substr($this->input, $this->index, 1);
    }

    /**
     * Moves b into a and, when that character opens a javascript string,
     * crawls for the end of it and outputs the whole string.
     *
     * The closing quote is left in a (not echoed) so the main loop emits it.
     *
     * @throws \RuntimeException Unclosed strings will throw an error
     */
    protected function saveString()
    {
        $startpos = $this->index;

        // saveString is always called after a gets cleared, so we push b into
        // that spot.
        $this->a = $this->b;

        // If this isn't a string we don't need to do anything.
        if ($this->a !== "'" && $this->a !== '"') {
            return;
        }

        // String type is the quote used, " or '
        $stringType = $this->a;

        // Echo out that starting quote
        echo $this->a;

        // Loop until the string is done
        while (true) {
            // Grab the very next character and load it into a
            $this->a = $this->getChar();

            // Running out of input inside a string (possible when the script
            // ends with an escaped newline) would otherwise loop forever.
            if ($this->a === false) {
                throw new \RuntimeException('Unclosed string at position: ' . $startpos);
            }

            switch ($this->a) {
                // The matching quote ends the string: leave the while loop.
                case $stringType:
                    break 2;

                // New lines in strings without line delimiters are bad -
                // actual new lines will be represented by the string \n and
                // not the actual character, so those are handled by the
                // escape case below.
                case "\n":
                    throw new \RuntimeException('Unclosed string at position: ' . $startpos);

                // Escaped characters get picked up here.
                case '\\':
                    // a is a backslash. We want to keep it, and the next
                    // character, unless it's a new line: an escaped new line
                    // is a line continuation and can be dropped entirely.
                    $this->b = $this->getChar();

                    if ($this->b === "\n") {
                        break;
                    }

                    // echo out the escaped character and restart the loop.
                    echo $this->a . $this->b;
                    break;

                // Since we're not dealing with any special cases we simply
                // output the character and continue our loop.
                default:
                    echo $this->a;
            }
        }
    }

    /**
     * When a regular expression is detected this function crawls for the end
     * of it and outputs the whole regex. On return a holds the closing slash
     * (not yet echoed) and b the next real character.
     *
     * @throws \RuntimeException Unclosed regex will throw an error
     */
    protected function saveRegex()
    {
        echo $this->a . $this->b;

        while (($this->a = $this->getChar()) !== false) {
            if ($this->a === '/') {
                break;
            }

            // Keep escape sequences together so an escaped slash does not end
            // the pattern.
            if ($this->a === '\\') {
                echo $this->a;
                $this->a = $this->getChar();
            }

            if ($this->a === "\n") {
                throw new \RuntimeException('Unclosed regex pattern at position: ' . $this->index);
            }

            echo $this->a;
        }

        // The loop only ends with a === false when the input ran out before
        // the closing slash.
        if ($this->a === false) {
            throw new \RuntimeException('Unclosed regex pattern at position: ' . $this->index);
        }

        $this->b = $this->getReal();
    }

    /**
     * Checks to see if a character is alphanumeric. For the purposes of the
     * minifier "_", "$" and "/" count as alphanumeric too.
     *
     * @param  string|false $char Just one character
     * @return bool
     */
    protected static function isAlphaNumeric($char)
    {
        // false marks the end of the input and is never alphanumeric.
        if (!is_string($char)) {
            return false;
        }

        return preg_match('/^[\w\$\pL]$/', $char) === 1 || $char === '/';
    }

    /**
     * Checks whether a character occurs in a set of characters.
     *
     * Guards the strpos() call against the false "end of input" marker and
     * the empty string, which strpos() treats as a match on PHP 8.
     *
     * @param  string|false $char The character to look for
     * @param  string       $set  The characters to test against
     * @return bool
     */
    protected static function isOneOf($char, $set)
    {
        return is_string($char) && $char !== '' && strpos($set, $char) !== false;
    }

    /**
     * Replaces whitespace between two sign operators with a placeholder
     * string literal and stores what it replaced, so the minifier cannot
     * collapse code such as "a + ++b" into "a+++b".
     *
     * Only the whitespace of the first match is stored; every match is
     * restored with it.
     *
     * @param  string $js The string to lock
     * @return string The javascript with the placeholders inserted
     */
    protected function lock($js)
    {
        /* lock things like <code>"asd" + ++x;</code> */
        $lock = '"LOCK---' . crc32((string) time()) . '"';

        $matches = [];
        preg_match('/([+-])(\s+)([+-])/S', $js, $matches);
        if (empty($matches)) {
            return $js;
        }

        $this->locks[$lock] = $matches[2];

        return preg_replace('/([+-])\s+([+-])/S', "$1{$lock}$2", $js);
    }

    /**
     * Replace "locks" with the original characters.
     *
     * @param  string $js The string to unlock
     * @return string The javascript with the placeholders removed
     */
    protected function unlock($js)
    {
        if (empty($this->locks)) {
            return $js;
        }

        foreach ($this->locks as $lock => $replacement) {
            $js = str_replace($lock, $replacement, $js);
        }

        return $js;
    }
}
Note: See TracBrowser for help on using the repository browser.

Sites map