Changeset 3730:5c45a5df9a59 for inc/core/class.dc.trackback.php
- Timestamp:
- 03/08/18 17:58:39 (8 years ago)
- Branch:
- default
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
inc/core/class.dc.trackback.php
r3521 r3730 10 10 # 11 11 # -- END LICENSE BLOCK ----------------------------------------- 12 if (!defined('DC_RC_PATH')) { return;}12 if (!defined('DC_RC_PATH')) {return;} 13 13 14 14 /** … … 17 17 18 18 Sends and receives trackbacks/pingbacks. Also handles trackbacks/pingbacks auto discovery. 19 */19 */ 20 20 class dcTrackback 21 21 { 22 public $core; ///< <b>dcCore</b> dcCore instance 23 public $table; ///< <b>string</b> done pings table name 24 25 /** 26 Object constructor 27 28 @param core <b>dcCore</b> dcCore instance 29 */ 30 public function __construct($core) 31 { 32 $this->core =& $core; 33 $this->con =& $this->core->con; 34 $this->table = $this->core->prefix.'ping'; 35 } 36 37 /// @name Send 38 //@{ 39 /** 40 Get all pings sent for a given post. 41 42 @param post_id <b>integer</b> Post ID 43 @return <b>record</b> 44 */ 45 public function getPostPings($post_id) 46 { 47 $strReq = 'SELECT ping_url, ping_dt '. 48 'FROM '.$this->table.' '. 49 'WHERE post_id = '.(integer) $post_id; 50 51 return $this->con->select($strReq); 52 } 53 54 /** 55 Sends a ping to given <var>$url</var>. 56 57 @param url <b>string</b> URL to ping 58 @param post_id <b>integer</b> Post ID 59 @param post_title <b>string</b> Post title 60 @param post_excerpt <b>string</b> Post excerpt 61 @param post_url <b>string</b> Post URL 62 */ 63 public function ping($url,$post_id,$post_title,$post_excerpt,$post_url) 64 { 65 if ($this->core->blog === null) { 66 return false; 67 } 68 69 $post_id = (integer) $post_id; 70 71 # Check for previously done trackback 72 $strReq = 'SELECT post_id, ping_url FROM '.$this->table.' '. 73 'WHERE post_id = '.$post_id.' '. 
74 "AND ping_url = '".$this->con->escape($url)."' "; 75 76 $rs = $this->con->select($strReq); 77 78 if (!$rs->isEmpty()) { 79 throw new Exception(sprintf(__('%s has still been pinged'),$url)); 80 } 81 82 $ping_parts = explode('|',$url); 83 # Maybe a webmention 84 if (count($ping_parts) == 3) { 85 $payload = http_build_query(array( 86 'source' => $post_url, 87 'target' => $ping_parts[1] 88 )); 89 90 try { 91 $http = self::initHttp($ping_parts[0],$path); 92 $http->setMoreHeader('Content-Type: application/x-www-form-urlencoded'); 93 $http->post($path,$payload,'UTF-8'); 94 95 # Read response status 96 $status = $http->getStatus(); 97 $ping_error = '0'; 98 } 99 catch (Exception $e) { 100 throw new Exception(__('Unable to ping URL')); 101 } 102 103 if (!in_array($status,array('200','201','202'))) { 104 $ping_error = $http->getStatus(); 105 $ping_msg = __('Bad server response code'); 106 } 107 } 108 # No, let's walk by the trackback way 109 elseif (count($ping_parts) < 2) { 110 $data = array( 111 'title' => $post_title, 112 'excerpt' => $post_excerpt, 113 'url' => $post_url, 114 'blog_name' => trim(html::escapeHTML(html::clean($this->core->blog->name))) 115 //,'__debug' => false 116 ); 117 118 # Ping 119 try { 120 $http = self::initHttp($url,$path); 121 $http->post($path,$data,'UTF-8'); 122 $res = $http->getContent(); 123 } 124 catch (Exception $e) { 125 throw new Exception(__('Unable to ping URL')); 126 } 127 128 $pattern = 129 '|<response>.*<error>(.*)</error>(.*)'. 130 '(<message>(.*)</message>(.*))?'. 131 '</response>|msU'; 132 133 if (!preg_match($pattern,$res,$match)) { 134 throw new Exception(sprintf(__('%s is not a ping URL'),$url)); 135 } 136 137 $ping_error = trim($match[1]); 138 $ping_msg = (!empty($match[4])) ? $match[4] : ''; 139 } 140 # Damnit ! 
Let's play pingback 141 else { 142 try { 143 $xmlrpc = new xmlrpcClient($ping_parts[0]); 144 $res = $xmlrpc->query('pingback.ping', $post_url, $ping_parts[1]); 145 $ping_error = '0'; 146 } 147 catch (xmlrpcException $e) { 148 $ping_error = $e->getCode(); 149 $ping_msg = $e->getMessage(); 150 } 151 catch (Exception $e) { 152 throw new Exception(__('Unable to ping URL')); 153 } 154 } 155 156 if ($ping_error != '0') { 157 throw new Exception(sprintf(__('%s, ping error:'),$url).' '.$ping_msg); 158 } 159 else { 160 # Notify ping result in database 161 $cur = $this->con->openCursor($this->table); 162 $cur->post_id = $post_id; 163 $cur->ping_url = $url; 164 $cur->ping_dt = date('Y-m-d H:i:s'); 165 166 $cur->insert(); 167 } 168 } 169 //@} 170 171 /// @name Receive 172 //@{ 173 /** 174 Receives a trackback and insert it as a comment of given post. 175 176 @param post_id <b>integer</b> Post ID 177 */ 178 public function receiveTrackback($post_id) 179 { 180 header('Content-Type: text/xml; charset=UTF-8'); 181 if (empty($_POST)) { 182 http::head(405,'Method Not Allowed'); 183 echo 184 '<?xml version="1.0" encoding="utf-8"?>'."\n". 185 "<response>\n". 186 " <error>1</error>\n". 187 " <message>POST request needed</message>\n". 188 "</response>"; 189 return; 190 } 191 192 $post_id = (integer) $post_id; 193 194 $title = !empty($_POST['title']) ? $_POST['title'] : ''; 195 $excerpt = !empty($_POST['excerpt']) ? $_POST['excerpt'] : ''; 196 $url = !empty($_POST['url']) ? $_POST['url'] : ''; 197 $blog_name = !empty($_POST['blog_name']) ? 
$_POST['blog_name'] : ''; 198 $charset = ''; 199 $comment = ''; 200 201 $err = false; 202 $msg = ''; 203 204 if ($this->core->blog === null) { 205 $err = true; 206 $msg = 'No blog.'; 207 } 208 elseif ($url == '') { 209 $err = true; 210 $msg = 'URL parameter is required.'; 211 } 212 elseif ($blog_name == '') { 213 $err = true; 214 $msg = 'Blog name is required.'; 215 } 216 217 if (!$err) { 218 $post = $this->core->blog->getPosts(array('post_id'=>$post_id,'post_type'=>'')); 219 220 if ($post->isEmpty()) { 221 $err = true; 222 $msg = 'No such post.'; 223 } 224 elseif (!$post->trackbacksActive()) { 225 $err = true; 226 $msg = 'Trackbacks are not allowed for this post or weblog.'; 227 } 228 229 $url = trim(html::clean($url)); 230 if ($this->pingAlreadyDone($post->post_id,$url)) { 231 $err = true; 232 $msg = 'The trackback has already been registered'; 233 } 234 } 235 236 if (!$err) { 237 $charset = self::getCharsetFromRequest(); 238 239 if (!$charset) { 240 $charset = self::detectCharset($title.' '.$excerpt.' 
'.$blog_name); 241 } 242 243 if (strtolower($charset) != 'utf-8') { 244 $title = iconv($charset,'UTF-8',$title); 245 $excerpt = iconv($charset,'UTF-8',$excerpt); 246 $blog_name = iconv($charset,'UTF-8',$blog_name); 247 } 248 249 $title = trim(html::clean($title)); 250 $title = html::decodeEntities($title); 251 $title = html::escapeHTML($title); 252 $title = text::cutString($title,60); 253 254 $excerpt = trim(html::clean($excerpt)); 255 $excerpt = html::decodeEntities($excerpt); 256 $excerpt = preg_replace('/\s+/ms',' ',$excerpt); 257 $excerpt = text::cutString($excerpt,252); 258 $excerpt = html::escapeHTML($excerpt).'...'; 259 260 $blog_name = trim(html::clean($blog_name)); 261 $blog_name = html::decodeEntities($blog_name); 262 $blog_name = html::escapeHTML($blog_name); 263 $blog_name = text::cutString($blog_name,60); 264 265 try { 266 $this->addBacklink($post_id,$url,$blog_name,$title,$excerpt,$comment); 267 } 268 catch (Exception $e) { 269 $err = 1; 270 $msg = 'Something went wrong : '.$e->getMessage(); 271 } 272 } 273 274 $resp = 275 '<?xml version="1.0" encoding="utf-8"?>'."\n". 276 "<response>\n". 277 ' <error>'.(integer) $err."</error>\n"; 278 279 if ($msg) { 280 $resp .= ' <message>'.$msg."</message>\n"; 281 } 282 283 if (!empty($_POST['__debug'])) { 284 $resp .= 285 " <debug>\n". 286 ' <title>'.$title."</title>\n". 287 ' <excerpt>'.$excerpt."</excerpt>\n". 288 ' <url>'.$url."</url>\n". 289 ' <blog_name>'.$blog_name."</blog_name>\n". 290 ' <charset>'.$charset."</charset>\n". 291 ' <comment>'.$comment."</comment>\n". 292 " </debug>\n"; 293 } 294 295 echo $resp."</response>"; 296 } 297 298 /** 299 Receives a pingback and insert it as a comment of given post. 
300 301 @param from_url <b>string</b> Source URL 302 @param to_url <b>string</b> Target URL 303 */ 304 public function receivePingback($from_url,$to_url) 305 { 306 try { 307 $posts = $this->getTargetPost($to_url); 308 309 if ($this->pingAlreadyDone($posts->post_id,$from_url)) { 310 throw new Exception(__('Don\'t repeat yourself, please.'), 48); 311 } 312 313 $remote_content = $this->getRemoteContent($from_url); 314 315 # We want a title... 316 if (!preg_match('!<title>([^<].*?)</title>!mis',$remote_content,$m)) { 317 throw new Exception(__('Where\'s your title?'), 0); 318 } 319 $title = trim(html::clean($m[1])); 320 $title = html::decodeEntities($title); 321 $title = html::escapeHTML($title); 322 $title = text::cutString($title,60); 323 324 preg_match('!<body[^>]*?>(.*)?</body>!msi',$remote_content,$m); 325 $source = $m[1]; 326 $source = preg_replace('![\r\n\s]+!ms',' ',$source); 327 $source = preg_replace( "/<\/*(h\d|p|th|td|li|dt|dd|pre|caption|input|textarea|button)[^>]*>/","\n\n",$source ); 328 $source = strip_tags($source,'<a>'); 329 $source = explode("\n\n",$source); 330 331 $excerpt = ''; 332 foreach ($source as $line) { 333 if (strpos($line, $to_url) !== false) { 334 if (preg_match("!<a[^>]+?".$to_url."[^>]*>([^>]+?)</a>!",$line,$m)) { 335 $excerpt = strip_tags($line); 336 break; 337 } 338 } 339 } 340 if ($excerpt) { 341 $excerpt = '(…) '.text::cutString(html::escapeHTML($excerpt),200).' (…)'; 342 } 343 else { 344 $excerpt = '(…)'; 345 } 346 347 $this->addBacklink($posts->post_id,$from_url,'',$title,$excerpt,$comment); 348 } 349 catch (Exception $e) { 350 throw new Exception(__('Sorry, an internal problem has occured.'),0); 351 } 352 353 return __('Thanks, mate. It was a pleasure.'); 354 } 355 356 /** 357 Receives a webmention and insert it as a comment of given post. 358 359 NB: plugin Fair Trackback check source content to find url. 
360 361 @return <b>null</b> Null on success, else throw an exception 362 */ 363 public function receiveWebmention() 364 { 365 $err = $post_id = false; 366 header('Content-Type: text/html; charset=UTF-8'); 367 368 try { 369 # Check if post and target are valid URL 370 if (empty($_POST['source']) || empty($_POST['target'])) { 371 throw new Exception('Source or target is not valid',0); 372 } 373 374 $from_url = urldecode($_POST['source']); 375 $to_url = urldecode($_POST['target']); 376 377 self::checkURLs($from_url,$to_url); 378 379 # Try to find post 380 $posts = $this->getTargetPost($to_url); 381 $post_id = $posts->post_id; 382 383 # Check if it's an updated mention 384 if ($this->pingAlreadyDone($post_id,$from_url)) { 385 $this->delBacklink($post_id,$from_url); 386 } 387 388 # Create a comment for received webmention 389 $remote_content = $this->getRemoteContent($from_url); 390 391 # We want a title... 392 if (!preg_match('!<title>([^<].*?)</title>!mis',$remote_content,$m)) { 393 throw new Exception(__('Where\'s your title?'), 0); 394 } 395 $title = trim(html::clean($m[1])); 396 $title = html::decodeEntities($title); 397 $title = html::escapeHTML($title); 398 $title = text::cutString($title,60); 399 400 preg_match('!<body[^>]*?>(.*)?</body>!msi',$remote_content,$m); 401 $source = $m[1]; 402 $source = preg_replace('![\r\n\s]+!ms',' ',$source); 403 $source = preg_replace( "/<\/*(h\d|p|th|td|li|dt|dd|pre|caption|input|textarea|button)[^>]*>/","\n\n",$source ); 404 $source = strip_tags($source,'<a>'); 405 $source = explode("\n\n",$source); 406 407 $excerpt = ''; 408 foreach ($source as $line) { 409 if (strpos($line, $to_url) !== false) { 410 if (preg_match("!<a[^>]+?".$to_url."[^>]*>([^>]+?)</a>!",$line,$m)) { 411 $excerpt = strip_tags($line); 412 break; 413 } 414 } 415 } 416 if ($excerpt) { 417 $excerpt = '(…) '.text::cutString(html::escapeHTML($excerpt),200).' 
(…)'; 418 } 419 else { 420 $excerpt = '(…)'; 421 } 422 423 $this->addBacklink($post_id,$from_url,'',$title,$excerpt,$comment); 424 425 # All done, thanks 426 $code = $this->core->blog->settings->system->trackbacks_pub ? 200 : 202; 427 http::head($code); 428 return; 429 } 430 catch (Exception $e) { 431 $err = $e->getMessage(); 432 } 433 434 http::head(400); 435 echo $err ?: 'Something went wrong.'; 436 return; 437 } 438 439 /** 440 Check if a post previously received a ping a from an URL. 441 442 @param post_id <b>integer</b> Post ID 443 @param from_url <b>string</b> Source URL 444 @return <b>boolean</b> 445 */ 446 private function pingAlreadyDone($post_id,$from_url) 447 { 448 $params = array( 449 'post_id' => $post_id, 450 'comment_site' => $from_url, 451 'comment_trackback' => 1, 452 ); 453 454 $rs = $this->core->blog->getComments($params,true); 455 if ($rs && !$rs->isEmpty()) { 456 return ($rs->f(0)); 457 } 458 459 return false; 460 } 461 462 /** 463 Create a comment marked as trackback for a given post. 464 465 @param post_id <b>integer</b> Post ID 466 @param url <b>string</b> Discovered URL 467 @param blog name <b>string</b> Source blog name 468 @param title <b>string</b> Comment title 469 @param excerpt <b>string</b> Source excerpt 470 @param comment <b>string</b> Comment content 471 */ 472 private function addBacklink($post_id,$url,$blog_name,$title,$excerpt,&$comment) 473 { 474 if (empty($blog_name)) { 475 // Let use title as text link for this backlink 476 $blog_name = ($title ?: 'Anonymous blog'); 477 } 478 479 $comment = 480 "<!-- TB -->\n". 481 '<p><strong>'.($title ?: $blog_name)."</strong></p>\n". 
482 '<p>'.$excerpt.'</p>'; 483 484 $cur = $this->core->con->openCursor($this->core->prefix.'comment'); 485 $cur->comment_author = (string) $blog_name; 486 $cur->comment_site = (string) $url; 487 $cur->comment_content = (string) $comment; 488 $cur->post_id = $post_id; 489 $cur->comment_trackback = 1; 490 $cur->comment_status = $this->core->blog->settings->system->trackbacks_pub ? 1 : -1; 491 $cur->comment_ip = http::realIP(); 492 493 # --BEHAVIOR-- publicBeforeTrackbackCreate 494 $this->core->callBehavior('publicBeforeTrackbackCreate',$cur); 495 if ($cur->post_id) { 496 $comment_id = $this->core->blog->addComment($cur); 497 498 # --BEHAVIOR-- publicAfterTrackbackCreate 499 $this->core->callBehavior('publicAfterTrackbackCreate',$cur,$comment_id); 500 } 501 } 502 503 /** 504 Delete previously received comment made from an URL for a given post. 505 506 @param post_id <b>integer</b> Post ID 507 @param url <b>string</b> Source URL 508 */ 509 private function delBacklink($post_id,$url) 510 { 511 $this->con->execute( 512 'DELETE FROM '.$this->core->prefix.'comment '. 513 'WHERE post_id = '.((integer) $post_id).' '. 514 "AND comment_site = '".$this->core->con->escape((string) $url)."' ". 515 'AND comment_trackback = 1 ' 516 ); 517 } 518 519 /** 520 Find Charset from HTTP headers. 521 522 @param header <b>string</b> Source header 523 @return <b>string</b> 524 */ 525 private static function getCharsetFromRequest($header = '') 526 { 527 if (!$header && isset($_SERVER['CONTENT_TYPE'])) { 528 $header = $_SERVER['CONTENT_TYPE']; 529 } 530 531 if ($header) { 532 if (preg_match('|charset=([a-zA-Z0-9-]+)|',$header,$m)) { 533 return $m[1]; 534 } 535 } 536 537 return null; 538 } 539 540 /** 541 Detect encoding. 542 543 @param content <b>string</b> Source URL 544 @return <b>string</b> 545 */ 546 private static function detectCharset($content) 547 { 548 return mb_detect_encoding($content, 549 'UTF-8,ISO-8859-1,ISO-8859-2,ISO-8859-3,'. 
550 'ISO-8859-4,ISO-8859-5,ISO-8859-6,ISO-8859-7,ISO-8859-8,'. 551 'ISO-8859-9,ISO-8859-10,ISO-8859-13,ISO-8859-14,ISO-8859-15'); 552 } 553 554 /** 555 Retreive local post from a given URL 556 557 @param to_url <b>string</b> Target URL 558 @return <b>string</b> 559 */ 560 private function getTargetPost($to_url) 561 { 562 $reg = '!^'.preg_quote($this->core->blog->url).'(.*)!'; 563 $type = $args = $next = ''; 564 565 # Are you dumb? 566 if (!preg_match($reg,$to_url,$m)) { 567 throw new Exception(__('Any chance you ping one of my contents? No? Really?'),0); 568 } 569 570 # Does the targeted URL look like a registered post type? 571 $url_part = $m[1]; 572 $p_type = ''; 573 $post_types = $this->core->getPostTypes(); 574 foreach ($post_types as $k => $v) { 575 $reg = '!^'.preg_quote(str_replace('%s','',$v['public_url'])).'(.*)!'; 576 if (preg_match($reg,$url_part,$n)) { 577 $p_type = $k; 578 $post_url = $n[1]; 579 break; 580 } 581 } 582 583 if (empty($p_type)) { 584 throw new Exception(__('Sorry but you can not ping this type of content.'),33); 585 } 586 587 # Time to see if we've got a winner... 588 $params = array( 589 'post_type' => $p_type, 590 'post_url' => $post_url, 591 ); 592 $posts = $this->core->blog->getPosts($params); 593 594 # Missed! 595 if ($posts->isEmpty()) { 596 throw new Exception(__('Oops. Kinda "not found" stuff. Please check the target URL twice.'),33); 597 } 598 599 # Nice try. But, sorry, no. 600 if (!$posts->trackbacksActive()) { 601 throw new Exception(__('Sorry, dude. This entry does not accept pingback at the moment.'),33); 602 } 603 604 return $posts; 605 } 606 607 /** 608 Returns content of a distant page 609 610 @param from_url <b>string</b> Target URL 611 @return <b>string</b> 612 */ 613 private function getRemoteContent($from_url) 614 { 615 $http = self::initHttp($from_url,$from_path); 616 617 # First round : just to be sure the ping comes from an acceptable resource type. 
618 $http->setHeadersOnly(true); 619 $http->get($from_path); 620 $c_type = explode(';',$http->getHeader('content-type')); 621 622 # Bad luck. Bye, bye... 623 if (!in_array($c_type[0],array('text/html','application/xhtml+xml'))) { 624 throw new Exception(__('Your source URL does not look like a supported content type. Sorry. Bye, bye!'),0); 625 } 626 627 # Second round : let's go fetch and parse the remote content 628 $http->setHeadersOnly(false); 629 $http->get($from_path); 630 $remote_content = $http->getContent(); 631 632 # Convert content charset 633 $charset = self::getCharsetFromRequest($http->getHeader('content-type')); 634 if (!$charset) { 635 $charset = self::detectCharset($remote_content); 636 } 637 if (strtolower($charset) != 'utf-8') { 638 $remote_content = iconv($charset,'UTF-8',$remote_content); 639 } 640 641 return $remote_content; 642 } 643 //@} 644 645 /// @name Discover 646 //@{ 647 /** 648 Returns an array containing all discovered trackbacks URLs in 649 <var>$text</var>. 650 651 @param text <b>string</b> Input text 652 @return <b>array</b> 653 */ 654 public function discover($text) 655 { 656 $res = array(); 657 658 foreach ($this->getTextLinks($text) as $link) 659 { 660 if (($url = $this->getPingURL($link)) !== null) { 661 $res[] = $url; 662 } 663 } 664 665 return $res; 666 } 667 668 /** 669 Find links into a text. 
670 671 @param text <b>string</b> Text to scan 672 @return <b>array</b> 673 */ 674 private function getTextLinks($text) 675 { 676 $res = array(); 677 678 # href attribute on "a" tags 679 if (preg_match_all('/<a ([^>]+)>/ms',$text,$match,PREG_SET_ORDER)) { 680 for ($i = 0; $i<count($match); $i++) 681 { 682 if (preg_match('/href="((https?:\/)?\/[^"]+)"/ms',$match[$i][1],$matches)) { 683 $res[$matches[1]] = 1; 684 } 685 } 686 } 687 unset($match); 688 689 # cite attributes on "blockquote" and "q" tags 690 if (preg_match_all('/<(blockquote|q) ([^>]+)>/ms',$text,$match,PREG_SET_ORDER)) { 691 for ($i = 0; $i<count($match); $i++) 692 { 693 if (preg_match('/cite="((https?:\/)?\/[^"]+)"/ms',$match[$i][2],$matches)) { 694 $res[$matches[1]] = 1; 695 } 696 } 697 } 698 699 return array_keys($res); 700 } 701 702 /** 703 Check remote header/content to find api trace. 704 705 @param url <b>string</b> URL to scan 706 @return <b>string</b> 707 */ 708 private function getPingURL($url) 709 { 710 if (strpos($url,'/') === 0) { 711 $url = http::getHost().$url; 712 } 713 714 try { 715 $http = self::initHttp($url,$path); 716 $http->get($path); 717 $page_content = $http->getContent(); 718 $pb_url = $http->getHeader('x-pingback'); 719 $wm_url = $http->getHeader('link'); 720 } 721 catch (Exception $e) { 722 return false; 723 } 724 725 # Let's check for an elderly trackback data chunk... 726 $pattern_rdf = 727 '/<rdf:RDF.*?>.*?'. 728 '<rdf:Description\s+(.*?)\/>'. 729 '.*?<\/rdf:RDF>'. 
730 '/msi'; 731 732 preg_match_all($pattern_rdf,$page_content,$rdf_all,PREG_SET_ORDER); 733 734 $url_path = parse_url($url, PHP_URL_PATH); 735 $sanitized_url = str_replace($url_path, html::sanitizeURL($url_path),$url); 736 737 for ($i=0; $i<count($rdf_all); $i++) 738 { 739 $rdf = $rdf_all[$i][1]; 740 if (preg_match('/dc:identifier="'.preg_quote($url,'/').'"/msi',$rdf) || 741 preg_match('/dc:identifier="'.preg_quote($sanitized_url,'/').'"/msi',$rdf)) { 742 if (preg_match('/trackback:ping="(.*?)"/msi',$rdf,$tb_link)) { 743 return $tb_link[1]; 744 } 745 } 746 } 747 748 # No trackback ? OK, let see if we've got a X-Pingback header and it's a valid URL, it will be enough 749 if ($pb_url && filter_var($pb_url,FILTER_VALIDATE_URL) && preg_match('!^https?:!',$pb_url)) { 750 return $pb_url.'|'.$url; 751 } 752 753 # No X-Pingback header. A link rel=pingback, maybe ? 754 $pattern_pingback = '!<link rel="pingback" href="(.*?)"( /)?>!msi'; 755 756 if (preg_match($pattern_pingback,$page_content,$m)) { 757 $pb_url = $m[1]; 758 if (filter_var($pb_url,FILTER_VALIDATE_URL) && preg_match('!^https?:!',$pb_url)) { 759 return $pb_url.'|'.$url; 760 } 761 } 762 763 # Nothing, let's try webmention. 
Only support x/html content 764 if ($wm_url) { 765 $type = explode(';',$http->getHeader('content-type')); 766 if (!in_array($type[0],array('text/html','application/xhtml+xml'))) { 767 $wm_url = false; 768 } 769 } 770 771 # Check HTTP headers for a Link: <ENDPOINT_URL>; rel="webmention" 772 $wm_api = false; 773 if ($wm_url) { 774 if(preg_match('~<((?:https?://)?[^>]+)>; rel="?(?:https?://webmention.org/?|webmention)"?~',$wm_url,$match)) { 775 if (filter_var($match[1],FILTER_VALIDATE_URL) && preg_match('!^https?:!',$match[1])) { 776 $wm_api = $match[1]; 777 } 778 } 779 } 780 781 # Else check content for <link href="ENDPOINT_URL" rel="webmention" /> 782 if ($wm_url && !$wm_api) { 783 $content = preg_replace('/<!--(.*)-->/Us','',$page_content); 784 if (preg_match('/<(?:link|a)[ ]+href="([^"]*)"[ ]+rel="[^" ]* ?webmention ?[^" ]*"[ ]*\/?>/i',$content,$match) 785 || preg_match('/<(?:link|a)[ ]+rel="[^" ]* ?webmention ?[^" ]*"[ ]+href="([^"]*)"[ ]*\/?>/i',$content,$match)) { 786 $wm_api = $match[1]; 787 } 788 } 789 790 # We have a winner, let's add some tricks to make diference 791 if ($wm_api) { 792 return $wm_api.'|'.$url.'|webmention'; 793 } 794 795 return null; 796 } 797 //@} 798 799 /** 800 HTTP helper. 801 802 @param url <b>string</b> URL 803 @param path <b>string</b> Path 804 @return <b>object</b> 805 */ 806 private static function initHttp($url,&$path) 807 { 808 $client = netHttp::initClient($url,$path); 809 $client->setTimeout(5); 810 $client->setUserAgent('Dotclear - http://www.dotclear.org/'); 811 $client->useGzip(false); 812 $client->setPersistReferers(false); 813 814 return $client; 815 } 816 817 /** 818 URL helper. 819 820 @param from_url <b>string</b> URL a 821 @param to_url <b>string</b> URL b 822 */ 823 public static function checkURLs($from_url,$to_url) 824 { 825 if (!(filter_var($from_url,FILTER_VALIDATE_URL) && preg_match('!^https?://!',$from_url))) { 826 throw new Exception(__('No valid source URL provided? 
Try again!'), 0); 827 } 828 829 if (!(filter_var($to_url,FILTER_VALIDATE_URL) && preg_match('!^https?://!',$to_url))) { 830 throw new Exception(__('No valid target URL provided? Try again!'), 0); 831 } 832 833 if (html::sanitizeURL(urldecode($from_url)) == html::sanitizeURL(urldecode($to_url))) { 834 throw new Exception(__('LOL!'), 0); 835 } 836 } 22 public $core; ///< <b>dcCore</b> dcCore instance 23 public $table; ///< <b>string</b> done pings table name 24 25 /** 26 Object constructor 27 28 @param core <b>dcCore</b> dcCore instance 29 */ 30 public function __construct($core) 31 { 32 $this->core = &$core; 33 $this->con = &$this->core->con; 34 $this->table = $this->core->prefix . 'ping'; 35 } 36 37 /// @name Send 38 //@{ 39 /** 40 Get all pings sent for a given post. 41 42 @param post_id <b>integer</b> Post ID 43 @return <b>record</b> 44 */ 45 public function getPostPings($post_id) 46 { 47 $strReq = 'SELECT ping_url, ping_dt ' . 48 'FROM ' . $this->table . ' ' . 49 'WHERE post_id = ' . (integer) $post_id; 50 51 return $this->con->select($strReq); 52 } 53 54 /** 55 Sends a ping to given <var>$url</var>. 56 57 @param url <b>string</b> URL to ping 58 @param post_id <b>integer</b> Post ID 59 @param post_title <b>string</b> Post title 60 @param post_excerpt <b>string</b> Post excerpt 61 @param post_url <b>string</b> Post URL 62 */ 63 public function ping($url, $post_id, $post_title, $post_excerpt, $post_url) 64 { 65 if ($this->core->blog === null) { 66 return false; 67 } 68 69 $post_id = (integer) $post_id; 70 71 # Check for previously done trackback 72 $strReq = 'SELECT post_id, ping_url FROM ' . $this->table . ' ' . 73 'WHERE post_id = ' . $post_id . ' ' . 74 "AND ping_url = '" . $this->con->escape($url) . 
"' "; 75 76 $rs = $this->con->select($strReq); 77 78 if (!$rs->isEmpty()) { 79 throw new Exception(sprintf(__('%s has still been pinged'), $url)); 80 } 81 82 $ping_parts = explode('|', $url); 83 # Maybe a webmention 84 if (count($ping_parts) == 3) { 85 $payload = http_build_query(array( 86 'source' => $post_url, 87 'target' => $ping_parts[1] 88 )); 89 90 try { 91 $http = self::initHttp($ping_parts[0], $path); 92 $http->setMoreHeader('Content-Type: application/x-www-form-urlencoded'); 93 $http->post($path, $payload, 'UTF-8'); 94 95 # Read response status 96 $status = $http->getStatus(); 97 $ping_error = '0'; 98 } catch (Exception $e) { 99 throw new Exception(__('Unable to ping URL')); 100 } 101 102 if (!in_array($status, array('200', '201', '202'))) { 103 $ping_error = $http->getStatus(); 104 $ping_msg = __('Bad server response code'); 105 } 106 } 107 # No, let's walk by the trackback way 108 elseif (count($ping_parts) < 2) { 109 $data = array( 110 'title' => $post_title, 111 'excerpt' => $post_excerpt, 112 'url' => $post_url, 113 'blog_name' => trim(html::escapeHTML(html::clean($this->core->blog->name))) 114 //,'__debug' => false 115 ); 116 117 # Ping 118 try { 119 $http = self::initHttp($url, $path); 120 $http->post($path, $data, 'UTF-8'); 121 $res = $http->getContent(); 122 } catch (Exception $e) { 123 throw new Exception(__('Unable to ping URL')); 124 } 125 126 $pattern = 127 '|<response>.*<error>(.*)</error>(.*)' . 128 '(<message>(.*)</message>(.*))?' . 129 '</response>|msU'; 130 131 if (!preg_match($pattern, $res, $match)) { 132 throw new Exception(sprintf(__('%s is not a ping URL'), $url)); 133 } 134 135 $ping_error = trim($match[1]); 136 $ping_msg = (!empty($match[4])) ? $match[4] : ''; 137 } 138 # Damnit ! 
Let's play pingback 139 else { 140 try { 141 $xmlrpc = new xmlrpcClient($ping_parts[0]); 142 $res = $xmlrpc->query('pingback.ping', $post_url, $ping_parts[1]); 143 $ping_error = '0'; 144 } catch (xmlrpcException $e) { 145 $ping_error = $e->getCode(); 146 $ping_msg = $e->getMessage(); 147 } catch (Exception $e) { 148 throw new Exception(__('Unable to ping URL')); 149 } 150 } 151 152 if ($ping_error != '0') { 153 throw new Exception(sprintf(__('%s, ping error:'), $url) . ' ' . $ping_msg); 154 } else { 155 # Notify ping result in database 156 $cur = $this->con->openCursor($this->table); 157 $cur->post_id = $post_id; 158 $cur->ping_url = $url; 159 $cur->ping_dt = date('Y-m-d H:i:s'); 160 161 $cur->insert(); 162 } 163 } 164 //@} 165 166 /// @name Receive 167 //@{ 168 /** 169 Receives a trackback and insert it as a comment of given post. 170 171 @param post_id <b>integer</b> Post ID 172 */ 173 public function receiveTrackback($post_id) 174 { 175 header('Content-Type: text/xml; charset=UTF-8'); 176 if (empty($_POST)) { 177 http::head(405, 'Method Not Allowed'); 178 echo 179 '<?xml version="1.0" encoding="utf-8"?>' . "\n" . 180 "<response>\n" . 181 " <error>1</error>\n" . 182 " <message>POST request needed</message>\n" . 183 "</response>"; 184 return; 185 } 186 187 $post_id = (integer) $post_id; 188 189 $title = !empty($_POST['title']) ? $_POST['title'] : ''; 190 $excerpt = !empty($_POST['excerpt']) ? $_POST['excerpt'] : ''; 191 $url = !empty($_POST['url']) ? $_POST['url'] : ''; 192 $blog_name = !empty($_POST['blog_name']) ? 
$_POST['blog_name'] : ''; 193 $charset = ''; 194 $comment = ''; 195 196 $err = false; 197 $msg = ''; 198 199 if ($this->core->blog === null) { 200 $err = true; 201 $msg = 'No blog.'; 202 } elseif ($url == '') { 203 $err = true; 204 $msg = 'URL parameter is required.'; 205 } elseif ($blog_name == '') { 206 $err = true; 207 $msg = 'Blog name is required.'; 208 } 209 210 if (!$err) { 211 $post = $this->core->blog->getPosts(array('post_id' => $post_id, 'post_type' => '')); 212 213 if ($post->isEmpty()) { 214 $err = true; 215 $msg = 'No such post.'; 216 } elseif (!$post->trackbacksActive()) { 217 $err = true; 218 $msg = 'Trackbacks are not allowed for this post or weblog.'; 219 } 220 221 $url = trim(html::clean($url)); 222 if ($this->pingAlreadyDone($post->post_id, $url)) { 223 $err = true; 224 $msg = 'The trackback has already been registered'; 225 } 226 } 227 228 if (!$err) { 229 $charset = self::getCharsetFromRequest(); 230 231 if (!$charset) { 232 $charset = self::detectCharset($title . ' ' . $excerpt . ' ' . $blog_name); 233 } 234 235 if (strtolower($charset) != 'utf-8') { 236 $title = iconv($charset, 'UTF-8', $title); 237 $excerpt = iconv($charset, 'UTF-8', $excerpt); 238 $blog_name = iconv($charset, 'UTF-8', $blog_name); 239 } 240 241 $title = trim(html::clean($title)); 242 $title = html::decodeEntities($title); 243 $title = html::escapeHTML($title); 244 $title = text::cutString($title, 60); 245 246 $excerpt = trim(html::clean($excerpt)); 247 $excerpt = html::decodeEntities($excerpt); 248 $excerpt = preg_replace('/\s+/ms', ' ', $excerpt); 249 $excerpt = text::cutString($excerpt, 252); 250 $excerpt = html::escapeHTML($excerpt) . 
'...'; 251 252 $blog_name = trim(html::clean($blog_name)); 253 $blog_name = html::decodeEntities($blog_name); 254 $blog_name = html::escapeHTML($blog_name); 255 $blog_name = text::cutString($blog_name, 60); 256 257 try { 258 $this->addBacklink($post_id, $url, $blog_name, $title, $excerpt, $comment); 259 } catch (Exception $e) { 260 $err = 1; 261 $msg = 'Something went wrong : ' . $e->getMessage(); 262 } 263 } 264 265 $resp = 266 '<?xml version="1.0" encoding="utf-8"?>' . "\n" . 267 "<response>\n" . 268 ' <error>' . (integer) $err . "</error>\n"; 269 270 if ($msg) { 271 $resp .= ' <message>' . $msg . "</message>\n"; 272 } 273 274 if (!empty($_POST['__debug'])) { 275 $resp .= 276 " <debug>\n" . 277 ' <title>' . $title . "</title>\n" . 278 ' <excerpt>' . $excerpt . "</excerpt>\n" . 279 ' <url>' . $url . "</url>\n" . 280 ' <blog_name>' . $blog_name . "</blog_name>\n" . 281 ' <charset>' . $charset . "</charset>\n" . 282 ' <comment>' . $comment . "</comment>\n" . 283 " </debug>\n"; 284 } 285 286 echo $resp . "</response>"; 287 } 288 289 /** 290 Receives a pingback and insert it as a comment of given post. 291 292 @param from_url <b>string</b> Source URL 293 @param to_url <b>string</b> Target URL 294 */ 295 public function receivePingback($from_url, $to_url) 296 { 297 try { 298 $posts = $this->getTargetPost($to_url); 299 300 if ($this->pingAlreadyDone($posts->post_id, $from_url)) { 301 throw new Exception(__('Don\'t repeat yourself, please.'), 48); 302 } 303 304 $remote_content = $this->getRemoteContent($from_url); 305 306 # We want a title... 
307 if (!preg_match('!<title>([^<].*?)</title>!mis', $remote_content, $m)) { 308 throw new Exception(__('Where\'s your title?'), 0); 309 } 310 $title = trim(html::clean($m[1])); 311 $title = html::decodeEntities($title); 312 $title = html::escapeHTML($title); 313 $title = text::cutString($title, 60); 314 315 preg_match('!<body[^>]*?>(.*)?</body>!msi', $remote_content, $m); 316 $source = $m[1]; 317 $source = preg_replace('![\r\n\s]+!ms', ' ', $source); 318 $source = preg_replace("/<\/*(h\d|p|th|td|li|dt|dd|pre|caption|input|textarea|button)[^>]*>/", "\n\n", $source); 319 $source = strip_tags($source, '<a>'); 320 $source = explode("\n\n", $source); 321 322 $excerpt = ''; 323 foreach ($source as $line) { 324 if (strpos($line, $to_url) !== false) { 325 if (preg_match("!<a[^>]+?" . $to_url . "[^>]*>([^>]+?)</a>!", $line, $m)) { 326 $excerpt = strip_tags($line); 327 break; 328 } 329 } 330 } 331 if ($excerpt) { 332 $excerpt = '(…) ' . text::cutString(html::escapeHTML($excerpt), 200) . ' (…)'; 333 } else { 334 $excerpt = '(…)'; 335 } 336 337 $this->addBacklink($posts->post_id, $from_url, '', $title, $excerpt, $comment); 338 } catch (Exception $e) { 339 throw new Exception(__('Sorry, an internal problem has occured.'), 0); 340 } 341 342 return __('Thanks, mate. It was a pleasure.'); 343 } 344 345 /** 346 Receives a webmention and insert it as a comment of given post. 347 348 NB: plugin Fair Trackback check source content to find url. 
@return <b>null</b> Always null; the outcome is reported to the client through the HTTP status code (200/202 on success, 400 with a message on failure)
    */
    public function receiveWebmention()
    {
        $err = $post_id = false;
        header('Content-Type: text/html; charset=UTF-8');

        try {
            # Check if post and target are valid URL
            if (empty($_POST['source']) || empty($_POST['target'])) {
                throw new Exception('Source or target is not valid', 0);
            }

            # NOTE(review): $_POST values are already URL-decoded by PHP, this
            # second decoding is kept as is to preserve current behavior -- confirm
            $from_url = urldecode($_POST['source']);
            $to_url   = urldecode($_POST['target']);

            self::checkURLs($from_url, $to_url);

            # Try to find post
            $posts   = $this->getTargetPost($to_url);
            $post_id = $posts->post_id;

            # Check if it's an updated mention
            if ($this->pingAlreadyDone($post_id, $from_url)) {
                $this->delBacklink($post_id, $from_url);
            }

            # Create a comment for received webmention
            $remote_content = $this->getRemoteContent($from_url);

            # We want a title...
            if (!preg_match('!<title>([^<].*?)</title>!mis', $remote_content, $m)) {
                throw new Exception(__('Where\'s your title?'), 0);
            }
            $title = trim(html::clean($m[1]));
            $title = html::decodeEntities($title);
            $title = html::escapeHTML($title);
            $title = text::cutString($title, 60);

            # A page without a readable <body> simply yields no excerpt
            # (previously $m[1] was read even when the match had failed)
            $source = '';
            if (preg_match('!<body[^>]*?>(.*)?</body>!msi', $remote_content, $m) && isset($m[1])) {
                $source = $m[1];
            }
            $source = preg_replace('![\r\n\s]+!ms', ' ', $source);
            $source = preg_replace("/<\/*(h\d|p|th|td|li|dt|dd|pre|caption|input|textarea|button)[^>]*>/", "\n\n", $source);
            $source = strip_tags($source, '<a>');
            $source = explode("\n\n", $source);

            # The target URL is data, not a pattern: quote it (delimiter
            # included) so that ".", "?", "+" or "!" cannot alter or break the regex
            $link_pattern = "!<a[^>]+?" . preg_quote($to_url, '!') . "[^>]*>([^>]+?)</a>!";

            $excerpt = '';
            foreach ($source as $line) {
                if (strpos($line, $to_url) !== false && preg_match($link_pattern, $line)) {
                    $excerpt = strip_tags($line);
                    break;
                }
            }
            if ($excerpt) {
                $excerpt = '(…) ' . text::cutString(html::escapeHTML($excerpt), 200) . ' (…)';
            } else {
                $excerpt = '(…)';
            }

            # $comment is filled by reference by addBacklink()
            $comment = '';
            $this->addBacklink($post_id, $from_url, '', $title, $excerpt, $comment);

            # All done, thanks
            $code = $this->core->blog->settings->system->trackbacks_pub ? 200 : 202;
            http::head($code);
            return;
        } catch (Exception $e) {
            $err = $e->getMessage();
        }

        http::head(400);
        echo $err ?: 'Something went wrong.';
        return;
    }

    /**
    Check if a post previously received a ping from an URL.

    @param    post_id        <b>integer</b>        Post ID
    @param    from_url    <b>string</b>        Source URL
    @return    <b>mixed</b>    First field of the counting query (truthy when at
                        least one matching trackback comment exists), false otherwise
    */
    private function pingAlreadyDone($post_id, $from_url)
    {
        $params = array(
            'post_id'           => $post_id,
            'comment_site'      => $from_url,
            'comment_trackback' => 1
        );

        # Second argument set to true: getComments() is asked for a count only
        $rs = $this->core->blog->getComments($params, true);
        if ($rs && !$rs->isEmpty()) {
            return ($rs->f(0));
        }

        return false;
    }

    /**
    Create a comment marked as trackback for a given post.

    @param    post_id        <b>integer</b>        Post ID
    @param    url            <b>string</b>        Discovered URL
    @param    blog_name    <b>string</b>        Source blog name (falls back on title, then on 'Anonymous blog')
    @param    title        <b>string</b>        Comment title
    @param    excerpt        <b>string</b>        Source excerpt
    @param    comment        <b>string</b>        Comment content, built here and handed back by reference
    */
    private function addBacklink($post_id, $url, $blog_name, $title, $excerpt, &$comment)
    {
        if (empty($blog_name)) {
            // Let use title as text link for this backlink
            $blog_name = ($title ?: 'Anonymous blog');
        }

        $comment =
            "<!-- TB -->\n" .
            '<p><strong>' . ($title ?: $blog_name) . "</strong></p>\n" .
            '<p>' . $excerpt . '</p>';

        $cur                    = $this->core->con->openCursor($this->core->prefix . 'comment');
        $cur->comment_author    = (string) $blog_name;
        $cur->comment_site      = (string) $url;
        $cur->comment_content   = (string) $comment;
        $cur->post_id           = $post_id;
        $cur->comment_trackback = 1;
        # Published at once or left pending, depending on the blog setting
        $cur->comment_status    = $this->core->blog->settings->system->trackbacks_pub ? 1 : -1;
        $cur->comment_ip        = http::realIP();

        # --BEHAVIOR-- publicBeforeTrackbackCreate
        $this->core->callBehavior('publicBeforeTrackbackCreate', $cur);

        # A behavior may have reset post_id on the cursor to veto the creation
        if ($cur->post_id) {
            $comment_id = $this->core->blog->addComment($cur);

            # --BEHAVIOR-- publicAfterTrackbackCreate
            $this->core->callBehavior('publicAfterTrackbackCreate', $cur, $comment_id);
        }
    }

    /**
    Delete previously received comment made from an URL for a given post.

    @param    post_id        <b>integer</b>        Post ID
    @param    url            <b>string</b>        Source URL
    */
    private function delBacklink($post_id, $url)
    {
        $this->con->execute(
            'DELETE FROM ' . $this->core->prefix . 'comment ' .
            'WHERE post_id = ' . ((integer) $post_id) . ' ' .
            "AND comment_site = '" . $this->core->con->escape((string) $url) . "' " .
            'AND comment_trackback = 1 '
        );
    }

    /**
    Find Charset from HTTP headers.

    When no header is given, the Content-Type of the current request is used.
    The parameter name is matched case-insensitively and the value may be
    quoted (charset="utf-8").

    @param    header    <b>string</b>        Source header
    @return    <b>string</b>    Charset name, or null if none was found
    */
    private static function getCharsetFromRequest($header = '')
    {
        if (!$header && isset($_SERVER['CONTENT_TYPE'])) {
            $header = $_SERVER['CONTENT_TYPE'];
        }

        if ($header && preg_match('|charset\s*=\s*["\']?([a-zA-Z0-9_-]+)|i', $header, $m)) {
            return $m[1];
        }

        return null;
    }

    /**
    Detect encoding.

    @param    content        <b>string</b>        Content to inspect
    @return    <b>string</b>    Detected charset, or false if none of the candidates matches
    */
    private static function detectCharset($content)
    {
        return mb_detect_encoding($content,
            'UTF-8,ISO-8859-1,ISO-8859-2,ISO-8859-3,' .
            'ISO-8859-4,ISO-8859-5,ISO-8859-6,ISO-8859-7,ISO-8859-8,' .
            'ISO-8859-9,ISO-8859-10,ISO-8859-13,ISO-8859-14,ISO-8859-15');
    }

    /**
    Retrieve local post from a given URL.

    @param    to_url        <b>string</b>        Target URL
    @return    <b>record</b>    Record holding the targeted post
    @throws    Exception    If the URL is not on this blog (code 0), or if the
                        post type is unknown, the post is not found or it does not
                        accept trackbacks (code 33)
    */
    private function getTargetPost($to_url)
    {
        # Are you dumb?
        # (preg_quote() is given the "!" delimiter so a "!" in the blog URL cannot break the pattern)
        $reg = '!^' . preg_quote($this->core->blog->url, '!') . '(.*)!';
        if (!preg_match($reg, $to_url, $m)) {
            throw new Exception(__('Any chance you ping one of my contents? No? Really?'), 0);
        }

        # Does the targeted URL look like a registered post type?
        $url_part   = $m[1];
        $p_type     = '';
        $post_url   = '';
        $post_types = $this->core->getPostTypes();
        foreach ($post_types as $k => $v) {
            # 'public_url' is a pattern such as "prefix/%s": keep only its static part
            $reg = '!^' . preg_quote(str_replace('%s', '', $v['public_url']), '!') . '(.*)!';
            if (preg_match($reg, $url_part, $n)) {
                $p_type   = $k;
                $post_url = $n[1];
                break;
            }
        }

        if (empty($p_type)) {
            throw new Exception(__('Sorry but you can not ping this type of content.'), 33);
        }

        # Time to see if we've got a winner...
        $params = array(
            'post_type' => $p_type,
            'post_url'  => $post_url
        );
        $posts = $this->core->blog->getPosts($params);

        # Missed!
        if ($posts->isEmpty()) {
            throw new Exception(__('Oops. Kinda "not found" stuff. Please check the target URL twice.'), 33);
        }

        # Nice try. But, sorry, no.
        if (!$posts->trackbacksActive()) {
            throw new Exception(__('Sorry, dude. This entry does not accept pingback at the moment.'), 33);
        }

        return $posts;
    }

    /**
    Returns content of a distant page, converted to UTF-8 when its charset
    is known and the conversion succeeds.

    @param    from_url    <b>string</b>        Source URL
    @return    <b>string</b>
    @throws    Exception    If the resource is neither HTML nor XHTML
    */
    private function getRemoteContent($from_url)
    {
        $http = self::initHttp($from_url, $from_path);

        # First round : just to be sure the ping comes from an acceptable resource type.
        $http->setHeadersOnly(true);
        $http->get($from_path);
        $c_type = explode(';', $http->getHeader('content-type'));

        # Bad luck. Bye, bye...
        # (media types are case-insensitive and may be surrounded by spaces)
        if (!in_array(strtolower(trim($c_type[0])), array('text/html', 'application/xhtml+xml'), true)) {
            throw new Exception(__('Your source URL does not look like a supported content type. Sorry. Bye, bye!'), 0);
        }

        # Second round : let's go fetch and parse the remote content
        $http->setHeadersOnly(false);
        $http->get($from_path);
        $remote_content = $http->getContent();

        # Convert content charset
        $charset = self::getCharsetFromRequest($http->getHeader('content-type'));
        if (!$charset) {
            $charset = self::detectCharset($remote_content);
        }

        # Detection may fail (false) and iconv() may fail too: in both cases
        # keep the content untouched instead of converting from an empty
        # charset or replacing the whole content by false
        if ($charset && strtolower($charset) != 'utf-8') {
            $converted = iconv($charset, 'UTF-8', $remote_content);
            if ($converted !== false) {
                $remote_content = $converted;
            }
        }

        return $remote_content;
    }
    //@}

    /// @name Discover
    //@{
    /**
    Returns an array containing all discovered trackbacks URLs in
    <var>$text</var>.

    Links for which no ping URL can be found (including unreachable ones)
    are ignored.

    @param    text        <b>string</b>        Input text
    @return    <b>array</b>
    */
    public function discover($text)
    {
        $res = array();

        foreach ($this->getTextLinks($text) as $link) {
            $url = $this->getPingURL($link);

            # getPingURL() answers with null or false when nothing usable was
            # found: only real URLs must end up in the result
            if (is_string($url) && $url !== '') {
                $res[] = $url;
            }
        }

        return $res;
    }

    /**
    Find links into a text.
Links are read from the href attribute of "a" tags and from the cite
    attribute of "blockquote" and "q" tags; only absolute http(s) URLs and
    root-relative paths are kept, each one once.

    @param    text        <b>string</b>        Text to scan
    @return    <b>array</b>    List of distinct URLs
    */
    private function getTextLinks($text)
    {
        # URLs are stored as keys to get rid of duplicates
        $res = array();

        # href attribute on "a" tags
        if (preg_match_all('/<a ([^>]+)>/ms', $text, $match, PREG_SET_ORDER)) {
            foreach ($match as $tag) {
                if (preg_match('/href="((https?:\/)?\/[^"]+)"/ms', $tag[1], $matches)) {
                    $res[$matches[1]] = 1;
                }
            }
        }
        unset($match);

        # cite attributes on "blockquote" and "q" tags
        if (preg_match_all('/<(blockquote|q) ([^>]+)>/ms', $text, $match, PREG_SET_ORDER)) {
            foreach ($match as $tag) {
                if (preg_match('/cite="((https?:\/)?\/[^"]+)"/ms', $tag[2], $matches)) {
                    $res[$matches[1]] = 1;
                }
            }
        }

        return array_keys($res);
    }

    /**
    Check remote header/content to find api trace.

    Tried in this order: trackback RDF chunk, X-Pingback header,
    link rel="pingback" tag, then webmention (Link header or link/a tag).

    @param    url        <b>string</b>        URL to scan
    @return    <b>string</b>    Trackback URL, "endpoint|url" for a pingback,
                        "endpoint|url|webmention" for a webmention, or null if
                        nothing was found or the URL could not be fetched
    */
    private function getPingURL($url)
    {
        # Root-relative link: make it absolute using the current host
        if (strpos($url, '/') === 0) {
            $url = http::getHost() . $url;
        }

        try {
            $http = self::initHttp($url, $path);
            $http->get($path);
            $page_content = $http->getContent();
            $pb_url       = $http->getHeader('x-pingback');
            $wm_url       = $http->getHeader('link');
        } catch (Exception $e) {
            # "Nothing found" is signalled with null, as at the end of this
            # method: a false value here used to pass the "!== null" test
            # done by discover() and to pollute its result
            return null;
        }

        # Let's check for an elderly trackback data chunk...
        $pattern_rdf =
            '/<rdf:RDF.*?>.*?' .
            '<rdf:Description\s+(.*?)\/>' .
            '.*?<\/rdf:RDF>' .
            '/msi';

        preg_match_all($pattern_rdf, $page_content, $rdf_all, PREG_SET_ORDER);

        # parse_url() gives null when the URL has no path (and false on a
        # malformed URL): nothing to sanitize then
        $url_path      = parse_url($url, PHP_URL_PATH);
        $sanitized_url = $url;
        if (is_string($url_path) && $url_path !== '') {
            $sanitized_url = str_replace($url_path, html::sanitizeURL($url_path), $url);
        }

        foreach ($rdf_all as $rdf_match) {
            $rdf = $rdf_match[1];
            if (preg_match('/dc:identifier="' . preg_quote($url, '/') . '"/msi', $rdf) ||
                preg_match('/dc:identifier="' . preg_quote($sanitized_url, '/') . '"/msi', $rdf)) {
                if (preg_match('/trackback:ping="(.*?)"/msi', $rdf, $tb_link)) {
                    return $tb_link[1];
                }
            }
        }

        # No trackback ? OK, let see if we've got a X-Pingback header and it's a valid URL, it will be enough
        if ($pb_url && filter_var($pb_url, FILTER_VALIDATE_URL) && preg_match('!^https?:!', $pb_url)) {
            return $pb_url . '|' . $url;
        }

        # No X-Pingback header. A link rel=pingback, maybe ?
        $pattern_pingback = '!<link rel="pingback" href="(.*?)"( /)?>!msi';

        if (preg_match($pattern_pingback, $page_content, $m)) {
            $pb_url = $m[1];
            if (filter_var($pb_url, FILTER_VALIDATE_URL) && preg_match('!^https?:!', $pb_url)) {
                return $pb_url . '|' . $url;
            }
        }

        # Nothing, let's try webmention. Only support x/html content
        # (media types are case-insensitive and may be surrounded by spaces)
        if ($wm_url) {
            $type = explode(';', $http->getHeader('content-type'));
            if (!in_array(strtolower(trim($type[0])), array('text/html', 'application/xhtml+xml'), true)) {
                $wm_url = false;
            }
        }

        # Check HTTP headers for a Link: <ENDPOINT_URL>; rel="webmention"
        $wm_api = false;
        if ($wm_url) {
            if (preg_match('~<((?:https?://)?[^>]+)>; rel="?(?:https?://webmention.org/?|webmention)"?~', $wm_url, $match)) {
                if (filter_var($match[1], FILTER_VALIDATE_URL) && preg_match('!^https?:!', $match[1])) {
                    $wm_api = $match[1];
                }
            }
        }

        # Else check content for <link href="ENDPOINT_URL" rel="webmention" />
        if ($wm_url && !$wm_api) {
            # HTML comments are dropped first so a commented-out tag is not picked up
            $content = preg_replace('/<!--(.*)-->/Us', '', $page_content);
            if (preg_match('/<(?:link|a)[ ]+href="([^"]*)"[ ]+rel="[^" ]* ?webmention ?[^" ]*"[ ]*\/?>/i', $content, $match)
                || preg_match('/<(?:link|a)[ ]+rel="[^" ]* ?webmention ?[^" ]*"[ ]+href="([^"]*)"[ ]*\/?>/i', $content, $match)) {
                $wm_api = $match[1];
            }
        }

        # We have a winner, let's add some tricks to make diference
        if ($wm_api) {
            return $wm_api . '|' . $url . '|webmention';
        }

        return null;
    }
    //@}

    /**
    HTTP helper.

    Builds a client for the given URL: 5 seconds timeout, Dotclear user
    agent, no gzip, no persistent referers.

    @param    url        <b>string</b>        URL
    @param    path        <b>string</b>        Path, filled by reference by netHttp::initClient()
    @return    <b>object</b>    HTTP client returned by netHttp::initClient()
    */
    private static function initHttp($url, &$path)
    {
        $client = netHttp::initClient($url, $path);
        $client->setTimeout(5);
        $client->setUserAgent('Dotclear - http://www.dotclear.org/');
        $client->useGzip(false);
        $client->setPersistReferers(false);

        return $client;
    }

    /**
    URL helper.

    Checks that both URLs are valid http(s) URLs and that they do not point
    to the same resource.

    @param    from_url        <b>string</b>        URL a
    @param    to_url        <b>string</b>        URL b
    @throws    Exception    If one URL is invalid or if both are the same (code 0)
    */
    public static function checkURLs($from_url, $to_url)
    {
        if (!(filter_var($from_url, FILTER_VALIDATE_URL) && preg_match('!^https?://!', $from_url))) {
            throw new Exception(__('No valid source URL provided? Try again!'), 0);
        }

        if (!(filter_var($to_url, FILTER_VALIDATE_URL) && preg_match('!^https?://!', $to_url))) {
            throw new Exception(__('No valid target URL provided? Try again!'), 0);
        }

        # A page can not mention itself
        if (html::sanitizeURL(urldecode($from_url)) == html::sanitizeURL(urldecode($to_url))) {
            throw new Exception(__('LOL!'), 0);
        }
    }
}
Note: See TracChangeset
for help on using the changeset viewer.