Google trends parser

Автор: trendsparser Написано: 1 г. назад Форматирование: php.
Без нумерации строк
  <?php
  /**
   * Google Hot Trends scraper.
   *
   * Downloads the hourly hot-trends feed, picks 5-6 random keywords from it and,
   * for each keyword, prints an <h1> heading followed by text scraped from
   * Google, MSN and Google Blog Search result pages for that keyword.
   *
   * NOTE(review): everything echoed below comes from remote pages and is printed
   * without HTML escaping. Escape or sanitise it before showing this output to
   * other users.
   */

  // Not referenced anywhere in this script; kept in case other code relies on it.
  $dir = "pages/";

  // Trends parsing
  $trends_url = 'http://www.google.com/trends/hottrends/atom/hourly';
  $trends = file_get_contents($trends_url);
  if ($trends === false) {
      // Download failed: continue with an empty document so no keyword is selected.
      $trends = '';
  }
  $trends = html_entity_decode(htmlentities($trends, ENT_COMPAT, 'UTF-8'));
  preg_match_all("/<a href=\"[^>]*>(.*?)<\/a>/is", $trends, $keywords, PREG_PATTERN_ORDER);

  // Anchor texts of the feed links are the trend keywords.
  $trend_list = isset($keywords[1]) ? $keywords[1] : array();
  shuffle($trend_list);

  $c = rand(5, 6); // How many trends to generate output for
  // The list is already shuffled, so its first $c entries are a random sample
  // without repetition. Unlike repeated array_rand() calls this also works when
  // the feed yields fewer than $c keywords (or none at all).
  $kw = array_slice($trend_list, 0, $c);

  foreach ($kw as $cur_kw) { // $cur_kw is the current trend
      echo "<h1>".$cur_kw."</h1>";

      // The same trimmed, URL-encoded keyword is sent to all three services.
      $query = urlencode(trim($cur_kw));

      $count = rand(4, 9); // Number of results requested from each service

      // Google SERP parsing
      $google_request = 'http://www.google.com/ie?hl=en&num='.$count.'&start=0&lr=&q='.$query;
      $google_pattern = '/<a title="(.*)" href=(.*)>(.*)<\/a>/isU';
      $google_result = file_get_contents($google_request);
      if ($google_result === false) {
          $google_result = '';
      }
      // Replacements run in array order. The escaped bold tags have to be removed
      // before the generic "&gt;" -> "-" rule, which would otherwise break them
      // up and leave "&lt;b-" fragments in the text.
      $google_result = str_replace(
          array("&lt;b&gt;", "&lt;/b&gt;", "&gt;", "<b>", "</b>", "’", " ...", "...", " | ", "&#39;"),
          array("", "", "-", "", "", "'", ".", ".", ", ", "'"),
          $google_result
      );
      preg_match_all($google_pattern, $google_result, $google_data);

      // MSN SERP parsing
      $msn_url = 'http://search.msn.com/results.aspx?format=rss&count='.$count.'&q='.$query;
      $msn_request = file_get_contents($msn_url);
      if ($msn_request === false) {
          $msn_request = '';
      }
      $msn_request = html_entity_decode(htmlentities($msn_request, ENT_COMPAT, 'UTF-8'));
      $msn_request = str_replace(array(" ...", "Live Search: "), array(".", ""), $msn_request);
      preg_match_all("/<description[^>]*>(.*?)<\/description>/is", $msn_request, $msn_desc, PREG_PATTERN_ORDER);

      // Blog search SERP parsing
      $blogs_url = 'http://blogsearch.google.com/blogsearch_feeds?hl=en&ie=utf-8&num='.$count.'&output=atom&q='.$query;
      $blogs_request = file_get_contents($blogs_url);
      if ($blogs_request === false) {
          $blogs_request = '';
      }
      $blogs_request = html_entity_decode(htmlentities($blogs_request, ENT_COMPAT, 'UTF-8'));
      $blogs_request = str_replace(
          array("&lt;b&gt;", "&lt;/b&gt;", "... ", " ...", "&#039;", "--", "Google Blog Search: "),
          "",
          $blogs_request
      );
      preg_match_all("/<title[^>]*>(.*?)<\/title>/is", $blogs_request, $blogs_title, PREG_PATTERN_ORDER);
      preg_match_all("/<content[^>]*>(.*?)<\/content>/is", $blogs_request, $blogs_desc, PREG_PATTERN_ORDER);

      // One output item per Google result. The MSN and blog feeds may return
      // fewer entries than Google, so their missing items fall back to ''.
      $result_total = isset($google_data[1]) ? count($google_data[1]) : 0;
      for ($i = 0; $i < $result_total; $i++) {
          $googlesnippet = $google_data[1][$i]; // Link's title="" attribute, used as the snippet text
          $googlelinkurl = $google_data[2][$i]; // Raw href value of the result link
          $googletitle = $google_data[3][$i];   // Anchor text of the result link

          $msnsnippet = isset($msn_desc[1][$i]) ? $msn_desc[1][$i] : '';          // MSN <description> text
          $blogslinktitle = isset($blogs_title[1][$i]) ? $blogs_title[1][$i] : ''; // Blog search <title> text
          $blogssnippet = isset($blogs_desc[1][$i]) ? $blogs_desc[1][$i] : '';     // Blog search <content> text

          // Content
          $pagetext = $googlesnippet."<br>".$blogslinktitle." ".$msnsnippet."<br>".$blogssnippet."<br><hr>";
          $pagelink = "<a target='_blank' href='".$googlelinkurl."'>".$googletitle."</a> <br>";
          echo $pagetext.$pagelink;
      }
  }

  ?>
Теги: google, trends