.*?(.*?).*?.*?
(.*?)
~is';

// preg_match_all() returns the number of full matches (0 when nothing matched,
// false on a PCRE error). $matches itself is always populated with one sub-array
// per capture group, so it cannot be used to tell "no results" apart.
$match_count = preg_match_all($pattern, $results['content'], $matches);

// $matches[1] = title
// $matches[2] = URL
// $matches[3] = description
// NOTE(review): the rows below emit group 2 first and group 1 second, under a
// header that reads "title, URL". Either the mapping above or the echo order is
// reversed; confirm against the capture groups of the full pattern.
//
// Use tab to separate columns, as commas are often used within descriptions.
// Therefore using commas for column delimitation would break very easily.
if ($match_count) {
    echo "title\tURL\tdescription\n";

    foreach ($matches[2] as $index => $resultItem) {
        // Exactly three tab-separated fields per row, matching the header.
        echo implode("\t", array(
            strip_tags($resultItem),
            strip_tags($matches[1][$index]),
            strip_tags($matches[3][$index]),
        )) . "\n";
    }
}

/**
 * Fetch a URL with cURL and return its body together with the transfer info.
 *
 * cURL is asked to follow redirects itself. If the final status is still
 * 301/302, the response headers are fetched with get_headers() and the first
 * "Location:" header is followed by calling this function again.
 *
 * @param string $url           The URL to fetch. It is trimmed, URL-decoded and
 *                              has "&amp;" turned back into "&" before use.
 * @param int    $timeout       Seconds allowed for connecting and for the whole
 *                              transfer.
 * @param int    $max_redirects How many more times the manual "Location:"
 *                              fallback may re-enter this function.
 * @return array 'content' => the response body (false if curl_exec() failed),
 *               'header'  => the array returned by curl_getinfo().
 */
function get_url($url, $timeout = 5, $max_redirects = 10)
{
    // URLs scraped out of HTML carry entity-encoded ampersands.
    $url = str_replace("&amp;", "&", urldecode(trim($url)));

    // Scratch cookie jar for this request only; removed again below.
    $cookie = tempnam(sys_get_temp_dir(), "CURLCOOKIE");

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_USERAGENT, USER_AGENT);
    curl_setopt($ch, CURLOPT_URL, $url);
    if ($cookie !== false) {
        curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie);
    }
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_ENCODING, "");
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_AUTOREFERER, true);
    // NOTE(review): peer certificate verification is switched off, so HTTPS
    // servers are not authenticated. Kept as-is for compatibility; enable it
    // once a CA bundle is known to be available.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 10);

    $content  = curl_exec($ch);
    $response = curl_getinfo($ch);

    // The cookie jar is written when the handle is released, so release it
    // before deleting the file (on PHP 8+ that happens on unset, not on close).
    curl_close($ch);
    unset($ch);
    if ($cookie !== false && is_file($cookie)) {
        unlink($cookie);
    }

    $result = array('content' => $content, 'header' => $response);

    $status = (int) $response['http_code'];
    if (($status !== 301 && $status !== 302) || $max_redirects <= 0) {
        return $result;
    }

    // cURL stopped on a redirect: look up the target ourselves and retry with
    // the same timeout and one fewer redirect allowed.
    ini_set("user_agent", USER_AGENT);
    $headers = get_headers($response['url']);
    if ($headers) {
        foreach ($headers as $value) {
            if (strncasecmp($value, "location:", 9) === 0) {
                return get_url(trim(substr($value, 9)), $timeout, $max_redirects - 1);
            }
        }
    }

    return $result;
}
?>