21.08.2023 07:16:20 pm
Класс Phpfox_Request написан в файле: "include/library/phpfox/request/request.class.php", где есть метод: send. Этот метод написан для получения данных со сторонних сайтов.
Код метода:
Я немного поправил код метода. Вот, что у меня получилось:
Моему движку не нужно значение переменной
Возможно, будет интересно: Тема: Как обойти проверку на бота при парсинге стороннего сайта?.
Код метода:
/**
* Send a request to another server. Usually using CURL.
*
* @param string $sUrl URL of the server.
* @param array $aPost $_POST data to send.
* @param string $sMethod Method of request (GET or POST).
* @param string $sUserAgent Useragent to send.
* @param string $aCookies ARRAY of any cookies to pass.
* @return mixed FALSE if failed to connect, STRING if anything was returned from the server.
*/
public function send($sUrl, $aPost = array(), $sMethod = 'POST', $sUserAgent = null, $aCookies = null, $bFollow = false)
{
$aHost = parse_url($sUrl);
$sPost = '';
foreach ($aPost as $sKey => $sValue)
{
$sPost .= '&' . $sKey . '=' . $sValue;
}
// Curl
if (extension_loaded('curl') && function_exists('curl_init'))
{
$hCurl = curl_init();
curl_setopt($hCurl, CURLOPT_URL, (($sMethod == 'GET' && !empty($sPost)) ? $sUrl . '?' . ltrim($sPost, '&') : $sUrl));
curl_setopt($hCurl, CURLOPT_HEADER, false);
curl_setopt($hCurl, CURLOPT_FOLLOWLOCATION, $bFollow);
curl_setopt($hCurl, CURLOPT_RETURNTRANSFER, true);
// Testing this out at the moment...
curl_setopt($hCurl, CURLOPT_SSL_VERIFYPEER, false);
// Run if this is a POST request method
if ($sMethod != 'GET')
{
curl_setopt($hCurl, CURLOPT_POST, true);
curl_setopt($hCurl, CURLOPT_POSTFIELDS, $sPost);
}
// Add the browser agent
curl_setopt($hCurl, CURLOPT_USERAGENT, ($sUserAgent === null ? "" . PHPFOX::BROWSER_AGENT . " (" . PhpFox::getVersion() . ")" : $sUserAgent));
// Check if we need to set some cookies
if ($aCookies !== null)
{
$sLine = "\n";
// Loop thru all the cookies we currently have set
foreach ($aCookies as $sKey => $sValue)
{
// Make sure we don't see the session ID or the browser will crash
if ($sKey == 'PHPSESSID')
{
continue;
}
// Add the cookies
$sLine .= '' . $sKey . '=' . $sValue . '; ';
}
// Trim the cookie
$sLine = trim(rtrim($sLine, ';'));
// Set the cookie
curl_setopt($hCurl, CURLOPT_COOKIE, $sLine);
}
// Run the exec
$sData = curl_exec($hCurl);
// Close the curl connection
curl_close($hCurl);
// Return whatever we can from the curl request
return trim($sData);
}
// file_get_contents()
if ($sMethod == 'GET' && ini_get('allow_url_fopen') && function_exists('file_get_contents'))
{
$sData = file_get_contents($sUrl . "?" . ltrim($sPost, '&'));
return trim($sData);
}
// fsockopen
if (!isset($sData))
{
$hConnection = fsockopen($aHost['host'], 80, $errno, $errstr, 30);
if (!$hConnection)
{
return false;
}
else
{
if ($sMethod == 'GET')
{
$sUrl = $sUrl . '?' . ltrim($sPost, '&');
}
$sSend = "{$sMethod} {$sUrl} HTTP/1.1\r\n";
$sSend .= "Host: {$aHost['host']}\r\n";
$sSend .= "User-Agent: " . PHPFOX::BROWSER_AGENT . " (" . PhpFox::getVersion() . ")\r\n";
$sSend .= "Content-Type: application/x-www-form-urlencoded\r\n";
$sSend .= "Content-Length: " . strlen($sPost) . "\r\n";
$sSend .= "Connection: close\r\n\r\n";
$sSend .= $sPost;
fwrite($hConnection, $sSend);
$sData = '';
while (!feof($hConnection))
{
$sData .= fgets($hConnection, 128);
}
$aResponse = preg_split("/\r\n\r\n/", $sData);
$sHeader = $aResponse[0];
$sData = $aResponse[1];
if(!(strpos($sHeader,"Transfer-Encoding: chunked")===false))
{
$aAux = split("\r\n", $sData);
for($i=0; $i<count($aAux); $i++)
{
if($i==0 | ($i%2==0))
{
$aAux[$i] = '';
}
$sData = implode("",$aAux);
}
}
return chop($sData);
}
}
return false;
}
Я немного поправил код метода. Вот, что у меня получилось:
public function send($url, $method = 'POST', $userAgent = null, $cookies = null, $follow = false, $module = null)
{
$userAgent = $userAgent === null ? $_SERVER['HTTP_USER_AGENT'] : $userAgent;
$host = parse_url($url);
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_USERAGENT, $userAgent);
curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);
curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($curl, CURLOPT_ENCODING, '');
curl_setopt($curl, CURLOPT_HEADER, false);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, $follow);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
if ($method != 'GET')
{
curl_setopt($curl, CURLOPT_POST, true);
}
curl_setopt($curl, CURLOPT_HTTPHEADER, [
'User-Agent: ' . $userAgent,
'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3',
'Connection: keep-alive',
'Upgrade-Insecure-Requests: 1',
'Sec-Fetch-Dest: document',
'Sec-Fetch-Mode: navigate',
'Sec-Fetch-Site: none',
'Sec-Fetch-User: ?1',
'Pragma: no-cache',
'Cache-Control: no-cache'
]
);
# Check if we need to set some cookies
if ($cookies !== null)
{
$line = "\n";
# Loop thru all the cookies we currently have set
foreach ($cookies as $key => $value)
{
# Make sure we don't see the session ID or the browser will crash
if ($key == 'PHPSESSID')
{
continue;
}
# Add the cookies
$line .= $key . '=' . $value . ';';
}
# Trim the cookie
$line = trim(rtrim($line, ';'));
# Set the cookie
curl_setopt($curl, CURLOPT_COOKIE, $line);
}
# Run the exec
$charset = $this->getCharset($url);
if (isset($charset) && !in_array($charset, ['utf-8', 'UTF-8']) && $module != 'video')
{
$data = iconv('CP1251', 'UTF-8', curl_exec($curl));
}
else
{
$data = curl_exec($curl);
}
# Close the curl connection
curl_close($curl);
# Return whatever we can from the curl request
return trim($data);
}
Моему движку не нужно значение переменной
$aPost
, поэтому я его убрал из метода и всех мест, где код обращается к методу.Возможно, будет интересно: Тема: Как обойти проверку на бота при парсинге стороннего сайта?.
- Жалоба