获取微信公众号文章内视频真实地址 - 新闻资讯 - 云南小程序开发|云南软件开发|云南网站建设-昆明葵宇信息科技有限公司

159-8711-8523

云南网站建设/小程序开发/软件开发

知识

不管是网站,软件还是小程序,都要直接或间接能为您产生价值,我们在追求其视觉表现的同时,更侧重于功能的便捷,营销的便利,运营的高效,让网站成为营销工具,让软件能切实提升企业内部管理水平和效率。优秀的程序为后期升级提供便捷的支持!

您当前位置>首页 » 新闻资讯 » 公众号相关 >

获取微信公众号文章内视频真实地址

发表时间:2020-10-28

发布人:葵宇科技

浏览次数:62

PHP获取微信公众号文章内视频与音频真实地址


微信公众号文章的爬取可以参考这个项目:GitHub 上基于 QueryList 的微信公众号文章获取工具。


有任何问题可以留言。

返回数据:

  1. 包含音频和视频:按照音频格式和视频格式分别返回。
  2. 包含多个视频:每个视频各返回一条记录。

此前找到的文章《如何抓取微信公众号文章里面的视频》中,只介绍了公众号文章内腾讯视频的获取;本方案在此基础上增加了微信视频素材和音频的获取,并优化了腾讯视频的获取方案。

  • 文章内腾讯视频的真实地址获取
  • 文章内的视频素材的真实地址获取
  • 文章内的音频的获取
  • 文章资源下载

请求

<?php

// Entry script: resolve the media addresses of the article given as ?url=...
// and dump the result.
//
// A missing or non-string parameter is normalised to '' so that
// actionGetwx() prints its own hint instead of PHP raising an
// "undefined index" notice (or receiving an array from ?url[]=...).
$url = (isset($_GET['url']) && is_string($_GET['url'])) ? trim($_GET['url']) : '';
$con = new Con();
$res = $con->actionGetwx($url);
var_dump($res);
die;

获取资源

/**
 * Resolves the real media addresses (videos and audio clips) referenced by a
 * WeChat official-account article.
 *
 * All network access goes through the static helpers of the Tools class.
 *
 * @author Fawkes
 * @time 2020-10-28
 */
class con {
    /**
     * Fetch an article and resolve every video / audio clip it references.
     *
     * @param string $url Article address.
     * @return array|bool ['video' => [...], 'voice' => [...]], each entry being
     *                    the array built by weishiQQCom(), vQQCom() or
     *                    voiceInfo(); false (after echoing a hint) when $url
     *                    is empty or uses a scheme other than http/https.
     */
    public function actionGetwx($url) {
        if (empty($url) || !is_string($url)) {
            echo '请输入公众号文章地址';
            return false;
        }
        // The address is handed to a server-side cURL request, so an explicit
        // scheme other than http/https (file://, gopher://, ...) is refused.
        // Scheme-less addresses keep working as before.
        if (preg_match('#^([a-z][a-z0-9+.\-]*)://#i', $url, $scheme)
            && !in_array(strtolower($scheme[1]), ['http', 'https'], true)) {
            echo '请输入公众号文章地址';
            return false;
        }

        $info_id_arr = $this->getChatInfoId($url);
        // Resolve each collected id to its real address.
        $info_arr = [];
        foreach ($info_id_arr as $key => $value) {
            $ids = is_array($value) ? $value : [];
            switch ($key) {
                case 'video':
                    $info_arr['video'] = [];
                    foreach ($ids as $vid) {
                        $video_json = Tools::getVqqInfo($vid);
                        // Tencent Video answers 'vid is wrong' for ids it does
                        // not know; those (and undecodable answers, which used
                        // to crash vQQCom() with a TypeError) are looked up as
                        // account material videos instead.
                        $is_tencent = is_array($video_json)
                            && !(!empty($video_json['msg']) && $video_json['msg'] === 'vid is wrong');
                        $info_arr['video'][] = $is_tencent
                            ? $this->vQQCom($video_json)
                            : $this->weishiQQCom($vid);
                    }
                    break;
                case 'voice':
                    $info_arr['voice'] = [];
                    foreach ($ids as $vid) {
                        $info_arr['voice'][] = $this->voiceInfo($vid);
                    }
                    break;
                default:
                    break;
            }
        }
        return $info_arr;
    }

    /**
     * Collect the video ids and audio ids referenced by an article.
     *
     * The article is requested with f=json appended. Video ids come from the
     * 'video_ids' field; when that is empty the first <iframe data-src="...">
     * of 'content_noencode' is parsed for a vid query parameter. Audio ids
     * come from the 'voice_id' column of 'voice_in_appmsg'.
     *
     * NOTE: as before, this method echoes a message and terminates the script
     * when no video can be determined - but no longer when the article at
     * least contains audio, so audio-only articles are returned normally.
     *
     * @param string $url Article address (long or short link form).
     * @return array ['video' => string[], 'voice' => array]
     */
    public function getChatInfoId($url){
        // A fragment (e.g. #wechat_redirect) must go before the query is
        // extended, otherwise "f=json" would become part of the fragment.
        $hash = strpos($url, '#');
        if ($hash !== false) {
            $url = substr($url, 0, $hash);
        }
        $json = $url . (strpos($url, '?') !== false ? '&' : '?') . 'f=json';

        $data = json_decode((string) Tools::curl_request($json), true);
        if (!is_array($data)) {
            // Unreachable article or non-JSON answer: treat as "nothing found".
            $data = [];
        }

        // Video ids listed directly in the JSON answer.
        $vid_arr = (isset($data['video_ids']) && is_array($data['video_ids']))
            ? $data['video_ids']
            : [];
        // Audio ids; the fallback has to apply to the *input* of
        // array_column(), which does not accept null.
        $voice_src = (isset($data['voice_in_appmsg']) && is_array($data['voice_in_appmsg']))
            ? $data['voice_in_appmsg']
            : [];
        $voice_arr = array_column($voice_src, 'voice_id');

        if (empty($vid_arr)) {
            // Fall back to the article body and look for an embedded player.
            $html = isset($data['content_noencode']) ? (string) $data['content_noencode'] : '';
            preg_match_all('/<iframe (.*?)data-src="(.*?)">/', $html, $matchs);
            if (empty($matchs[2])) {
                if (empty($voice_arr)) {
                    // Neither video nor audio: nothing to collect.
                    echo '没有视频匹配到,不采集';
                    die;
                }
                // Audio-only article: continue with an empty video list.
            } else {
                // Only the first iframe is considered.
                $iframe_url = current($matchs[2]);
                if (!filter_var($iframe_url, FILTER_VALIDATE_URL)) {
                    echo '视频地址异常:'.$iframe_url;
                    die;
                }
                // e.g. https://v.qq.com/iframe/preview.html?vid=i1324786hv8&width=500&height=375&auto=0
                $iframe_url = str_replace('&amp;', '&', $iframe_url);
                $vid = $this->extractVidFromUrl($iframe_url);
                if ($vid === '') {
                    echo '视频参数异常:'.(string) parse_url($iframe_url, PHP_URL_QUERY);
                    die;
                }
                $vid_arr = [$vid];
            }
        }

        return [
            'video' => $vid_arr,
            'voice' => $voice_arr,
        ];
    }

    /**
     * Read the "vid" query parameter of a player URL.
     *
     * Uses real query-string parsing, so the parameter is found wherever it
     * appears (including first, as in the documented example URL) and never
     * carries trailing "&width=..." parameters with it.
     *
     * @param string $url Player URL with entities already decoded.
     * @return string The vid, or '' when the URL has none.
     */
    private function extractVidFromUrl($url){
        $query = parse_url($url, PHP_URL_QUERY);
        if (!is_string($query) || $query === '') {
            return '';
        }
        parse_str($query, $params);
        return (isset($params['vid']) && is_string($params['vid'])) ? $params['vid'] : '';
    }

    /**
     * Resolve the address of a video stored as official-account material.
     *
     * @param string $vid Video id.
     * @return array ['vid', 'type', 'title', 'url']; 'url' is '' when the
     *               player endpoint returned no usable 'url_info' entry.
     */
    private function weishiQQCom($vid){
        $url = 'https://mp.weixin.qq.com/mp/videoplayer?action=get_mp_video_play_url&vid='.$vid;
        $data = json_decode((string) Tools::curl_request($url), true);

        // Build the final address from the first url_info entry, if present.
        $first = $data['url_info'][0] ?? null;
        $real_url = '';
        if (is_array($first) && !empty($first['url'])) {
            $format_id = $first['format_id'] ?? '';
            $real_url = $first['url']."&vid=$vid&format_id=$format_id";
        }
        return [
            'vid' => $vid,
            'type' => '公众号素材视频',
            'title' => (is_array($data) && isset($data['title'])) ? $data['title'] : '',
            'url' => $real_url
        ];
    }


    /**
     * Build the playable address of a Tencent Video item.
     *
     * The highest quality stream is taken to be the last entry of fl.fi. A
     * key for the first segment of that stream is requested; when no key is
     * returned, the address falls back to "<lnk>.mp4" signed with the fvkey
     * that came with the video info.
     *
     * Only the first segment is resolved: this method has always returned
     * just the first address, so requesting keys for the remaining segments
     * only cost extra HTTP round trips.
     *
     * @param array $video_json Decoded getinfo answer (see Tools::getVqqInfo()).
     * @return array ['vid', 'type', 'title', 'url']; 'url' is false when the
     *               answer carries no video entry or no host to build from.
     */
    private function vQQCom(array $video_json){
        $info = $video_json['vl']['vi'][0] ?? null;
        if (!is_array($info)) {
            // Error answers (e.g. region or copyright refusals) have no vl.vi.
            return [
                'vid' => '',
                'type' => '腾讯视频',
                'title' => '',
                'url' => false
            ];
        }

        $title = $info['ti'] ?? '';
        $vid = $info['vid'] ?? '';
        $fn_pre = $info['lnk'] ?? '';
        $host = $info['ul']['ui'][0]['url'] ?? '';

        // Last listed stream format.
        $streams = $video_json['fl']['fi'] ?? [];
        $best = (is_array($streams) && !empty($streams)) ? end($streams) : [];
        $part_format_id = (int) ($best['id'] ?? 0);

        // Ask for the key of segment 1.
        $filename = $fn_pre . '.p' . ($part_format_id % 10000) . '.1.mp4';
        $key_api = "http://vv.video.qq.com/getkey?otype=json&platform=11&format="
            . $part_format_id . "&vid=" . $vid . "&filename=" . $filename . "&appver=3.2.19.333";
        $vkey = '';
        $part_info = Tools::curl($key_api);
        if (is_string($part_info) && preg_match('/QZOutputJson=(.*);$/Uis', $part_info, $key_match)) {
            $key_json = json_decode($key_match[1], true);
            if (is_array($key_json) && !empty($key_json['key'])) {
                $vkey = $key_json['key'];
            }
        }

        if ($host === '') {
            $url = false;
        } elseif ($vkey !== '') {
            $url = $host . $filename . '?vkey=' . $vkey;
        } else {
            // No segment key: use the whole-file name with the bundled key.
            $url = $host . $fn_pre . '.mp4?vkey=' . ($info['fvkey'] ?? '');
        }

        return [
            'vid' => $vid,
            'type' => '腾讯视频',
            'title' => $title,
            'url' => $url
        ];
    }


    /**
     * Build the download address of an audio clip.
     *
     * @param string $vid Audio media id.
     * @return array ['vid', 'type', 'url']
     */
    private function voiceInfo(string $vid){
        return [
            'vid' => $vid,
            'type' => '音频资料',
            'url' => 'https://res.wx.qq.com/voice/getvoice?mediaid='.$vid
        ];
    }
}

使用的工具类




/**
 * Static HTTP and string helpers used by the article media resolver.
 */
class Tools {
    /**
     * Download a remote file to disk.
     *
     * @param string $url     Remote address; spaces are sent as %20.
     * @param string $file    Target path; defaults to the basename of $url.
     * @param int    $timeout Transfer timeout in seconds.
     * @return string|bool The target path on success, false on failure or
     *                     when the download is empty.
     */
    public  static function httpcopy($url, $file="", $timeout=60) {
        $file = empty($file) ? pathinfo($url,PATHINFO_BASENAME) : $file;
        $dir = pathinfo($file,PATHINFO_DIRNAME);
        if (!is_dir($dir)) {
            @mkdir($dir,0755,true);
        }
        $url = str_replace(" ","%20",$url);

        if(function_exists('curl_init')) {
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
            $temp = curl_exec($ch);
            $failed = ($temp === false) || curl_errno($ch) !== 0;
            // Release the handle on every path (it used to leak).
            curl_close($ch);
            if ($failed) {
                // Check the transfer first so a failed download does not
                // leave an empty file behind.
                return false;
            }
            return @file_put_contents($file, $temp) ? $file : false;
        }

        // No cURL extension: fall back to a stream copy.
        $opts = [
            "http" => [
                "method" => "GET",
                "header" => "",
                "timeout" => $timeout,
            ],
        ];
        $context = stream_context_create($opts);
        return @copy($url, $file, $context) ? $file : false;
    }


    /**
     * Perform a GET (or, when $post is non-empty, a POST) request.
     *
     * @param string       $url          Request address.
     * @param array|string $post         POST fields; an array is URL-encoded,
     *                                   a string is sent as given.
     * @param array        $header       Extra request header lines.
     * @param string       $cookie       Cookie header value to send.
     * @param int          $returnCookie When truthy, response headers are
     *                                   captured and the first cookie is returned.
     * @return string|array The response body; or, with $returnCookie,
     *                      ['cookie' => ..., 'content' => ...]. On a transfer
     *                      error the cURL error message is returned in place
     *                      of the body (historic behaviour, kept for callers).
     */
    public static function curl_request($url, $post = '',$header = [], $cookie = '', $returnCookie = 0) {
        $curl = curl_init();
        curl_setopt($curl, CURLOPT_URL, $url);
        curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)');
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($curl, CURLOPT_AUTOREFERER, 1);
        curl_setopt($curl, CURLOPT_REFERER, "http://XXX");
        if ($post) {
            curl_setopt($curl, CURLOPT_POST, 1);
            // http_build_query() only accepts arrays/objects; the default
            // parameter type is a string, which must be passed through as-is.
            $fields = (is_array($post) || is_object($post)) ? http_build_query($post) : (string) $post;
            curl_setopt($curl, CURLOPT_POSTFIELDS, $fields);
        }
        if($header){
            curl_setopt($curl,CURLOPT_HTTPHEADER,$header);
        }
        if ($cookie) {
            curl_setopt($curl, CURLOPT_COOKIE, $cookie);
        }
        curl_setopt($curl, CURLOPT_HEADER, $returnCookie);
        curl_setopt($curl, CURLOPT_TIMEOUT, 60);
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
        $data = curl_exec($curl);
        if (curl_errno($curl)) {
            $error = curl_error($curl);
            curl_close($curl);
            return $error;
        }
        curl_close($curl);

        if (!$returnCookie) {
            return $data;
        }

        // Headers and body are separated by the first blank line.
        $parts = explode("\r\n\r\n", (string) $data, 2);
        $head = $parts[0];
        $body = isset($parts[1]) ? $parts[1] : '';
        // Header names are case-insensitive (HTTP/2 sends them lower-case).
        preg_match_all("/Set\-Cookie:([^;]*);/i", $head, $matches);
        $info = [];
        // substr(..., 1) drops the space that follows the colon.
        $info['cookie'] = isset($matches[1][0]) ? (string) substr($matches[1][0], 1) : '';
        $info['content'] = $body;
        return $info;
    }

    /**
     * Query the Tencent Video getinfo endpoint for a video id.
     *
     * The endpoint answers with "QZOutputJson={...};"; the JSON part is
     * extracted and decoded.
     *
     * @param string $vid Video id.
     * @return array|null Decoded answer, or null when the response did not
     *                    have the expected shape or was not valid JSON.
     */
    public static function getVqqInfo($vid) {
        $infourl = 'https://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333&platform=11&defnpayver=1&vid='
            . rawurlencode((string) $vid);
        $data = self::curl($infourl);
        if (!is_string($data) || !preg_match('/QZOutputJson=(.*);$/Uis', $data, $info)) {
            return null;
        }
        return json_decode($info[1], 1);
    }

    /**
     * Return the file extension of the path part of a URL.
     *
     * @param string $url
     * @return string Text after the last dot of the file name, or the literal
     *                'no extension' when the name has no dot (or the URL has
     *                no path at all).
     */
    public static function getExt($url) {
        $path = parse_url($url, PHP_URL_PATH);
        $file = is_string($path) ? basename($path) : '';
        $dot = strrpos($file, '.');
        if ($dot === false) {
            return 'no extension';
        }
        return (string) substr($file, $dot + 1);
    }

    /**
     * Strip whitespace and punctuation from a string.
     *
     * NOTE(review): the previous listing contained several exact duplicate
     * replacements (e.g. '!' twice, '(' and ')' twice). They look like the
     * full-width forms (!()；:,?) that were normalised to ASCII when the
     * listing was published, so the full-width forms are listed here next to
     * their ASCII counterparts - confirm this matches the intended filter.
     *
     * @param string $str
     * @return string $str without the listed characters, trimmed.
     */
    public static function strFilter($str) {
        // Removed in order, exactly like the former chain of str_replace() calls.
        $strip = [
            '  ', ' ',
            '`', '·', '~',
            '!', '!',
            '@', '#', '$', '¥', '%', '^', '……', '&', '*',
            '(', ')', '(', ')',
            '-', '_', '——', '+', '=', '|', '\\',
            '[', ']', '【', '】', '{', '}',
            ';', ';', ':', ':',
            '\'', '"', '“', '”',
            ',', ',',
            '<', '>', '《', '》',
            '.', '。', '/', '、',
            '?', '?',
        ];
        return trim(str_replace($strip, '', $str));
    }

    /**
     * GET a URL with browser-like headers and a per-host cookie file.
     *
     * Cookies are kept in "<this directory>/<host>.cookie".
     *
     * NOTE(review): certificate verification is switched off here (as it was
     * before); enable it if the runtime has a CA bundle.
     *
     * @param string $url    Request address.
     * @param array  $option When non-empty, used INSTEAD of all defaults
     *                       below - it must then carry CURLOPT_URL itself.
     * @return string|bool Response body, or false on failure.
     */
    public static function curl($url, $option = []) {
        $split = explode('/', $url);
        // Third "/"-separated piece of "scheme://host/..." is the host.
        $host = isset($split[2]) ? $split[2] : '';
        $cookiejar = str_replace('\\', '/', dirname(__FILE__)) . '/' . $host . '.cookie';
        $ch = curl_init();
        $options = $option?$option:[
            CURLOPT_URL => $url,
            // Each entry must be a plain "Name: value" line. The former
            // entries for Accept-Encoding / Accept-Language / User-Agent
            // carried stray quotes (Python dict syntax) and were therefore
            // not sent as those headers.
            CURLOPT_HTTPHEADER => [
                'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Charset: UTF-8,*;q=0.5',
                'Accept-Language: en-US,en;q=0.8',
                'User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0',
            ],
            // '' lets cURL advertise every encoding it can decode itself, so
            // compressed answers are transparently decompressed.
            CURLOPT_ENCODING => '',
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_FOLLOWLOCATION => 1,
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_SSL_VERIFYHOST => false,
            CURLOPT_CONNECTTIMEOUT => 5,
            CURLOPT_TIMEOUT => 5,
            CURLOPT_COOKIEJAR => $cookiejar,
            CURLOPT_COOKIEFILE => $cookiejar,
        ];
        curl_setopt_array($ch, $options);
        $response = curl_exec($ch);
        curl_close($ch);
        return $response;
    }
}

相关案例查看更多