前言:
如今不少同学对“php微博发布”比较关注,想要了解一些与之相关的内容。小编也在网络上搜集了一些关于“php微博发布”的资料,希望大家喜欢,我们一起来学习一下吧!
String strURL=";;// 5723344072
URL url=null;
HttpURLConnection httpConn=null;
url = new URL(strURL);
httpConn = (HttpURLConnection) url.openConnection();
//String c="SUB=_2AkMqj-zif8NxqwJRmfkcyG7la4R0ygjEieKc0x05JRMxHRl-yT9jqhUitRB6AQ_CDRrmGwjoWaf2alXg9Yfxki-R4Nwe; SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9W5gfVwXwLLzATj6ArcV1q7i; SINAGLOBAL=2525797642447.1143.1576751690811; _s_tentry=localhost:8080; Apache=9113724801556.377.1583116766626; ULV=1583116766636:2:1:1:9113724801556.377.1583116766626:1582854844672; TC-V5-G0=4de7df00d4dc12eb0897c97413797808; login_sid_t=96a715575970779900d6d744eadd4ef1; cross_origin_proto=SSL; UOR=,,localhost:8080; Ugrow-G0=140ad66ad7317901fc818d7fd7743564; wb_view_log=1920*10801; TC-Page-G0=1ae767ccb34a580ffdaaa3a58eb208b8|1584343362|1584343362";
//String c="SINAGLOBAL=2525797642447.1143.1576751690811; _s_tentry=localhost:8080; Apache=9113724801556.377.1583116766626; ULV=1583116766636:2:1:1:9113724801556.377.1583116766626:1582854844672; TC-V5-G0=4de7df00d4dc12eb0897c97413797808; login_sid_t=96a715575970779900d6d744eadd4ef1; cross_origin_proto=SSL; Ugrow-G0=140ad66ad7317901fc818d7fd7743564; WBtopGlobal_register_version=3d5b6de7399dfbdb; wb_view_log_6439293145=1920*10801; wb_view_log=1920*10801; UOR=,,; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WWlIp9eUCCs0AXkbTy9zp7x5JpX5K2hUgL.Foqpeo-NeKqNS0.2dJLoIEXLxKqLBonL1h-LxKMLB.2LB-qLxKML1-2L1hBLxKnLBKqL1h2LxKqLB-BLB.zt; ALF=1615971499; SSOLoginState=1584435500; SCF=AjcCfB6DUrrZ2fMhnntI_TyQc2JsccpWc3X4bHbuPEpJHcrUMAiEDq2Fby6kEoayWwopa6y9lMEbqh1h7NHOffM.; SUB=_2A25zdOF8DeRhGeBP6VcW8SjLzDWIHXVQAFW0rDV8PUNbmtANLUr-kW9NRWCbkhivb5UzMh1zGT7KgW6D-dSnnHFj; SUHB=0sqBpKsKWKrq8Z; un=18595757685; wvr=6; wb_view_log_6125716779=1920*10801; TC-Page-G0=1ae767ccb34a580ffdaaa3a58eb208b8|1584440113|1584440107; webim_unReadCount=%7B%22time%22%3A1584440351054%2C%22dm_pub_total%22%3A0%2C%22chat_group_client%22%3A0%2C%22allcountNum%22%3A3%2C%22msgbox%22%3A0%7D";
//String c="SINAGLOBAL=2525797642447.1143.1576751690811; _s_tentry=localhost:8080; Apache=9113724801556.377.1583116766626; ULV=1583116766636:2:1:1:9113724801556.377.1583116766626:1582854844672; TC-V5-G0=4de7df00d4dc12eb0897c97413797808; login_sid_t=96a715575970779900d6d744eadd4ef1; cross_origin_proto=SSL; Ugrow-G0=140ad66ad7317901fc818d7fd7743564; WBtopGlobal_register_version=3d5b6de7399dfbdb; wb_view_log_6439293145=1920*10801; wb_view_log=1920*10801; UOR=,,; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WWlIp9eUCCs0AXkbTy9zp7x5JpX5K2hUgL.Foqpeo-NeKqNS0.2dJLoIEXLxKqLBonL1h-LxKMLB.2LB-qLxKML1-2L1hBLxKnLBKqL1h2LxKqLB-BLB.zt; ALF=1615971499; SSOLoginState=1584435500; SCF=AjcCfB6DUrrZ2fMhnntI_TyQc2JsccpWc3X4bHbuPEpJHcrUMAiEDq2Fby6kEoayWwopa6y9lMEbqh1h7NHOffM.; SUB=_2A25zdOF8DeRhGeBP6VcW8SjLzDWIHXVQAFW0rDV8PUNbmtANLUr-kW9NRWCbkhivb5UzMh1zGT7KgW6D-dSnnHFj; SUHB=0sqBpKsKWKrq8Z; un=18595757685; wvr=6; wb_view_log_6125716779=1920*10801; webim_unReadCount=%7B%22time%22%3A1584440603192%2C%22dm_pub_total%22%3A0%2C%22chat_group_client%22%3A0%2C%22allcountNum%22%3A3%2C%22msgbox%22%3A0%7D; TC-Page-G0=b993e9b6e353749ed3459e1837a0ae89|1584440608|1584440580";
// Cookie header value that is sent with the request (see the "Cookie" request property set below).
// NOTE(review): this is a hard-coded session cookie committed to source; it presumably
// expires and identifies a real session -- consider loading it from configuration instead.
String c="SINAGLOBAL=2525797642447.1143.1576751690811; UOR=,,login.sina.com.cn; TC-V5-G0=595b7637c272b28fccec3e9d529f251a; SSOLoginState=1585210218; Ugrow-G0=7e0e6b57abe2c2f76f677abd9a9ed65d; wvr=6; _s_tentry=weibo.com; Apache=7211436044072.67.1585211180994; ULV=1585211181930:3:2:1:7211436044072.67.1585211180994:1583116766636; SUB=_2AkMp3ULYf8PxqwJRmfkcyG7la4R0ygjEieKfgbMDJRMxHRl-yT9jqk8GtRB6Al1sKDCUM-bsv44hS2JWofGDBG0WLLhQ; SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9W5gfVwXwLLzATj6ArcV1q7i; TC-Page-G0=62b98c0fc3e291bc0c7511933c1b13ad|1585565168|1585565167";
//String c="SINAGLOBAL=2525797642447.1143.1576751690811; UOR=,,login.sina.com.cn; TC-V5-G0=595b7637c272b28fccec3e9d529f251a; SSOLoginState=1585210218; Ugrow-G0=7e0e6b57abe2c2f76f677abd9a9ed65d; wvr=6; _s_tentry=weibo.com; Apache=7211436044072.67.1585211180994; ULV=1585211181930:3:2:1:7211436044072.67.1585211180994:1583116766636; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WWlIp9eUCCs0AXkbTy9zp7x5JpX5KMhUgL.Foqpeo-NeKqNS0.2dJLoIEXLxKqLBonL1h-LxKMLB.2LB-qLxKML1-2L1hBLxKnLBKqL1h2LxKqLB-BLB.zt; ALF=1616895226; SCF=AjcCfB6DUrrZ2fMhnntI_TyQc2JsccpWc3X4bHbuPEpJIndU4aQ389BrJPQyB4i6Qj847pZmhvQfZIHMPwC8ARc.; SUB=_2A25zetktDeRhGeBP6VcW8SjLzDWIHXVQDk3lrDV8PUNbmtAKLWXFkW9NRWCbkkZQxvxIVxXfNU1QVQBfoeucUtmz; SUHB=0LcSPzPnzcU0HI; wb_view_log_6125716779=1920*10801; TC-Page-G0=841d8e04c4761f733a87c822f72195f3|1585363186|1585363180; webim_unReadCount=%7B%22time%22%3A1585363187658%2C%22dm_pub_total%22%3A0%2C%22chat_group_client%22%3A0%2C%22allcountNum%22%3A36%2C%22msgbox%22%3A0%7D";
// Key point 1: Weibo relies on cookies. Without a Cookie header the scraper gets back a
// nearly empty page with no content, nothing like what the browser dev tools (F12) show.
httpConn.setRequestProperty("Cookie", c);
httpConn.setRequestProperty("charset", "utf-8");

// Read the whole response body into one string. try-with-resources closes the reader
// (and the stream it wraps) even if reading fails; the connection is released afterwards.
String buf;
try (BufferedReader bufReader = new BufferedReader(
        new InputStreamReader(httpConn.getInputStream(), "utf-8"))) {
    StringBuilder contentBuf = new StringBuilder();
    String line;
    while ((line = bufReader.readLine()) != null) {
        // Lines are concatenated without their terminators, as before.
        contentBuf.append(line);
    }
    buf = contentBuf.toString();
} finally {
    httpConn.disconnect();
}
System.out.println(buf);
Document document = Jsoup.parse(buf);

// Key point 2: the page content is rendered by <script> blocks, and each block carries its
// payload as ordinary JSON. The content is spread over many scripts, so the important step
// is locating the script that holds the wanted section; the loop below does that by looking
// for the section's module name in the text that precedes the "html":" field.
Elements elements = document.select("script");
for (Element element : elements) {
    // Keep only the text in front of an embedded "<script>FM.view" marker.
    // String.split drops trailing empty strings, so the result can be empty.
    String[] viewParts = element.data().split("<script>FM.view");
    if (viewParts.length == 0) {
        continue;
    }
    String s1 = viewParts[0];

    // parts[0] is the text before the "html":" field, parts[1] the escaped HTML payload.
    // Fewer than two parts means the script has no usable payload.
    String[] htmlParts = s1.split("\"html\":\"");
    if (htmlParts.length < 2) {
        continue;
    }
    String moduleInfo = htmlParts[0];
    boolean isProfileHeader = moduleInfo.contains("Pl_Official_Headerv6__1");
    boolean isCounters = moduleInfo.contains("Pl_Core_T8CustomTriColumn__3");
    if (!isProfileHeader && !isCounters) {
        continue;
    }

    // Peel the payload layer by layer: drop the escaped \t, \n and \r sequences, then
    // unescape \" and \/ so the remainder is plain HTML.
    String content = htmlParts[1]
            .replaceAll("(\\\\t|\\\\n|\\\\r)", "")
            .replaceAll("\\\\\"", "\"")
            .replaceAll("\\\\/", "/");
    // Cut the last 13 characters when the payload is longer than that
    // (presumably the closing part of the surrounding JSON/script -- TODO confirm).
    if (content.length() > 13) {
        content = content.substring(0, content.length() - 13);
    }
    Document header = Jsoup.parse(content);

    if (isProfileHeader) {
        // Nickname and avatar URL come from the "username" and "photo" elements.
        String nickName = header.getElementsByClass("username").text();
        String img_url = header.getElementsByClass("photo").attr("src");
        // Originally meant to be stored on a result object:
        // w.setNickname(nickName);
        // w.setImg_url(img_url);
        System.out.println(nickName);
        System.out.println(img_url);
    }

    if (isCounters) {
        // The counters use one of three CSS classes; try them in order.
        Elements data = header.getElementsByClass("W_f14");
        if (data.isEmpty()) {
            data = header.getElementsByClass("W_f16");
        }
        if (data.isEmpty()) {
            data = header.getElementsByClass("W_f18");
        }
        // The second counter is the one the original code stores as the fan count.
        // Guard the index so a changed page layout no longer aborts the whole run.
        if (data.size() > 1) {
            String fun = data.get(1).text();
            // w.setFan_num(fun);
            System.out.println(fun);
        } else {
            System.err.println("Fan count not found in Pl_Core_T8CustomTriColumn__3 markup");
        }
    }
}
/*//System.out.println(elements);
System.out.println(elements.size());
String PCD_header=elements.get(10).data().split("<script>FM.view")[0];
if (PCD_header.contains("\"html\":\"")) {
String content = PCD_header.split("\"html\":\"")[1].replaceAll("(\\\\t|\\\\n|\\\\r)", "").replaceAll("\\\\\"", "\"").replaceAll("\\\\/", "/");
content = content.substring(0,content.length() <= 13 ? content.length(): content.length() - 13);
Document header = Jsoup.parse(content);
Elements headerphoto= header.getElementsByClass("photo");
Elements username= header.getElementsByClass("username");
String nickName=username.text();
String img_url=headerphoto.attr("src");
System.out.println(nickName);
System.out.println(img_url);
}
//关注 粉丝 发布微博数
String Column__3=elements.get(13).data().split("<script>FM.view")[0];
//System.out.println(Column__3);
if (Column__3.contains("\"html\":\"")) {
String content = Column__3.split("\"html\":\"")[1].replaceAll("(\\\\t|\\\\n|\\\\r)", "").replaceAll("\\\\\"", "\"").replaceAll("\\\\/", "/");
content = content.substring(0,content.length() <= 13 ? content.length(): content.length() - 13);
Document header = Jsoup.parse(content);
Elements data= header.getElementsByClass("W_f14");
//String guanzhu=data.get(0).text();
String fun=data.get(1).text();
//String weibo=data.get(2).text();
//System.out.println(guanzhu);
System.out.println(fun);
// System.out.println(weibo);
} */
/*System.out.println("--------------------------------------------------------");
//时间 转发 评论 赞 带U的微博链接32 不带的31
String Pl_Official_MyProfileFeed__21=elements.get(32).data().split("<script>FM.view")[0];
// System.out.println(Pl_Official_MyProfileFeed__21);
if (Pl_Official_MyProfileFeed__21.contains("\"html\":\"")) {
String content = Pl_Official_MyProfileFeed__21.split("\"html\":\"")[1].replaceAll("(\\\\t|\\\\n|\\\\r)", "").replaceAll("\\\\\"", "\"").replaceAll("\\\\/", "/");
content = content.substring(0,content.length() <= 13 ? content.length(): content.length() - 13);
Document header = Jsoup.parse(content);
// System.out.println(header);
Elements data= header.getElementsByClass("WB_feed_like");
for (Element element : data) {
// element.text();S_txt2
Elements WB_from=element.getElementsByClass("WB_from");
String time=WB_from.get(0).getElementsByTag("a").get(0).text();
String href=WB_from.get(0).getElementsByTag("a").get(0).attr("href");
System.out.println(time);
System.out.println(href);
Elements WB_row_line=element.getElementsByClass("WB_row_line");
Elements S_line1=WB_row_line.get(0).getElementsByClass("S_line1");
String zhuanfa=S_line1.get(1).getElementsByTag("em").get(1).text();
if(zhuanfa.equals("转发")) {
zhuanfa="0";
}
System.out.println(zhuanfa);
String pinglun=S_line1.get(3).getElementsByTag("em").get(1).text();
System.out.println(pinglun);
String zan=S_line1.get(5).getElementsByTag("em").get(1).text();
System.out.println(zan);
}
} */
}