遇到 HTTPS 网站时, 用 C# 发起 HTTP 请求总是报 SSL 连接错误. 后来经搜索, 发现有解决方案:
.net 2.0 需要引入一个第三方组件: BouncyCastle.dll, 这是我写的一个例子:
/// <summary>
/// Sends a GET or POST request to <paramref name="uri"/> over a TLS connection opened with
/// <c>OpenTlsConnection</c> and returns the raw response text.
/// </summary>
/// <param name="uri">Target address; its host, port and path/query are used to build the request.</param>
/// <param name="method">HTTP verb. It is upper-cased and compared against GETMETHOD / POSTMETHOD.</param>
/// <param name="parameter">
/// Query parameters (GET) or form fields (POST). May be null. Keys holding several values are
/// emitted as repeated <c>key=value</c> pairs rather than one comma-joined value.
/// </param>
/// <param name="cookie">Value of the Cookie header; the header is omitted when null or empty.</param>
/// <param name="encoding">Encoding used to URL-encode parameters and to encode the POST body; UTF-8 when null.</param>
/// <returns>
/// Every line read from the connection (status line, headers and body), or an empty string when
/// the request fails. Reading stops at the first line containing <c>&lt;/html&gt;</c> or at end of stream.
/// Chunked transfer framing is not decoded.
/// </returns>
public static string RequestwebServerByTCP(Uri uri, string method, NameValueCollection parameter, string cookie, Encoding encoding)
{
    TlsClientProtocol protocol = null;
    try
    {
        if (encoding == null)
        {
            encoding = Encoding.UTF8;
        }

        // Normalize the verb before anything is built from it, so the request line
        // and the GET/POST checks below always agree.
        method = method.ToUpperInvariant();

        byte[] postdata = null;
        if (method == GETMETHOD)
        {
            // Merge the parameters into the URI first; the request line is built from the result.
            if (parameter != null)
            {
                uri = GetMethodQueryString(uri, parameter, encoding);
            }
        }
        else if (method == POSTMETHOD)
        {
            StringBuilder form = new StringBuilder();
            if (parameter != null)
            {
                foreach (string key in parameter)
                {
                    // parameter[key] would join repeated keys with commas; emit each value on its own.
                    string[] values = parameter.GetValues(key);
                    if (values == null)
                    {
                        values = new string[] { null };
                    }
                    foreach (string value in values)
                    {
                        form.Append(string.Format(FORMATSTR1, System.Web.HttpUtility.UrlEncode(key, encoding), System.Web.HttpUtility.UrlEncode(value, encoding)));
                    }
                }
            }
            if (form.Length != 0)
            {
                // Drop the last character appended by FORMATSTR1 (presumably a trailing '&' - confirm against the constant).
                form.Remove(form.Length - 1, 1);
            }
            postdata = encoding.GetBytes(form.ToString());
        }

        // An explicitly specified non-default port is honoured; otherwise TLS goes to 443 as before.
        int port = uri.IsDefaultPort ? 443 : uri.Port;
        string hostHeader = uri.IsDefaultPort ? uri.Host : uri.Host + ":" + uri.Port;

        StringBuilder requestHeaders = new StringBuilder();
        requestHeaders.Append(method + " " + uri.PathAndQuery + " HTTP/1.1\r\n");
        if (method == POSTMETHOD)
        {
            requestHeaders.Append("Content-Type:application/x-www-form-urlencoded\r\n");
        }
        requestHeaders.Append("User-Agent:Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 (Khtml, like Gecko) Chrome/23.0.1271.64 Safari/537.11\r\n");
        if (!string.IsNullOrEmpty(cookie))
        {
            requestHeaders.Append("Cookie:" + cookie + "\r\n");
        }
        requestHeaders.Append("Accept:*/*\r\n");
        requestHeaders.Append("Host:" + hostHeader + "\r\n");
        if (postdata != null)
        {
            requestHeaders.Append("Content-Length:" + postdata.Length + "\r\n");
        }
        requestHeaders.Append("Connection:close\r\n\r\n");

        byte[] head = Encoding.UTF8.GetBytes(requestHeaders.ToString());

        protocol = OpenTlsConnection(uri.Host, port, new MyTlsClient());
        Stream tlsStream = protocol.Stream;
        tlsStream.Write(head, 0, head.Length);
        if (postdata != null && postdata.Length > 0)
        {
            // Send exactly the bytes that Content-Length was computed from.
            tlsStream.Write(postdata, 0, postdata.Length);
        }
        tlsStream.Flush();

        StreamReader reader = new StreamReader(tlsStream);
        StringBuilder html = new StringBuilder();
        try
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                html.AppendLine(line);
                // Stop at the end of the HTML document instead of waiting for the peer to close.
                if (line.IndexOf("</html>", StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    break;
                }
            }
        }
        catch (System.IO.IOException)
        {
            // NOTE(review): with "Connection:close" the peer may drop the connection without a
            // clean TLS shutdown, which presumably surfaces here as an I/O error. Keep what was
            // already received; fail only when nothing was read at all.
            if (html.Length == 0)
            {
                throw;
            }
        }

        return html.ToString();
    }
    catch (Exception ex)
    {
        // Callers rely on an empty string for failure; record the cause instead of discarding it.
        System.Diagnostics.Trace.TraceError("RequestwebServerByTCP failed: " + ex);
        return "";
    }
    finally
    {
        if (protocol != null)
        {
            try
            {
                protocol.Close();
            }
            catch (Exception)
            {
                // Best-effort shutdown: a failure while closing must not replace the result.
            }
        }
    }
}
请求到的 html, 为什么需要一行一行读呢? 我在调试的时候发现有个 bug, 如果一次性读取的时候, 它停不下来, 最终报错, 所以我做了一个读到 html 末尾的判断.
继承了提供的默认类:
/// <summary>
/// TLS client built on BouncyCastle's <c>DefaultTlsClient</c>; the only customization is
/// the authentication handler handed to the handshake.
/// </summary>
class MyTlsClient : DefaultTlsClient
{
    /// <summary>Creates the authentication handler used for this connection.</summary>
    /// <returns>A new <c>MyTlsAuthentication</c> instance on every call.</returns>
    public override TlsAuthentication GetAuthentication()
    {
        TlsAuthentication authentication = new MyTlsAuthentication();
        return authentication;
    }
}
/// <summary>
/// Authentication callbacks for the TLS handshake: offers no client certificate and
/// performs no check on the server certificate.
/// </summary>
class MyTlsAuthentication : TlsAuthentication
{
    /// <summary>Called when the server asks for a client certificate.</summary>
    /// <param name="certificateRequest">The server's certificate request; not inspected.</param>
    /// <returns>Always null, i.e. no client credentials are supplied.</returns>
    public TlsCredentials GetClientCredentials(CertificateRequest certificateRequest)
    {
        // A client certificate would be returned here if one were needed.
        return null;
    }

    /// <summary>Called with the certificate presented by the server.</summary>
    /// <param name="serverCertificate">The server's certificate; not inspected.</param>
    public void NotifyServerCertificate(Certificate serverCertificate)
    {
        // NOTE(review): intentionally empty - nothing in this class rejects a certificate.
        // Unless validation happens elsewhere, the connection is exposed to
        // man-in-the-middle attacks; server certificate validation belongs here.
    }
}
/// <summary>
/// Opens a TCP connection to <paramref name="hostname"/>:<paramref name="port"/> and runs the
/// TLS handshake on it with the supplied client.
/// </summary>
/// <param name="hostname">Host to connect to.</param>
/// <param name="port">TCP port to connect to.</param>
/// <param name="client">TLS client passed to <c>TlsClientProtocol.Connect</c>.</param>
/// <returns>The connected protocol object; the caller is responsible for closing it.</returns>
/// <remarks>
/// If creating the protocol or the handshake throws, the TCP connection is closed before the
/// exception is rethrown, so a failed attempt does not leak a socket.
/// </remarks>
internal static TlsClientProtocol OpenTlsConnection(string hostname, int port, TlsClient client)
{
    TcpClient tcp = new TcpClient(hostname, port);
    try
    {
        TlsClientProtocol protocol = new TlsClientProtocol(tcp.GetStream(), secureRandom);
        protocol.Connect(client);
        return protocol;
    }
    catch
    {
        // Nobody else holds a reference to the socket at this point; release it here.
        tcp.Close();
        throw;
    }
}
拼接 url 参数的方法:
/// <summary>
/// Appends the entries of a <see cref="NameValueCollection"/> to the query string of
/// <paramref name="uri"/> by flattening them into key/value pairs and delegating to the
/// list-based overload.
/// </summary>
/// <param name="uri">Address whose query string is extended.</param>
/// <param name="parameter">
/// Parameters to append. May be null, in which case no pairs are added. A key that holds
/// several values yields one pair per value instead of a single comma-joined value.
/// </param>
/// <param name="encoding">Encoding passed on for URL-encoding keys and values.</param>
/// <returns>The URI returned by the list-based overload.</returns>
private static Uri GetMethodQueryString(Uri uri, NameValueCollection parameter, Encoding encoding)
{
    List<KeyValuePair<string, string>> pairs = new List<KeyValuePair<string, string>>();
    if (parameter != null)
    {
        foreach (string key in parameter)
        {
            // The indexer (parameter[key]) joins repeated keys with commas; GetValues keeps them apart.
            string[] values = parameter.GetValues(key);
            if (values == null)
            {
                // Key present without a value: keep it, as the indexer-based version did.
                pairs.Add(new KeyValuePair<string, string>(key, null));
                continue;
            }
            foreach (string value in values)
            {
                pairs.Add(new KeyValuePair<string, string>(key, value));
            }
        }
    }
    return GetMethodQueryString(uri, pairs, encoding);
}
/// <summary>
/// Returns a copy of <paramref name="uri"/> whose query string has the given key/value
/// pairs appended, each URL-encoded with <paramref name="encoding"/>.
/// </summary>
/// <param name="uri">Address whose query string is extended.</param>
/// <param name="parameter">Pairs to append, in order; null adds nothing.</param>
/// <param name="encoding">Encoding used to URL-encode keys and values.</param>
/// <returns>The URI produced by <see cref="UriBuilder"/> after the query has been replaced.</returns>
private static Uri GetMethodQueryString(Uri uri, List<KeyValuePair<string, string>> parameter, Encoding encoding)
{
    UriBuilder builder = new UriBuilder(uri);
    string existingQuery = builder.Query;

    // FORMATSTR1 is used when the URI has no query yet, FORMATSTR2 when pairs are appended
    // to an existing one (both constants are defined elsewhere in the file).
    string pairFormat = string.IsNullOrEmpty(existingQuery) ? FORMATSTR1 : FORMATSTR2;

    StringBuilder query = new StringBuilder(existingQuery);
    if (parameter != null)
    {
        foreach (KeyValuePair<string, string> pair in parameter)
        {
            string encodedKey = System.Web.HttpUtility.UrlEncode(pair.Key, encoding);
            string encodedValue = System.Web.HttpUtility.UrlEncode(pair.Value, encoding);
            query.Append(string.Format(pairFormat, encodedKey, encodedValue));
        }
    }

    // Strip a dangling '&' at the end and the leading '?' that UriBuilder.Query reports,
    // then hand the bare query back to the builder.
    builder.Query = query.ToString().TrimEnd('&').TrimStart('?');
    return builder.Uri;
}
注意: List<KeyValuePair<string, string>> 和 NameValueCollection 类型的参数有什么区别呢? 两者都允许出现重复的 key, 只不过 NameValueCollection 在按 key 取值 (parameter[key]) 时, 会把同一个 key 下的多个值用逗号拼在一起返回. 这样拼出来的请求参数就和预期不一致, 请求有可能失败, 拿不到数据. 本人因此问题折腾了很久, 先用 Python 实现了请求, 后来又在 .NET Core 中实现了一遍, 最后终于低下了高傲的头颅, 才发现是传参的时候出了问题.
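.NET 4.0 中, 只需要在发起请求前添加一句话: ServicePointManager.SecurityProtocol = (SecurityProtocolType)3072; (3072 即 SecurityProtocolType.Tls12 的数值; .NET 4.0 的枚举里还没有这个成员, 所以用强制转换, 并且需要机器上装有 .NET 4.5 或更高版本的运行时.)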
.net 4.5 中, 什么都不用管.
2.0 中连 TCP 都用上了, 不过我们看到了 http 请求的本质, 把一段具有格式的请求头 + 请求数据转为二进制发送到主机的某个端口, 返回流, 通过读取流, 就可以拿到结果.
说到这, 我们来看看 Request 消息格式:
- GET https://www.baidu.com/ HTTP/1.1
- Accept: text/html, application/xhtml+xml, */*
- Accept-Language: zh-CN
- User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko
- Accept-Encoding: gzip, deflate
- Connection: Keep-Alive
- Host: www.baidu.com
- Cookie: BAIDUID=C1EFC3A3466AAAEBE74C6F6E7F413FA8:FG=1; BIDUPSID=C1EFC3A3466AAAEBE74C6F6E7F413FA8; PSTM=1525339270; BD_LAST_QID=12260391193367555241
1, 请求行, 包含请求的方法, URL, HTTP 协议版本
2, 请求头, 接收的格式, 浏览器代理, cookie 等等
3, 空行
4, 请求体, 传递数据
Response 格式:
- HTTP/1.1 200 OK
- Bdpagetype: 1
- Bdqid: 0x9a1ff959000016d0
- Cache-Control: private
- Connection: Keep-Alive
- Content-Encoding: gzip
- Content-Type: text/html; charset=utf-8
- Cxy_all: baidu+77e5655ffd82ce31adf5edff251fc585
- Date: Thu, 03 May 2018 09:21:10 GMT
- Expires: Thu, 03 May 2018 09:21:03 GMT
- Server: BWS/1.1
- Set-Cookie: BDSVRTM=0; path=/
- Set-Cookie: BD_HOME=0; path=/
- Set-Cookie: H_PS_PSSID=1428_21080_20719; path=/; domain=.baidu.com
- Strict-Transport-Security: max-age=172800
- Vary: Accept-Encoding
- X-Powered-By: HPHP
- X-Ua-Compatible: IE=Edge,chrome=1
- Transfer-Encoding: chunked
- html
1, 状态行
2, 消息报头, 如 Content-Type, Date, Set-Cookie
3, 空行
4, 正文
来源: https://www.cnblogs.com/wangqiang3311/p/8986603.html