asp抓取页面

来源:百度文库 编辑:神马文学网 时间:2024/04/25 20:24:43

<%

if trim(request.form("url"))<>"" then

dim VBody:VBody=GetResStr(trim(request.form("url")))

dim Res:Res=VBody

dim code:code=GetCode(VBody,"charset= {0,}([^ ]+) {0,}""")

end if

%>

抓取页面

ion="ss.asp" method="post">

请输入%20name=" type=text value="<%=trim(request.form(" P >>< url?))%>? size="60" url?>

页面编码:<%=code%>

<%

function GetResStr(URL)

dim ResBody,ResStr,PageCode

Set Http=server.createobject("msxml2.serverxmlhttp.3.0")

Http.setTimeouts 10000, 10000, 10000, 10000

Http.open "GET",URL,False

Http.Send()

If Http.Readystate =4 Then

  If Http.status=200 Then

    ResStr=http.responseText

    ResBody=http.responseBody

    PageCode=replace(GetCode(ResStr,"charset=([^\""].*)"""),chr(13)&chr(10),"")

    GetResStr=BytesToBstr(http.responseBody,trim(PageCode))

  End If

End If

End Function

'函数名:BytesToBstr

'作用:转换二进制数据为字符

'参数:Body-二进制数据,Cset-文本编码方式

Function BytesToBstr(Body,Cset)

  Dim Objstream

  Set Objstream = Server.CreateObject("adodb.stream")

  objstream.Type = 1

  objstream.Mode =3

  objstream.Open

  objstream.Write body

  objstream.Position = 0

  objstream.Type = 2

  objstream.Charset = Cset

  BytesToBstr = objstream.ReadText

  objstream.Close

  set objstream = nothing

End Function

 

'函数名:GetCode

'作用:转换二进制为字符

'参数:str-待查询字符串,regstr-正则表达式

Function GetCode(str,regstr)

Dim Reg

set Reg= new RegExp

Reg.IgnoreCase = True

Reg.MultiLine = True

Reg.Pattern =regstr

Set Cols = Reg.Execute(str)

str=Cols(0).SubMatches(0)

GetCode=str

end function

%>