Wikipediaの任意の項目の本文に入っているURLがリンク切れになっていないかどうかチェック v2

AppleScript名:Wikipediaの任意の項目の本文に入っているURLがリンク切れになっていないかどうかチェック v2
-- Created 2017-03-29 by Takaaki Naganoya
-- 2017 Piyomaru Software
use AppleScript version "2.5"
use scripting additions
use framework "Foundation"

-- Entry point: fetch the wikitext of one Wikipedia article and validate every URL found in it.
set aKeyword to "AppleScript"

set aStr to getBody(aKeyword) of me

-- A redirect page body starts with "#転送" followed by a [[Target]] link;
-- resolve the target title and fetch that article instead (followed once only).
if aStr begins with "#転送" then
  set bStr to detectForwarding(aStr) of me
  set aStr to getBody(bStr) of me
end if

set aRes to extractURLsAndValidateThem(aStr) of me
--> {safeURL:27, forwardedURL:19, brokenURL:0, brokenURLs:{}}

-- Extract the redirect target title from a redirect page body.
--   aStr : article body text (wikitext)
-- Returns the text between the first "[[" and "]]" when aStr starts with "#転送".
-- Returns aStr unchanged when it is not a redirect or contains no [[...]] link.
on detectForwarding(aStr)
  if aStr does not start with "#転送" then return aStr
  
  set aRes to parseByDelim(aStr, {"[[", "]]"}) of me
  --> {"#転送 ", "MacOS", ""}
  
  -- Without a [[...]] link there is no second item; fall back to the input instead of raising an error.
  if (count of aRes) < 2 then return aStr
  return item 2 of aRes
end detectForwarding

-- Extract every URL from aStr, request each one and classify it.
--   aStr : text to scan for links
-- Returns a record:
--   safeURL      : number of URLs that answered 200 at the requested address
--   forwardedURL : number of URLs that answered 200 at a different final address
--   brokenURL    : number of URLs that could not be fetched successfully
--   brokenURLs   : list of the broken URL strings
-- Each URL is counted in exactly one of the three categories.
on extractURLsAndValidateThem(aStr)
  set urlList to extractLinksFromNaturalText(aStr) of me as list
  set okList to {}
  set fwList to {}
  set ngList to {}
  
  repeat with i in urlList
    set j to contents of i
    set aTarg to j's absoluteString() as string
    set {exRes, headerRes, aData, resURL} to checkURLResourceExistence(j, 30) of me
    
    if exRes = false then
      -- The link detector may have swallowed trailing text after an encoded space;
      -- retry with everything from the first "%20" cut off.
      set bStr to cleaningURLStr(aTarg) of me
      if bStr = false then
        -- Nothing to trim, so the URL itself is broken.
        set the end of ngList to aTarg
      else
        set bURL to (current application's |NSURL|'s URLWithString:bStr)
        set {exRes, headerRes, aData, resURL} to checkURLResourceExistence(bURL, 60) of me
        if exRes = false then
          set the end of ngList to bStr
        else if resURL is not equal to bStr then
          set the end of fwList to bStr
        else
          set the end of okList to bStr
        end if
      end if
    else if resURL is not equal to aTarg then
      -- The final URL differs from the requested one: the request was redirected.
      set the end of fwList to aTarg
    else
      set the end of okList to aTarg
    end if
  end repeat
  
  set resList to {safeURL:length of okList, forwardedURL:length of fwList, brokenURL:length of ngList, brokenURLs:ngList}
  return resList
end extractURLsAndValidateThem

-- Cut a URL string off at its first percent-encoded space ("%20").
--   aStr : URL string
-- Returns the text before the first "%20", or false when aStr contains no "%20"
-- or starts with it (nothing would remain).
on cleaningURLStr(aStr)
  set anOffset to offset of "%20" in aStr
  -- 0 = not found; 1 = "%20" is at the very start, so "text 1 thru 0" would be an invalid range.
  if anOffset < 2 then return false
  return text 1 thru (anOffset - 1) of aStr
end cleaningURLStr

-- Fetch the current wikitext of one Wikipedia article through the MediaWiki query API.
--   aKeyword : article title
-- Returns the article body (wikitext) as text.
on getBody(aKeyword)
  --set reqURLStr to "https://en.wikipedia.org/w/api.php" -- English version
  set reqURLStr to "https://ja.wikipedia.org/w/api.php" -- Japanese version
  
  set aRec to {action:"query", titles:aKeyword, |prop|:"revisions", rvprop:"content", |format|:"json"}
  --set aRec to {action:"query", titles:"AppleScript|Mac OS X|Objective-C", |prop|:"revisions", rvprop:"content", |format|:"json"}
  
  set aURL to retURLwithParams(reqURLStr, aRec) of me
  set aRes to callRestGETAPIAndParseResults(aURL) of me
  set aRESTres to (json of aRes)
  --> {query:{pages:{2954:{pageid:2954, title:"AppleScript", revisions:{{contentformat:"text/x-wiki", *:"{{Infobox ...", contentmodel:"wikitext"}}, ns:0}}}, batchcomplete:""}
  
  -- "pages" is keyed by page id, which is not known in advance; use the first key.
  set aRes to (aRESTres's valueForKeyPath:"query.pages")
  set aKeyStr to (aRes's allKeys()'s firstObject()) as string
  
  -- The wikitext is stored under the "*" key of each revision entry.
  set aKeyPath to aKeyStr & ".revisions.*"
  set aBody to (aRes's valueForKeyPath:aKeyPath)'s firstObject() as string
  return aBody
end getBody

-- Call a REST API with the GET method and parse the JSON response.
--   aURL : request URL as text
-- Returns a record:
--   json           : parsed JSON object, or missing value when no body was received or parsing failed
--   responseCode   : HTTP status code, or 0 when no response object was returned
--   responseHeader : response header fields as a record, or {} when no response object was returned
on callRestGETAPIAndParseResults(aURL)
  set aRequest to current application's NSMutableURLRequest's requestWithURL:(current application's |NSURL|'s URLWithString:aURL)
  aRequest's setHTTPMethod:"GET"
  aRequest's setCachePolicy:(current application's NSURLRequestReloadIgnoringLocalCacheData)
  aRequest's setHTTPShouldHandleCookies:false
  aRequest's setTimeoutInterval:60
  aRequest's setValue:"application/json" forHTTPHeaderField:"Accept"
  
  -- Result is a list: {response body (NSData), response object}.
  set aRes to current application's NSURLConnection's sendSynchronousRequest:aRequest returningResponse:(reference) |error|:(missing value)
  set resList to aRes as list
  
  set bRes to contents of (first item of resList)
  if bRes is missing value then
    -- The request failed before any data arrived; there is nothing to parse.
    set aJsonDict to missing value
  else
    set aJsonDict to current application's NSJSONSerialization's JSONObjectWithData:bRes options:0 |error|:(missing value)
  end if
  
  -- Get response code and headers
  set dRes to contents of second item of resList
  if dRes is not equal to missing value then
    set resCode to (dRes's statusCode()) as number
    set resHeaders to (dRes's allHeaderFields()) as record
  else
    set resCode to 0
    set resHeaders to {}
  end if
  
  return {json:aJsonDict, responseCode:resCode, responseHeader:resHeaders}
end callRestGETAPIAndParseResults

-- Build a request URL from a base URL and a record of query parameters.
--   aBaseURL : base URL as text
--   aRec     : record whose labels become parameter names and whose values become parameter values
-- Returns the complete URL as text, with the query string percent-encoded by NSURLComponents.
on retURLwithParams(aBaseURL, aRec)
  set aDic to current application's NSDictionary's dictionaryWithDictionary:aRec
  set qList to {}
  
  repeat with aKey in ((aDic's allKeys()) as list)
    set aName to contents of aKey
    -- Look the value up by its key instead of pairing allKeys/allValues by index,
    -- and convert it to a string so non-text values are accepted as well.
    set aVal to (aDic's objectForKey:aName)'s |description|()
    set the end of qList to (current application's NSURLQueryItem's queryItemWithName:aName value:aVal)
  end repeat
  
  set aComp to current application's NSURLComponents's alloc()'s initWithString:aBaseURL
  aComp's setQueryItems:qList
  set aURL to (aComp's |URL|()'s absoluteString()) as text
  return aURL
end retURLwithParams

-- Percent-encode a string for use in a URL query.
--   aStr : text to encode
-- Returns the encoded text; characters in NSCharacterSet's URLQueryAllowedCharacterSet are left as they are.
on urlencodeStr(aStr)
  set srcString to current application's NSString's stringWithString:aStr
  set allowedSet to current application's NSCharacterSet's URLQueryAllowedCharacterSet()
  return (srcString's stringByAddingPercentEncodingWithAllowedCharacters:allowedSet) as text
end urlencodeStr

-- Check whether a resource (an image, a page, ...) exists at the given URL.
--   aURL       : NSURL to request
--   timeOutSec : request timeout in seconds
-- Returns a list:
--   {exists (boolean), response headers (NSDictionary), data (NSData), final URL as text}
-- "exists" is true only when the HTTP status code is 200.
-- When no response object is returned the result is {false, {}, data, missing value}.
on checkURLResourceExistence(aURL, timeOutSec as real)
  set aRequest to (current application's NSURLRequest's requestWithURL:aURL cachePolicy:(current application's NSURLRequestUseProtocolCachePolicy) timeoutInterval:timeOutSec)
  
  -- Result is a list: {response body (NSData), response object}.
  set aRes to (current application's NSURLConnection's sendSynchronousRequest:aRequest returningResponse:(reference) |error|:(missing value))
  set resList to aRes as list
  set dRes to (first item of resList)
  set bRes to (second item of resList)
  
  -- No response object: the request itself failed.
  if bRes = missing value then return {false, {}, dRes, missing value}
  
  -- URL reported by the response; the caller compares it with the requested URL to detect redirects.
  set resURL to ((|URL| of bRes)'s |absoluteURL|()'s absoluteString()) as string
  set hRes to (bRes's allHeaderFields())
  set aResCode to (bRes's statusCode()) as integer
  
  return {(aResCode = 200), hRes, dRes, resURL}
end checkURLResourceExistence

-- Find every link in a piece of natural-language text using NSDataDetector.
--   aString : text to scan
-- Returns a list with one URL value per detected link, in order of appearance;
-- an empty list when the detector could not be created.
on extractLinksFromNaturalText(aString)
  set anNSString to current application's NSString's stringWithString:aString
  set {theDetector, theError} to current application's NSDataDetector's dataDetectorWithTypes:(current application's NSTextCheckingTypeLink) |error|:(reference)
  if theDetector is missing value then return {}
  
  -- Scan the whole string: range is {location, length}.
  set theMatches to theDetector's matchesInString:anNSString options:0 range:{0, anNSString's |length|()}
  set theResults to theMatches's valueForKey:"URL"
  return theResults as list
end extractLinksFromNaturalText

-- Split text by one or more delimiters.
--   aData  : text to split
--   aDelim : delimiter text, or a list of delimiter texts
-- Returns the list of text items. AppleScript's text item delimiters are restored
-- to their previous value before returning, including when splitting raises an error.
on parseByDelim(aData, aDelim)
  set curDelim to AppleScript's text item delimiters
  try
    set AppleScript's text item delimiters to aDelim
    set dList to text items of aData
  on error errMsg number errNum
    -- Do not leak the temporary delimiters to the rest of the script.
    set AppleScript's text item delimiters to curDelim
    error errMsg number errNum
  end try
  set AppleScript's text item delimiters to curDelim
  return dList
end parseByDelim

★Click Here to Open This Script 

Please follow and like us:


メールアドレスが公開されることはありません。 * が付いている欄は必須項目です