Archive for the 'NSXMLParser' Category

2016/11/30 Yahoo! 形態素解析APIで日本語テキストを解釈

Yahoo!の形態素解析APIで、日本語テキストを形態素解析するAppleScriptです。

Yahoo!に開発者登録(無料)して、アプリケーションIDを取得し、リスト中のretAccessKey()ハンドラにアプリケーションIDを記入すると実行可能です。

単語ごとに「品詞」「よみがな」などを取得できます。辞書が充実しているためか、自分の名前も正しく単語として認識されました。

形態素解析エンジンはローカルに置いて、辞書をカスタマイズするべきだと思っていますが、Yahoo!のAPI(が備えている辞書)だとそれなりに使える感じがします。

Yahoo!のテキスト解析系APIはひととおり試してみましたが、

 校正支援API:漢字の誤変換は指摘してくれるが、助詞の間違いなどは指摘してくれない
 キーフレーズ抽出API:使えるかどうか評価が難しい
 かな漢字変換API:呼んで使えるが、使い道が難しい

この形態素解析APIが一番実用度が高そうな感じがします。

AppleScript名:Yahoo! 形態素解析APIで日本語テキストを解釈
– Created 2016-11-25 by Takaaki Naganoya
– 2016 Piyomaru Software
use AppleScript version “2.5″
use scripting additions
use framework “Foundation”

–http://developer.yahoo.co.jp/webapi/jlp/ma/v1/parse.html

–日本語形態素解析Web APIは、24時間以内で1つのアプリケーションIDにつき50000件のリクエストが上限となっています。また、1リクエストの最大サイズを100KBに制限 しています。

property dictStack : missing value – stack to hold array of dictionaries
property textInProgress : “” – string to collect text as it is found
property anError : missing value – if we get an error, store it here

set japaneseText to “私の名前は長野谷です。”

set reqURLStr to “http://jlp.yahooapis.jp/MAService/V1/parse”

set aKey to retAccessKey() of me

set aRec to {|key|:aKey, sentence:japaneseText, results:“ma”, page:“1″, output:“xml”, appid:aKey}
set aURL to retURLwithParams(reqURLStr, aRec) of me
set aRes to callRestGETAPIAndParseXMLResults(aURL) of me

set aRESCode to responseCode of aRes
if aRESCode is not equal to 200 then return false

set aRESHeader to responseHeader of aRes
set aXMLres to (xml of aRes)

set parsedList to (aXMLres’s valueForKeyPath:“ResultSet.ma_result.word_list.word.surface.contents”) as list
–>  {”私”, “の”, “名前”, “は”, “長野谷”, “です”, “。”}

set yomiganaList to (aXMLres’s valueForKeyPath:“ResultSet.ma_result.word_list.word.reading.contents”) as list
–>  {”わたし”, “の”, “なまえ”, “は”, “ながのや”, “です”, “。”}

set kindList to (aXMLres’s valueForKeyPath:“ResultSet.ma_result.word_list.word.pos.contents”) as list
–>  {”名詞”, “助詞”, “名詞”, “助詞”, “名詞”, “助動詞”, “特殊”}

–GET methodのREST APIを呼ぶ
on callRestGETAPIAndParseXMLResults(aURL)
  set aRequest to current application’s NSMutableURLRequest’s requestWithURL:(current application’s |NSURL|’s URLWithString:aURL)
  
  
aRequest’s setHTTPMethod:“GET”
  
aRequest’s setCachePolicy:(current application’s NSURLRequestReloadIgnoringLocalCacheData)
  
aRequest’s setHTTPShouldHandleCookies:false
  
aRequest’s setTimeoutInterval:60
  
aRequest’s setValue:“application/json” forHTTPHeaderField:“Accept”
  
  
set aRes to current application’s NSURLConnection’s sendSynchronousRequest:aRequest returningResponse:(reference) |error|:(missing value)
  
set resList to aRes as list
  
  
set bRes to contents of (first item of resList)
  
set resStr to current application’s NSString’s alloc()’s initWithData:bRes encoding:(current application’s NSUTF8StringEncoding)
  
  
set aXmlRec to my makeRecordWithXML:resStr
  
  
  
–Get Response Code & Header
  
set dRes to contents of second item of resList
  
if dRes is not equal to missing value then
    set resCode to (dRes’s statusCode()) as number
    
set resHeaders to (dRes’s allHeaderFields()) as record
  else
    set resCode to 0
    
set resHeaders to {}
  end if
  
  
return {xml:aXmlRec, responseCode:resCode, responseHeader:resHeaders}
end callRestGETAPIAndParseXMLResults

on retURLwithParams(aBaseURL, aRec)
  set aDic to current application’s NSMutableDictionary’s dictionaryWithDictionary:aRec
  
  
set aKeyList to (aDic’s allKeys()) as list
  
set aValList to (aDic’s allValues()) as list
  
set aLen to length of aKeyList
  
  
set qList to {}
  
repeat with i from 1 to aLen
    set aName to contents of item i of aKeyList
    
set aVal to contents of item i of aValList
    
set the end of qList to (current application’s NSURLQueryItem’s queryItemWithName:aName value:aVal)
  end repeat
  
  
set aComp to current application’s NSURLComponents’s alloc()’s initWithString:aBaseURL
  
aComp’s setQueryItems:qList
  
set aURL to (aComp’s |URL|()’s absoluteString()) as text
  
  
return aURL
end retURLwithParams

on retAccessKey()
  return “xxXxxxXxXXxxxXXXXXXXXXXXXXXxXXXxxxXXxXXxxXXxxxXXXxxXXXX-” –Yahoo! API Key
end retAccessKey

on urlencodeStr(aStr)
  set aString to current application’s NSString’s stringWithString:aStr
  
set aString to (aString’s stringByAddingPercentEncodingWithAllowedCharacters:(current application’s NSCharacterSet’s URLQueryAllowedCharacterSet())) as text
  
return aString
end urlencodeStr

——–XMLParse Lib

on makeRecordWithXML:xmlString
  set my dictStack to current application’s NSMutableArray’s array() – empty mutable array
  
set anEmpty to current application’s NSMutableDictionary’s |dictionary|()
  (
my dictStack)’s addObject:anEmpty – add empty mutable dictionary
  
set my textInProgress to current application’s NSMutableString’s |string|() – empty mutable string
  
  
set anNSString to current application’s NSString’s stringWithString:xmlString
  
set theData to anNSString’s dataUsingEncoding:(current application’s NSUTF8StringEncoding)
  
  
set theNSXMLParser to current application’s NSXMLParser’s alloc()’s initWithData:theData
  
  
theNSXMLParser’s setDelegate:me
  
  
set theResult to theNSXMLParser’s parse()
  
if theResult then – went OK, get first item on stack
    return ((my dictStack)’s firstObject()) –as record
  else
    error (my anError’s localizedDescription() as text)
  end if
end makeRecordWithXML:

– this is an XML parser delegate method. Called when new element found
on parser:anNSXMLParser didStartElement:elementName namespaceURI:aString qualifiedName:qName attributes:aRecord
  set parentDict to my dictStack’s lastObject()
  
set childDict to current application’s NSMutableDictionary’s |dictionary|()
  
if aRecord’s |count|() > 0 then
    childDict’s setValue:aRecord forKey:“attributes”
  end if
  
  
set existingValue to parentDict’s objectForKey:elementName
  
  
if existingValue is not missing value then
    if (existingValue’s isKindOfClass:(current application’s NSMutableArray)) as boolean then
      set theArray to existingValue
    else
      set theArray to current application’s NSMutableArray’s arrayWithObject:existingValue
      
parentDict’s setObject:theArray forKey:elementName
    end if
    
    
theArray’s addObject:childDict
  else
    parentDict’s setObject:childDict forKey:elementName
  end if
  
  (
my dictStack)’s addObject:childDict
end parser:didStartElement:namespaceURI:qualifiedName:attributes:

– this is an XML parser delegate method. Called at the end of an element
on parser:anNSXMLParser didEndElement:elementName namespaceURI:aString qualifiedName:qName
  if my textInProgress’s |length|() > 0 then
    set dictInProgress to my dictStack’s lastObject()
    
dictInProgress’s setObject:textInProgress forKey:“contents”
    
set my textInProgress to current application’s NSMutableString’s |string|()
  end if
  
  
my dictStack’s removeLastObject()
end parser:didEndElement:namespaceURI:qualifiedName:

– this is an XML parser delegate method. Called when string is found. May be called repeatedly
on parser:anNSXMLParser foundCharacters:aString
  if (aString’s stringByTrimmingCharactersInSet:(current application’s NSCharacterSet’s whitespaceAndNewlineCharacterSet()))’s |length|() > 0 then
    (my textInProgress)’s appendString:aString
  end if
end parser:foundCharacters:

– this is an XML parser delegate method. Called when there’s an error
on parser:anNSXMLParser parseErrorOccurred:anNSError
  set my anError to anNSError
end parser:parseErrorOccurred:

★Click Here to Open This Script 

2016/11/06 XMLをrecordにv2

XMLをrecordに変換するAppleScriptです。

以前にAppleScript-Users ML上で流れていたXML→record変換のAppleScriptでしたが、動作確認を行ってもうまく動かず、そのまま放置状態になっていました。

見直してみたところ、「NSMutableDictionary’s dictionary()」というカラのmutable dictionaryを作成する部分が、うまくAppleScriptの処理系に認識されていなかったようでした。少し書き直してみました。

AppleScript名:XMLをrecordにv2
–2015 Shane Stanley & Alex Zavatone
– Modified 2016-11-06 by Takaaki Naganoya
use AppleScript version “2.4″
use scripting additions
use framework “Foundation”
–http://piyocast.com/as/archives/4306

property dictStack : missing value – stack to hold array of dictionaries
property textInProgress : “” – string to collect text as it is found
property anError : missing value – if we get an error, store it here

set xmlString to “< ?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>

Saga
Nor
én
Malm
ö
Martin
Rohde
K
øbenhavn

set xmlRes to my makeRecordWithXML:xmlString
–> {|character|:{firstName:{|contents|:”Saga”}, lastName:{|contents|:”Norén”}, city:{|contents|:”Malmö“}, partner:{firstName:{|contents|:”Martin”}, lastName:{|contents|:”Rohde”}, city:{|contents|:”København”}, attributes:{approach:”dogged”}}}}

on makeRecordWithXML:xmlString
  – set up properties
  
set my dictStack to current application’s NSMutableArray’s array() – empty mutable array
  
set anEmpty to current application’s NSMutableDictionary’s |dictionary|()
  (
my dictStack)’s addObject:anEmpty – add empty mutable dictionary
  
set my textInProgress to current application’s NSMutableString’s |string|() – empty mutable string
  
  
– convert XML from string to data
  
set anNSString to current application’s NSString’s stringWithString:xmlString
  
set theData to anNSString’s dataUsingEncoding:(current application’s NSUTF8StringEncoding)
  
  
– initialize an XML parser with the data
  
set theNSXMLParser to current application’s NSXMLParser’s alloc()’s initWithData:theData
  
  
– set this script to be the parser’s delegate
  
theNSXMLParser’s setDelegate:me
  
  
– tell it to parse the XML
  
set theResult to theNSXMLParser’s parse()
  
if theResult then – went OK, get first item on stack
    return ((my dictStack)’s firstObject()) as record
  else – error, so return error
    error (my anError’s localizedDescription() as text)
  end if
end makeRecordWithXML:

– this is an XML parser delegate method. Called when new element found
on parser:anNSXMLParser didStartElement:elementName namespaceURI:aString qualifiedName:qName attributes:aRecord
  – store reference to last item on the stack
  
set parentDict to my dictStack’s lastObject()
  
  
– make new child
  
set childDict to current application’s NSMutableDictionary’s |dictionary|()
  
  
– if there are attributes, add them as a record with key “attributes”
  
if aRecord’s |count|() > 0 then
    childDict’s setValue:aRecord forKey:“attributes”
  end if
  
  
– see if there’s already an item for this key
  
set existingValue to parentDict’s objectForKey:elementName
  
  
if existingValue is not missing value then
    – there is, so if it’s an array, store it…
    
if (existingValue’s isKindOfClass:(current application’s NSMutableArray)) as boolean then
      set theArray to existingValue
    else
      – otherwise create an array and add it
      
set theArray to current application’s NSMutableArray’s arrayWithObject:existingValue
      
parentDict’s setObject:theArray forKey:elementName
    end if
    
    
– then add the new dictionary to the array
    
theArray’s addObject:childDict
  else
    – add new dictionary directly to the parent
    
parentDict’s setObject:childDict forKey:elementName
  end if
  
  
– also add the new dictionary to the end of the stack
  (
my dictStack)’s addObject:childDict
end parser:didStartElement:namespaceURI:qualifiedName:attributes:

– this is an XML parser delegate method. Called at the end of an element
on parser:anNSXMLParser didEndElement:elementName namespaceURI:aString qualifiedName:qName
  – if any text has been stored, add it as a record with key “contents”
  
if my textInProgress’s |length|() > 0 then
    set dictInProgress to my dictStack’s lastObject()
    
dictInProgress’s setObject:textInProgress forKey:“contents”
    
    
– reset textInProgress property for next element
    
set my textInProgress to current application’s NSMutableString’s |string|()
  end if
  
  
– remove last item from the stack
  
my dictStack’s removeLastObject()
end parser:didEndElement:namespaceURI:qualifiedName:

– this is an XML parser delegate method. Called when string is found. May be called repeatedly
on parser:anNSXMLParser foundCharacters:aString
  – only append string if it’s not solely made of space characters (which should be, but aren’t, caught by another delegate method)
  
if (aString’s stringByTrimmingCharactersInSet:(current application’s NSCharacterSet’s whitespaceAndNewlineCharacterSet()))’s |length|() > 0 then
    (my textInProgress)’s appendString:aString
  end if
end parser:foundCharacters:

– this is an XML parser delegate method. Called when there’s an error
on parser:anNSXMLParser parseErrorOccurred:anNSError
  set my anError to anNSError
end parser:parseErrorOccurred:

★Click Here to Open This Script