언어 자료구조 알고리즘/파이썬(Python)

[python] 뉴스 검색 - 형태소 분석 (feat. 네이버 개발자센터)

언제나휴일 2020. 11. 10. 11:41
반응형

Main.py

Main.py
0.00MB

 

 

import json
import urllib.error
import urllib.parse
import urllib.request

class Morpheme:
    """A word paired with the number of times it has been counted.

    Attributes:
        word: the text of the word.
        ref: occurrence count; a freshly created instance counts as 1.
    """

    def __init__(self,word):
        """Create an entry for *word* with an initial count of 1."""
        self.ref = 1
        self.word = word

    def Merge(self,other):
        """Add *other*'s count to this entry when both hold the same word.

        Entries holding a different word are left untouched.
        """
        if not self.IsEqual(other):
            return
        self.ref += other.ref

    def IsEqual(self,other):
        """Return whether *other* holds the same word as this entry."""
        same_word = self.word == other.word
        return same_word

class MorphemeParser:
    """Static helpers that strip markup from text and count its words.

    A "morpheme" here is simply a whitespace-separated, purely alphabetic
    token; no linguistic analysis is performed.
    """

    @staticmethod
    def Parse(src):
        """Split *src* into words and return one Morpheme per distinct word.

        Non-alphabetic characters are removed first. The returned list is in
        first-occurrence order and each entry's ``ref`` holds the number of
        times its word appeared.
        """
        cleaned = MorphemeParser.RemoveNonAlpha(src)
        # RemoveNonAlpha keeps every kind of whitespace, so split on any
        # whitespace run. Splitting on ' ' alone would glue together words
        # separated by a newline or tab, and the isalpha() filter would then
        # drop them silently.
        morphes = [Morpheme(token) for token in cleaned.split()
                   if token.isalpha()]
        return MorphemeParser.Merge(morphes)

    @staticmethod
    def Merge(morphes):
        """Collapse entries with the same word into the first one seen.

        The first entry for each word is kept (and mutated through its own
        ``Merge`` method); later duplicates are folded into it. Order of first
        occurrence is preserved.
        """
        merged = list()
        first_by_word = dict()  # word -> entry kept in `merged`
        for morph in morphes:
            kept = first_by_word.get(morph.word)
            if kept is None:
                first_by_word[morph.word] = morph
                merged.append(morph)
            else:
                kept.Merge(morph)
        return merged

    @staticmethod
    def RemoveNonAlpha(src):
        """Return *src* with everything but letters and whitespace removed."""
        return "".join(ch for ch in src if ch.isalpha() or ch.isspace())

    @staticmethod
    def FindTag(src):
        """Locate the first ``<...>`` span in *src*.

        Returns:
            ``(s, e)``: index of the first ``<`` and of the first ``>`` that
            follows it.

        Raises:
            ValueError: if there is no ``<``, or no ``>`` after it.
        """
        s = src.index('<')
        # Search for '>' only after the '<'. A stray '>' earlier in the text
        # would otherwise yield e < s and make RemoveTag grow the string
        # forever.
        e = src.index('>', s)
        return s,e

    @staticmethod
    def RemoveTag(src):
        """Return *src* with every ``<...>`` span removed.

        An unmatched ``<`` or ``>`` is left in place.
        """
        while True:
            try:
                s,e = MorphemeParser.FindTag(src)
            except ValueError:
                # No complete tag left: this is the normal loop exit.
                return src
            src = src[:s]+src[e+1:]

# Credentials issued by the Naver developer center; replace the placeholders.
client_id ="네이버 개발자센터에서 발급한 Client ID"
client_secret="네이버 개발자센터에서 발급한 Client Secret"
url = "https://openapi.naver.com/v1/search/news.json"
query = input("질의:")
query_param = "query="+urllib.parse.quote(query)

# Query string layout:
# site-address?param=value&param=value
query_str = url+"?"+query_param

request = urllib.request.Request(query_str)
request.add_header("X-Naver-Client-Id",client_id)
request.add_header("X-Naver-Client-Secret",client_secret)

# urlopen raises HTTPError for error statuses instead of returning a response,
# so the failure message has to be driven from the except branch as well.
content = None
try:
    with urllib.request.urlopen(request) as response:
        if response.getcode() == 200:
            content = response.read()
except urllib.error.HTTPError:
    content = None

if content is None:
    print("실패!!!")
else:
    data = content.decode('utf-8')
    jdata = json.loads(data)
    total = jdata['total']
    print("검색 결과 개수:",total)

    # One output file per news item: data0.txt, data1.txt, ...
    for i, item in enumerate(jdata['items']):
        print("제목:",MorphemeParser.RemoveTag(item['title']))
        des = MorphemeParser.RemoveTag(item['description'])
        print(des)
        print("==============")
        res = MorphemeParser.Parse(des)
        # Explicit UTF-8 so the write cannot fail on words that the
        # platform's default codec is unable to encode.
        with open("data"+str(i)+".txt","w",encoding="utf-8") as fs:
            # One "word,count" line per distinct word.
            fs.writelines(morph.word + ","+str(morph.ref)+"\n" for morph in res)
반응형