반응형
뉴스 테이블
-- News articles collected from the search API.
-- link is UNIQUE so that the same article is stored only once.
CREATE TABLE [dbo].News (
    [nid] INT NOT NULL PRIMARY KEY IDENTITY,   -- surrogate key
    [title] VARCHAR(200) NOT NULL,
    [link] VARCHAR(200) NOT NULL,
    [description] VARCHAR(MAX) NOT NULL,
    [pubdate] DATETIME NOT NULL,
    [mcnt] int NOT NULL,                       -- morpheme count, updated by the loader after insert
    CONSTRAINT TU UNIQUE(link)                 -- table-level constraint: needs the preceding comma
)
형태소 테이블
-- Dictionary of morphemes (words); each distinct word is stored once.
CREATE TABLE [dbo].Morpheme (
    [mid] INT NOT NULL PRIMARY KEY IDENTITY,   -- surrogate key
    [word] VARCHAR(50) NOT NULL,
    CONSTRAINT WU UNIQUE(word)
)
역참조 테이블(역파일)
-- Inverted index: one row per (news article, morpheme) occurrence record.
CREATE TABLE [dbo].Inverse (
    [nid] INT NOT NULL,    -- News.nid of the article
    [mid] INT NOT NULL,    -- Morpheme.mid of the word
    [rcnt] INT NOT NULL    -- reference count of the word as recorded by the loader
)
관계 추가
News.py
#News.py
from MorphemeParser import MorphemeParser
from EHHelper import EHHelper
import datetime
class News:
    """A single news article: title, link, description and publication date."""

    def __init__(self, title, link, description, pubdate):
        self.title = title
        self.link = link
        self.description = description
        self.pubdate = pubdate

    def PreProcess(self):
        """Clean title and description in place via EHHelper.EmitTagAndSpecialCh."""
        self.title = EHHelper.EmitTagAndSpecialCh(self.title)
        self.description = EHHelper.EmitTagAndSpecialCh(self.description)

    @staticmethod
    def MakeNews(jnews):
        """Build a News object from one item of the search API's JSON result.

        Args:
            jnews: mapping with the keys 'title', 'link', 'description'
                and 'pubDate'.

        Returns:
            A News whose pubdate is a naive datetime parsed from 'pubDate'
            (the trailing '+hhmm' offset is discarded). If the value cannot
            be parsed, the current local time is used instead.

        Raises:
            KeyError: if one of the required keys is missing.
        """
        title = jnews['title']
        link = jnews['link']
        description = jnews['description']
        raw_date = jnews['pubDate']
        try:
            # Drop the "+0900"-style offset; rsplit leaves the text intact
            # when no '+' is present (rfind/-1 slicing would chop a char).
            stamp = raw_date.rsplit('+', 1)[0].strip()
            pubdate = datetime.datetime.strptime(stamp, "%a, %d %b %Y %H:%M:%S")
        except (AttributeError, TypeError, ValueError):
            # Best effort: an unparseable date falls back to "now".
            pubdate = datetime.datetime.now()
        return News(title, link, description, pubdate)
Morpheme.py
#Morpheme.py - 형태소 클래스
class Morpheme:
    """A word together with the number of times it has been referenced."""

    def __init__(self, word):
        self.word = word  # the word itself
        self.ref = 1      # reference count; a fresh morpheme counts once

    def IsEqual(self, other):
        """Return True when both morphemes hold the same word."""
        return self.word == other.word

    def Merge(self, other):
        """Add other's reference count to this one if the words match."""
        if not self.IsEqual(other):
            return
        self.ref += other.ref
NewsSql.py
#NewsSql.py
from News import News
import pymssql
class NewsSql:
    """Data-access helpers for the News table (SQL Server through pymssql).

    Every method opens its own connection and always closes it, even when
    the statement fails. Values are passed as query parameters rather than
    formatted into the SQL text, so quotes in article text cannot break or
    alter the statement.
    """

    @staticmethod
    def _connect():
        """Open a new connection to the BigPro database."""
        # NOTE(review): server address and credentials are hard-coded;
        # consider moving them to configuration.
        return pymssql.connect("127.0.0.1:1433", "sa", "1234", "BigPro")

    @staticmethod
    def AddNews(news):
        """Insert a news article with mcnt=0; database errors are ignored.

        Args:
            news: object with title, description, link and pubdate
                (a datetime) attributes.
        """
        dstr = news.pubdate.strftime('%Y-%m-%d %H:%M:%S')
        query = ("insert into News (title,description,link,pubdate,mcnt) "
                 "values(%s,%s,%s,%s,%d)")
        conn = NewsSql._connect()
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(
                    query, (news.title, news.description, news.link, dstr, 0))
                conn.commit()
            except pymssql.Error:
                # Best effort, as before: a failed insert (for example a
                # link that is already stored) is silently skipped.
                pass
        finally:
            conn.close()

    @staticmethod
    def UpdateMCnt(link, mcnt):
        """Set the mcnt column of the article identified by link."""
        conn = NewsSql._connect()
        try:
            cursor = conn.cursor()
            cursor.execute(
                "update News set mcnt=%d where (link=%s)", (mcnt, link))
            conn.commit()
        finally:
            conn.close()

    @staticmethod
    def FindNid(link):
        """Return the nid of the article identified by link.

        Raises:
            TypeError: if no article with that link exists (the fetched
                row is None), same as the original behaviour.
        """
        conn = NewsSql._connect()
        try:
            cursor = conn.cursor()
            cursor.execute("select nid from News where (link=%s)", (link,))
            row = cursor.fetchone()
        finally:
            conn.close()
        return row[0]

    @staticmethod
    def FindNewsByNid(nid):
        """Return (title, link, description, pubdate, mcnt) for nid, or None."""
        conn = NewsSql._connect()
        try:
            cursor = conn.cursor()
            cursor.execute(
                "select title,link,description,pubdate,mcnt from News "
                " where (nid=%d)", (nid,))
            return cursor.fetchone()
        finally:
            conn.close()

    @staticmethod
    def TotalDocumentCount():
        """Return the number of rows in the News table."""
        conn = NewsSql._connect()
        try:
            cursor = conn.cursor()
            cursor.execute("select count(*) from News")
            row = cursor.fetchone()
        finally:
            conn.close()
        return row[0]
MorphemeSql.py
#MorphemeSql.py
import pymssql
class MorphemeSql:
    """Data-access helpers for the Morpheme table (SQL Server through pymssql).

    Every method opens its own connection and always closes it. Words are
    passed as query parameters instead of being formatted into the SQL text.
    """

    @staticmethod
    def _connect():
        """Open a new connection to the BigPro database."""
        # NOTE(review): server address and credentials are hard-coded;
        # consider moving them to configuration.
        return pymssql.connect("127.0.0.1:1433", "sa", "1234", "BigPro")

    @staticmethod
    def AddMorpheme(mo):
        """Insert mo.word into Morpheme; database errors are ignored."""
        conn = MorphemeSql._connect()
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(
                    "Insert into Morpheme(word) values(%s)", (mo.word,))
                conn.commit()
            except pymssql.Error:
                # Best effort, as before: a failed insert (for example a
                # word that is already stored) is silently skipped.
                pass
        finally:
            conn.close()

    @staticmethod
    def FindMid(word):
        """Return the mid of word, or 0 when the word is not stored."""
        conn = MorphemeSql._connect()
        try:
            cursor = conn.cursor()
            cursor.execute(
                "select mid from Morpheme where (word=%s)", (word,))
            row = cursor.fetchone()
        finally:
            conn.close()
        if row:
            return row[0]
        return 0

    @staticmethod
    def ListMorpheme():
        """Return a list with every word stored in the Morpheme table."""
        conn = MorphemeSql._connect()
        try:
            cursor = conn.cursor()
            cursor.execute("select word from Morpheme")
            return [row[0] for row in cursor.fetchall()]
        finally:
            conn.close()
InverseSql.py
#InverseSql.py
import pymssql
class InverseSql:
    """Data-access helpers for the Inverse (inverted index) table.

    Every method opens its own connection and always closes it. Values are
    passed as query parameters instead of being formatted into the SQL text.
    """

    @staticmethod
    def _connect():
        """Open a new connection to the BigPro database."""
        # NOTE(review): server address and credentials are hard-coded;
        # consider moving them to configuration.
        return pymssql.connect("127.0.0.1:1433", "sa", "1234", "BigPro")

    @staticmethod
    def AddInverseItem(nid, mid, rcnt):
        """Insert one (nid, mid, rcnt) row; database errors are ignored."""
        conn = InverseSql._connect()
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(
                    "Insert into Inverse(nid,mid,rcnt) values(%d,%d,%d)",
                    (nid, mid, rcnt))
                conn.commit()
            except pymssql.Error:
                # Best effort, as before: a failed insert is silently skipped.
                pass
        finally:
            conn.close()

    @staticmethod
    def FindInv(mid):
        """Return a list of (nid, rcnt) rows recorded for the morpheme mid."""
        conn = InverseSql._connect()
        try:
            cursor = conn.cursor()
            cursor.execute(
                "Select nid,rcnt from Inverse where mid=%d", (mid,))
            return list(cursor.fetchall())
        finally:
            conn.close()
NewsSearcher.py
#NewsSearcher.py - 뉴스 검색기
from MorphemeParser import MorphemeParser
from Morpheme import Morpheme
import urllib.request
import json
class NewsSearcher:
    """Client for the Naver news search open API."""

    def __init__(self):
        self.client_id ="네이버에서 제공한 클라이언트 ID"
        self.client_secret="네이버에서 제공한 클라이언트 Secret"
        self.url = "https://openapi.naver.com/v1/search/news.json"

    def SetQuery(self, query):
        """Store the URL-encoded search term used by later requests."""
        self.qp = "query=" + urllib.parse.quote(query)

    def Request(self, start, display):
        """Fetch one page of results.

        Args:
            start: 1-based index of the first item to return.
            display: number of items to return.

        Returns:
            (items, total): the list under 'items' and the integer under
            'total' of the JSON response, or ([], 0) when the request
            fails or the status code is not 200.
        """
        query_str = "{0}?{1}&start={2}&display={3}".format(
            self.url, self.qp, start, display)
        request = urllib.request.Request(query_str)
        request.add_header("X-Naver-Client-Id", self.client_id)
        request.add_header("X-Naver-Client-Secret", self.client_secret)
        try:
            response = urllib.request.urlopen(request)
        except Exception:  # any failure to open is treated as "no results"
            return list(), 0
        with response:  # make sure the HTTP connection is released
            if response.getcode() != 200:  # request did not succeed
                return list(), 0
            content = response.read().decode('utf-8')
        jdata = json.loads(content)
        total = int(jdata['total'])
        return jdata['items'], total

    def RequestAll(self):
        """Fetch every available page (100 items each) and return all items.

        Paging continues while the next start index is within the reported
        total and does not exceed 1000. A failed page reports total 0 and
        therefore ends the loop.
        """
        display = 100
        max_start = 1000
        redatas = list()
        start = 1
        datas, total = self.Request(start, display)
        redatas.extend(datas)
        start = start + display
        # `<=` matters: with total == 101 the item at index 101 is on the
        # page that starts at 101 and must still be requested.
        while start <= total and start <= max_start:
            datas, total = self.Request(start, display)
            redatas.extend(datas)
            start = start + display
        return redatas
MorphemeParser.py
#MorphemeParser.py - 형태소 분석기
from Morpheme import Morpheme
from EHHelper import EHHelper
class MorphemeParser:
    """Splits text into morphemes (words) and counts repeated words."""

    @staticmethod
    def Parse(src):
        """Return the merged list of morphemes found in src.

        The text is first cleaned with EHHelper.EmitTagAndSpecialCh, then
        split on single spaces; only purely alphabetic tokens are kept.
        Repeated words are merged into one Morpheme.
        """
        src = EHHelper.EmitTagAndSpecialCh(src)
        morphes = [Morpheme(elem) for elem in src.split(' ')
                   if str.isalpha(elem)]
        return MorphemeParser.Merge(morphes)

    @staticmethod
    def Merge(morphes):
        """Merge morphemes holding the same word.

        The first morpheme seen for each word is kept (and mutated through
        its Merge method); later duplicates are folded into it. The result
        keeps the order of first occurrence.

        Args:
            morphes: iterable of objects with a `word` attribute and a
                `Merge(other)` method.

        Returns:
            A new list with one morpheme per distinct word.
        """
        # Dict lookup replaces the nested scan of the result list; dicts
        # preserve insertion order, so the output order is unchanged.
        merged = {}
        for morph in morphes:
            first = merged.get(morph.word)
            if first is None:
                merged[morph.word] = morph
            else:
                first.Merge(morph)
        return list(merged.values())
NewsAnaylizer.py
#NewsAnaylizer.py - 뉴스 분석기
from NewsSearcher import NewsSearcher
from MorphemeParser import MorphemeParser
from Morpheme import Morpheme
from News import News
class NewsAnaylizer:
    """Searches news for a query and parses each article into morphemes."""

    def __init__(self):
        self.ns = NewsSearcher()

    def Analize(self, query):
        """Return [news, title_morphemes, description_morphemes] per article.

        Only the first 10 results are requested; self.ns.RequestAll() is the
        alternative that fetches every page.
        """
        self.ns.SetQuery(query)
        items, _ = self.ns.Request(1, 10)
        return [
            [
                article,
                MorphemeParser.Parse(article.title),
                MorphemeParser.Parse(article.description),
            ]
            for article in map(News.MakeNews, items)
        ]
Main.py
from NewsAnaylizer import NewsAnaylizer
from NewsSql import NewsSql
from MorphemeParser import MorphemeParser
from News import News
from MorphemeSql import MorphemeSql
from InverseSql import InverseSql
# Ask for a search term, analyse the matching news and store the results.
na = NewsAnaylizer()
q = input('질의:')
adatas = na.Analize(q)
for news, m1, m2 in adatas:
    # Store the cleaned article, then record its morpheme count.
    news.PreProcess()
    NewsSql.AddNews(news)
    NewsSql.UpdateMCnt(news.link, len(m1) + len(m2))
    nid = NewsSql.FindNid(news.link)
    # Title morphemes first, then description morphemes: register each word
    # and add an inverted-index row linking it to this article.
    for morphemes in (m1, m2):
        for mo in morphemes:
            MorphemeSql.AddMorpheme(mo)
            mid = MorphemeSql.FindMid(mo.word)
            InverseSql.AddInverseItem(nid, mid, mo.ref)
반응형
'언어 자료구조 알고리즘 > 파이썬(Python)' 카테고리의 다른 글
[python] 뉴스 검색기V04 feat.네이버 개발자센터 (0) | 2020.11.11 |
---|---|
[ python] 뉴스 분석기 feat.네이버 개발자센터 , 형태소 분석 (0) | 2020.11.10 |
[python] 뉴스 검색 - 형태소 분석 (feat. 네이버 개발자센터) (0) | 2020.11.10 |
[python] 도서 검색기 feat. 네이버 개발자센터 (0) | 2020.11.09 |
[python] 네이버 도서 검색 API 활용 - Json (0) | 2020.11.09 |
파이썬에서 별도의 선택문은 없어요. 대신 elif를 이용하세요. (0) | 2020.10.22 |
[python] 13. 리스트의 요소 개수 알아내기 및 정렬하기 (3) | 2016.05.31 |
[python] 12. 리스트에서 자료 삭제하기 (0) | 2016.05.31 |
[python] 11. 리스트에 자료를 추가하기 (0) | 2016.05.23 |
[python] 10. 파이썬을 잘 사용하기 위한 첫 걸음, 리스트를 소개합니다. (0) | 2016.05.20 |