0. 패키지 설치
0.1 konlpy 설치
pip install konlpy  # Python 2.x
pip3 install konlpy  # Python 3.x
0.2 PyTagCloud 설치
기본 PyTagCloud는 한글이 그려지지 않기 때문에 Lucy Park님이 수정해 주신 PyTagCloud-CJK를 설치해야 함.
git clone https://github.com/e9t/PyTagCloud-CJK.git
cd PyTagCloud-CJK
python setup.py install
1. 코드 작성
"""Draw a word cloud from the headlines on the Daum news front page.

The script downloads the page, extracts the headline link texts, counts the
nouns found by KoNLPy's Hannanum tagger, renders the most frequent ones with
pytagcloud and opens the resulting image in the default web browser.
"""

from collections import Counter
from pathlib import Path
import random
import sys
import webbrowser

from konlpy.tag import Hannanum
from lxml import html
import pytagcloud  # requires Korean font support
import requests

DAUM_NEWS_URL = 'https://media.daum.net/'
HEADLINE_XPATH = '//strong[@class="tit_g"]//a/text()'
REQUEST_TIMEOUT = 10  # seconds; requests waits indefinitely when none is given

# Deletion table for the control whitespace stripped from every headline.
_CONTROL_WHITESPACE = str.maketrans('', '', '\r\n\t')


def r():
    """Return a random colour channel value in the range 0..255."""
    return random.randint(0, 255)


def color():
    """Return a random (red, green, blue) tuple."""
    return (r(), r(), r())


def get_bill_text_daum(url=DAUM_NEWS_URL, timeout=REQUEST_TIMEOUT):
    """Fetch *url* and return its headline texts joined by single spaces.

    Args:
        url: Page to download; defaults to the Daum news front page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        One string containing every text node matched by HEADLINE_XPATH,
        with carriage returns, newlines and tabs removed. Empty when the
        XPath matches nothing.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of building a cloud from an error page.
    response.raise_for_status()
    page = html.fromstring(response.text)
    headlines = page.xpath(HEADLINE_XPATH)
    return ' '.join(
        headline.translate(_CONTROL_WHITESPACE) for headline in headlines
    )


def get_tags(text, ntags=50, multiplier=10):
    """Return pytagcloud tag dicts for the most frequent nouns in *text*.

    Args:
        text: Corpus to analyse.
        ntags: Maximum number of tags to return.
        multiplier: Factor applied to each noun count to obtain its size.

    Returns:
        A list of dicts with the keys 'color', 'tag' and 'size', ordered
        from most to least frequent. Empty when *text* is blank.
    """
    if not text.strip():
        # Nothing to analyse, so do not start the tagger at all.
        return []
    nouns = Hannanum().nouns(text)
    count = Counter(nouns)
    return [
        {'color': color(), 'tag': noun, 'size': freq * multiplier}
        for noun, freq in count.most_common(ntags)
    ]


def draw_cloud(tags, filename, fontname='Noto Sans CJK', size=(800, 600)):
    """Render *tags* to the image *filename* and open it in a browser.

    Args:
        tags: Tag dicts as produced by get_tags().
        filename: Path of the image file to write.
        fontname: Font name passed to pytagcloud.
        size: (width, height) of the image.
    """
    pytagcloud.create_tag_image(tags, filename, fontname=fontname, size=size)
    # webbrowser.open() expects a URL; a bare relative path is not portable.
    webbrowser.open(Path(filename).resolve().as_uri())


def main():
    """Build the Daum headline word cloud and display it."""
    text = get_bill_text_daum()
    tags = get_tags(text)
    if not tags:
        sys.exit('No headline nouns found; nothing to draw.')
    draw_cloud(tags, 'wordcloud_daum.png')


if __name__ == '__main__':
    main()
2. 결과
3. ref
http://konlpy.org/ko/latest/examples/wordcloud/
댓글 없음:
댓글 쓰기