上記でも JSON 形式で返ってきますが、chunk や tok 要素の中身が 1 つしかない時にリスト形式になっていない、feature がカンマ区切りの文字列(リスト形式でない)になっているなど少し不便です。
下記の様に処理を追加するとフォーマットを揃えることができます。
import CaboCha
import xmltodict
import json
# Parse a sample sentence with CaboCha and turn its XML output into a dict.
# XML attributes keep their bare names (attr_prefix='') and the element text
# is stored under the 'surface' key.
c = CaboCha.Parser()
tree = c.parse('今日は天気がとても良いですね。')
xmltree = tree.toString(CaboCha.FORMAT_XML)
jsonobj = xmltodict.parse(xmltree, attr_prefix='', cdata_key='surface', dict_constructor=dict)

# Added section (start): normalize the structure so that `chunk` and `tok`
# are always lists and `feature` is a list rather than a comma-joined string.
if jsonobj['sentence']:  # only process when the sentence element has content
    # A lone chunk comes back as a single dict, so wrap it in a list.
    if not isinstance(jsonobj['sentence']['chunk'], list):
        jsonobj['sentence']['chunk'] = [jsonobj['sentence']['chunk']]
    for chunk in jsonobj['sentence']['chunk']:
        # Same for a chunk holding a single tok.
        if not isinstance(chunk['tok'], list):
            chunk['tok'] = [chunk['tok']]
        for tok in chunk['tok']:
            # Split the comma-separated feature string into a list.
            tok['feature'] = tok['feature'].split(',')
# Added section (end)

print(json.dumps(jsonobj, indent=2, ensure_ascii=False))
% cd Users/ユーザー名/Downloads
% ls
cabocha-0.69.tar.bz2
ファイルを解凍して、configure、make、make install を行います。
% tar xfv cabocha-0.69.tar.bz2
% cd cabocha-0.69
% ./configure --prefix=/usr/local/cabocha/0_69 --with-charset=UTF8 --with-posset=IPA
% make
% make install
実行時に「-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd」を渡すのですが、下記コードの様に「CaboCha.Parser('-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd')」としてあげれば OK です。
{
"kind": "youtube#commentThreadListResponse",
"etag": "tSw5WSiFS4IMMytcgoYXJ9zpu6I",
"nextPageToken": "QURTSl9pMDFKVnZtTFl0NFZOdnhaZFpXaFBOcWU5aDA0QWM5bDVpYk5oVTd1WDQwSDY1cU11OVBOZHNnWFNOTmNJby1Db1JpWno2Qnd5bw==",
"pageInfo": {
"totalResults": 5,
"resultsPerPage": 5
},
"items": [
{
"kind": "youtube#commentThread",
"etag": "An3zz04lgE7jUVO7VXXmqfdwFjk",
"id": "UgwDY44NUll4uiZXqqx4AaABAg",
"snippet": {
"videoId": "fdsaZ8EMR2U",
"topLevelComment": {
"kind": "youtube#comment",
"etag": "Kty1w2F4dTbXmakl-ywdK28vLEg",
"id": "UgwDY44NUll4uiZXqqx4AaABAg",
"snippet": {
"videoId": "fdsaZ8EMR2U",
"textDisplay": "SO gooood",
"textOriginal": "SO gooood",
"authorDisplayName": "fatt musiek",
"authorProfileImageUrl": "https://yt3.ggpht.com/ytc/AAUvwngpQ-20jVq0c-9aC-wDJ87aTKi2QvPLTRN2GXGRaw=s48-c-k-c0x00ffffff-no-rj",
"authorChannelUrl": "http://www.youtube.com/channel/UCl3ha3zwY9p6CemIZZXIdXQ",
"authorChannelId": {
"value": "UCl3ha3zwY9p6CemIZZXIdXQ"
},
"canRate": true,
"viewerRating": "none",
"likeCount": 0,
"publishedAt": "2021-05-22T18:48:34Z",
"updatedAt": "2021-05-22T18:48:34Z"
}
},
"canReply": true,
"totalReplyCount": 0,
"isPublic": true
}
},
{
"kind": "youtube#commentThread",
"etag": "QVJH5RHTNij1fN5jRj_mNcDscHA",
"id": "Ugx8sUuwqKqPVG9eSuJ4AaABAg",
"snippet": {
"videoId": "fdsaZ8EMR2U",
"topLevelComment": {
"kind": "youtube#comment",
"etag": "Y_SsBrGGxQoLpcztQqND9wGarUc",
"id": "Ugx8sUuwqKqPVG9eSuJ4AaABAg",
"snippet": {
"videoId": "fdsaZ8EMR2U",
"textDisplay": "so what if really yuffie have met johnny hehe",
"textOriginal": "so what if really yuffie have met johnny hehe",
"authorDisplayName": "GregOrio Barachina",
"authorProfileImageUrl": "https://yt3.ggpht.com/ytc/AAUvwnjgJE6zBYksYQWt8TmKlMDYOyG0t-BHPNWWmvUUPQ=s48-c-k-c0x00ffffff-no-rj",
"authorChannelUrl": "http://www.youtube.com/channel/UCUs2OJ4-KqYGS2EPJCDj7tQ",
"authorChannelId": {
"value": "UCUs2OJ4-KqYGS2EPJCDj7tQ"
},
"canRate": true,
"viewerRating": "none",
"likeCount": 0,
"publishedAt": "2021-05-21T13:42:45Z",
"updatedAt": "2021-05-21T13:42:45Z"
}
},
"canReply": true,
"totalReplyCount": 0,
"isPublic": true
}
},
{
"kind": "youtube#commentThread",
"etag": "ggLtp9jtNyrqb3JSvzkvUDon7gg",
"id": "UgwP-4ucsrWh_iXJQMN4AaABAg",
"snippet": {
"videoId": "fdsaZ8EMR2U",
"topLevelComment": {
"kind": "youtube#comment",
"etag": "Raxyf3_Zw3ksZyGeWDt7_HHW8SA",
"id": "UgwP-4ucsrWh_iXJQMN4AaABAg",
"snippet": {
"videoId": "fdsaZ8EMR2U",
"textDisplay": "The Aerith and Cloud scene is much more meaningful than the one with Tifa. Still a good scene but come on...Aerith just appearing amongst the flowers and getting to see her again...priceless",
"textOriginal": "The Aerith and Cloud scene is much more meaningful than the one with Tifa. Still a good scene but come on...Aerith just appearing amongst the flowers and getting to see her again...priceless",
"authorDisplayName": "Maxx Doran",
"authorProfileImageUrl": "https://yt3.ggpht.com/ytc/AAUvwnixfMDBxLt_TfUEjlpHhU-OvwE1vjCgpFBAVIMxjg=s48-c-k-c0x00ffffff-no-rj",
"authorChannelUrl": "http://www.youtube.com/channel/UCXcLTX_9fNHLVAMr_plxeqQ",
"authorChannelId": {
"value": "UCXcLTX_9fNHLVAMr_plxeqQ"
},
"canRate": true,
"viewerRating": "none",
"likeCount": 0,
"publishedAt": "2021-05-18T01:50:04Z",
"updatedAt": "2021-05-18T01:50:04Z"
}
},
"canReply": true,
"totalReplyCount": 0,
"isPublic": true
}
},
{
"kind": "youtube#commentThread",
"etag": "ptUVfOGBkZDnUXKFoDaGnQ9Y-gw",
"id": "UgzTN_4ek7syWNNbCrB4AaABAg",
"snippet": {
"videoId": "fdsaZ8EMR2U",
"topLevelComment": {
"kind": "youtube#comment",
"etag": "L3vDroBKOAklgIqKSkcX_JvLn_g",
"id": "UgzTN_4ek7syWNNbCrB4AaABAg",
"snippet": {
"videoId": "fdsaZ8EMR2U",
"textDisplay": "You caught on to the magnify barrier idea so early. I was part way into hard mode before I thought of that.",
"textOriginal": "You caught on to the magnify barrier idea so early. I was part way into hard mode before I thought of that.",
"authorDisplayName": "Justin Edwards",
"authorProfileImageUrl": "https://yt3.ggpht.com/ytc/AAUvwngz1mU5zD3QHSRVU3jXTEZApnkYsmAzCKFXxUyD1w=s48-c-k-c0x00ffffff-no-rj",
"authorChannelUrl": "http://www.youtube.com/channel/UCO-oPQJCpNw87M6YbcuuFMw",
"authorChannelId": {
"value": "UCO-oPQJCpNw87M6YbcuuFMw"
},
"canRate": true,
"viewerRating": "none",
"likeCount": 0,
"publishedAt": "2021-05-10T07:25:36Z",
"updatedAt": "2021-05-10T07:25:36Z"
}
},
"canReply": true,
"totalReplyCount": 0,
"isPublic": true
}
},
{
"kind": "youtube#commentThread",
"etag": "vfaqu09YbjpC_akz6riq0_XpSCw",
"id": "UgygOOysmSAraKnx81h4AaABAg",
"snippet": {
"videoId": "fdsaZ8EMR2U",
"topLevelComment": {
"kind": "youtube#comment",
"etag": "k8vIS0anrGkqCcFfyj0gnrUwXQI",
"id": "UgygOOysmSAraKnx81h4AaABAg",
"snippet": {
"videoId": "fdsaZ8EMR2U",
"textDisplay": "Y'know Max, you COULD have just run 5k steps in Aerith's garden, checked what the Materia did, and moved on. Or, maybe, look up an online guide, since by now I'm sure SOMEONE has posted one.",
"textOriginal": "Y'know Max, you COULD have just run 5k steps in Aerith's garden, checked what the Materia did, and moved on. Or, maybe, look up an online guide, since by now I'm sure SOMEONE has posted one.",
"authorDisplayName": "Soma Cruz the Demigod of Balance",
"authorProfileImageUrl": "https://yt3.ggpht.com/ytc/AAUvwnilg2dkOBvJqeTbW34CBoxURHLWv78fnbCRkArv=s48-c-k-c0x00ffffff-no-rj",
"authorChannelUrl": "http://www.youtube.com/channel/UCNaiemmWvNbzfaQm5e3hyqA",
"authorChannelId": {
"value": "UCNaiemmWvNbzfaQm5e3hyqA"
},
"canRate": true,
"viewerRating": "none",
"likeCount": 0,
"publishedAt": "2021-05-07T02:21:29Z",
"updatedAt": "2021-05-07T02:21:29Z"
}
},
"canReply": true,
"totalReplyCount": 1,
"isPublic": true
},
"replies": {
"comments": [
{
"kind": "youtube#comment",
"etag": "oMSJ1drDJreTmguX72NWydzfbcY",
"id": "UgygOOysmSAraKnx81h4AaABAg.9N10GKEX1209N2h_vQGc9Y",
"snippet": {
"videoId": "fdsaZ8EMR2U",
"textDisplay": "\u003ca href=\"https://www.youtube.com/watch?v=fdsaZ8EMR2U&t=38m03s\"\u003e38:03\u003c/a\u003e These things don't stagger? But each time they clone, they lose health, and the clones are much weaker. Damn, I see why you had trouble with these, Max.",
"textOriginal": "38:03 These things don't stagger? But each time they clone, they lose health, and the clones are much weaker. Damn, I see why you had trouble with these, Max.",
"parentId": "UgygOOysmSAraKnx81h4AaABAg",
"authorDisplayName": "Soma Cruz the Demigod of Balance",
"authorProfileImageUrl": "https://yt3.ggpht.com/ytc/AAUvwnilg2dkOBvJqeTbW34CBoxURHLWv78fnbCRkArv=s48-c-k-c0x00ffffff-no-rj",
"authorChannelUrl": "http://www.youtube.com/channel/UCNaiemmWvNbzfaQm5e3hyqA",
"authorChannelId": {
"value": "UCNaiemmWvNbzfaQm5e3hyqA"
},
"canRate": true,
"viewerRating": "none",
"likeCount": 0,
"publishedAt": "2021-05-07T18:08:01Z",
"updatedAt": "2021-05-07T18:08:01Z"
}
}
]
}
}
]
}
Comments
Comments ではコメント ID を直接指定してデータを取得します。上の例で取得した 5 つのコメント ID を「id」に指定してデータを取得してみます。
# -*- coding: utf-8 -*-
# Sample Python code for youtube.comments.list
# See instructions for running these code samples locally:
# https://developers.google.com/explorer-help/guides/code_samples#python
import os
import googleapiclient.discovery
def main():
    """Request a fixed set of comments from the YouTube Data API and print the response."""
    # Disable OAuthlib's HTTPS verification when running locally.
    # *DO NOT* leave this option enabled in production.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    DEVELOPER_KEY = "YOUR_API_KEY"
    service, version = "youtube", "v3"
    youtube = googleapiclient.discovery.build(
        service, version, developerKey=DEVELOPER_KEY)

    # Comma-separated comment IDs to look up in a single comments.list call.
    comment_ids = "UgwDY44NUll4uiZXqqx4AaABAg,Ugx8sUuwqKqPVG9eSuJ4AaABAg,UgwP-4ucsrWh_iXJQMN4AaABAg,UgzTN_4ek7syWNNbCrB4AaABAg,UgygOOysmSAraKnx81h4AaABAg"
    response = youtube.comments().list(
        part="id,snippet",
        id=comment_ids,
    ).execute()

    print(response)


if __name__ == "__main__":
    main()
下記の様な JSON が返ってきます。CommentThreads の方ではコメントが投稿された動画の ID や、コメントに対する返信も含まれていましたが、Comments の方には含まれません。
{
"kind": "youtube#commentListResponse",
"etag": "vwaB3KAa_Snb_GuTkkMYrlL7Jrg",
"items": [
{
"kind": "youtube#comment",
"etag": "E9ovRZPTGOUQzHb0AEiKA26EJxY",
"id": "UgwDY44NUll4uiZXqqx4AaABAg",
"snippet": {
"textDisplay": "SO gooood",
"textOriginal": "SO gooood",
"authorDisplayName": "fatt musiek",
"authorProfileImageUrl": "https://yt3.ggpht.com/ytc/AAUvwngpQ-20jVq0c-9aC-wDJ87aTKi2QvPLTRN2GXGRaw=s48-c-k-c0x00ffffff-no-rj",
"authorChannelUrl": "http://www.youtube.com/channel/UCl3ha3zwY9p6CemIZZXIdXQ",
"authorChannelId": {
"value": "UCl3ha3zwY9p6CemIZZXIdXQ"
},
"canRate": true,
"viewerRating": "none",
"likeCount": 0,
"publishedAt": "2021-05-22T18:48:34Z",
"updatedAt": "2021-05-22T18:48:34Z"
}
},
{
"kind": "youtube#comment",
"etag": "FG5oWvmMF39kDNl_rlnzb0bsSWM",
"id": "Ugx8sUuwqKqPVG9eSuJ4AaABAg",
"snippet": {
"textDisplay": "so what if really yuffie have met johnny hehe",
"textOriginal": "so what if really yuffie have met johnny hehe",
"authorDisplayName": "GregOrio Barachina",
"authorProfileImageUrl": "https://yt3.ggpht.com/ytc/AAUvwnjgJE6zBYksYQWt8TmKlMDYOyG0t-BHPNWWmvUUPQ=s48-c-k-c0x00ffffff-no-rj",
"authorChannelUrl": "http://www.youtube.com/channel/UCUs2OJ4-KqYGS2EPJCDj7tQ",
"authorChannelId": {
"value": "UCUs2OJ4-KqYGS2EPJCDj7tQ"
},
"canRate": true,
"viewerRating": "none",
"likeCount": 0,
"publishedAt": "2021-05-21T13:42:45Z",
"updatedAt": "2021-05-21T13:42:45Z"
}
},
{
"kind": "youtube#comment",
"etag": "WQw2UyILXAhkYOAl-AKScZi1pCY",
"id": "UgwP-4ucsrWh_iXJQMN4AaABAg",
"snippet": {
"textDisplay": "The Aerith and Cloud scene is much more meaningful than the one with Tifa. Still a good scene but come on...Aerith just appearing amongst the flowers and getting to see her again...priceless",
"textOriginal": "The Aerith and Cloud scene is much more meaningful than the one with Tifa. Still a good scene but come on...Aerith just appearing amongst the flowers and getting to see her again...priceless",
"authorDisplayName": "Maxx Doran",
"authorProfileImageUrl": "https://yt3.ggpht.com/ytc/AAUvwnixfMDBxLt_TfUEjlpHhU-OvwE1vjCgpFBAVIMxjg=s48-c-k-c0x00ffffff-no-rj",
"authorChannelUrl": "http://www.youtube.com/channel/UCXcLTX_9fNHLVAMr_plxeqQ",
"authorChannelId": {
"value": "UCXcLTX_9fNHLVAMr_plxeqQ"
},
"canRate": true,
"viewerRating": "none",
"likeCount": 0,
"publishedAt": "2021-05-18T01:50:04Z",
"updatedAt": "2021-05-18T01:50:04Z"
}
},
{
"kind": "youtube#comment",
"etag": "Vchl7kutnRgZb-uKYjMNMrJQ2qQ",
"id": "UgzTN_4ek7syWNNbCrB4AaABAg",
"snippet": {
"textDisplay": "You caught on to the magnify barrier idea so early. I was part way into hard mode before I thought of that.",
"textOriginal": "You caught on to the magnify barrier idea so early. I was part way into hard mode before I thought of that.",
"authorDisplayName": "Justin Edwards",
"authorProfileImageUrl": "https://yt3.ggpht.com/ytc/AAUvwngz1mU5zD3QHSRVU3jXTEZApnkYsmAzCKFXxUyD1w=s48-c-k-c0x00ffffff-no-rj",
"authorChannelUrl": "http://www.youtube.com/channel/UCO-oPQJCpNw87M6YbcuuFMw",
"authorChannelId": {
"value": "UCO-oPQJCpNw87M6YbcuuFMw"
},
"canRate": true,
"viewerRating": "none",
"likeCount": 0,
"publishedAt": "2021-05-10T07:25:36Z",
"updatedAt": "2021-05-10T07:25:36Z"
}
},
{
"kind": "youtube#comment",
"etag": "pbfhdpIgB5QCKnr4Inkm_U2wbjQ",
"id": "UgygOOysmSAraKnx81h4AaABAg",
"snippet": {
"textDisplay": "Y'know Max, you COULD have just run 5k steps in Aerith's garden, checked what the Materia did, and moved on. Or, maybe, look up an online guide, since by now I'm sure SOMEONE has posted one.",
"textOriginal": "Y'know Max, you COULD have just run 5k steps in Aerith's garden, checked what the Materia did, and moved on. Or, maybe, look up an online guide, since by now I'm sure SOMEONE has posted one.",
"authorDisplayName": "Soma Cruz the Demigod of Balance",
"authorProfileImageUrl": "https://yt3.ggpht.com/ytc/AAUvwnilg2dkOBvJqeTbW34CBoxURHLWv78fnbCRkArv=s48-c-k-c0x00ffffff-no-rj",
"authorChannelUrl": "http://www.youtube.com/channel/UCNaiemmWvNbzfaQm5e3hyqA",
"authorChannelId": {
"value": "UCNaiemmWvNbzfaQm5e3hyqA"
},
"canRate": true,
"viewerRating": "none",
"likeCount": 0,
"publishedAt": "2021-05-07T02:21:29Z",
"updatedAt": "2021-05-07T02:21:29Z"
}
}
]
}
$ sudo apt update
$ sudo apt install git
Reading package lists... Done
Building dependency tree
Reading state information... Done
git is already the newest version (1:2.25.1-1ubuntu3.1).
git set to manually installed.
0 upgraded, 0 newly installed, 0 to remove and 18 not upgraded.
% python manage.py inspectdb yt_analysis_07
# This is an auto-generated Django model module.
# You'll have to do the following manually to clean this up:
# * Rearrange models' order
# * Make sure each model has one field with primary_key=True
# * Make sure each ForeignKey and OneToOneField has `on_delete` set to the desired behavior
# * Remove `managed = False` lines if you wish to allow Django to create, modify, and delete the table
# Feel free to rename the models, but don't rename db_table values or field names.
from django.db import models
class YtAnalysis07(models.Model):
    """Model generated by `inspectdb` for the existing `yt_analysis_07` table.

    Every column is declared nullable (`blank=True, null=True`).
    NOTE(review): no field sets `primary_key=True`; the generator's header asks
    for exactly one such field per model, so confirm which column (or an `id`
    column in the table) should act as the primary key.
    """

    channel_id = models.CharField(max_length=40, blank=True, null=True)
    channel_name = models.TextField(blank=True, null=True)
    # view_count is the only counter stored as a BigIntegerField.
    view_count = models.BigIntegerField(blank=True, null=True)
    like_count = models.IntegerField(blank=True, null=True)
    dislike_count = models.IntegerField(blank=True, null=True)
    favorite_count = models.IntegerField(blank=True, null=True)
    comment_count = models.IntegerField(blank=True, null=True)
    video_count = models.IntegerField(blank=True, null=True)

    class Meta:
        # managed = False: Django will not create, modify, or delete this table.
        managed = False
        db_table = 'yt_analysis_07'
%
from rest_framework import serializers
from .models import YtAnalysis07 # モデルをインポート
class APISerializer(serializers.ModelSerializer):
    """ModelSerializer that exposes every field of YtAnalysis07."""

    class Meta:
        model = YtAnalysis07  # model to serialize
        fields = '__all__'  # fields to include (all of them in this case)
ビューの作成
API のビューを作成します。ざっくり言うとモデルからデータを抽出し、シリアライザに渡して JSON に変換し、その JSON データをレスポンスとして返します。
class based view の場合
from rest_framework import viewsets
from .serializers import APISerializer
from .models import YtAnalysis07
class DataListView(viewsets.ModelViewSet):
    """ViewSet that serves YtAnalysis07 rows through APISerializer."""

    queryset = YtAnalysis07.objects.all()  # queryset that pulls the rows from the model
    serializer_class = APISerializer  # serializer used to render the rows
urls.py の設定は下記の様にします。
from django.urls import path
from . import views
# URL configuration for the class-based view.
app_name = 'api'
urlpatterns = [
    # Only GET is routed, and it is mapped to the viewset's `list` action.
    path('data-list/', views.DataListView.as_view({'get': 'list'}), name="data-list"),
]
function based view の場合
from rest_framework.decorators import api_view
from rest_framework.response import Response
from .serializers import APISerializer
from .models import YtAnalysis07
@api_view(['GET'])  # accept GET requests only
def dataList(request):
    """Return all YtAnalysis07 rows, serialized with APISerializer, as the response."""
    api_data = YtAnalysis07.objects.all()  # pull the rows from the model
    serializer = APISerializer(api_data, many=True)  # hand the rows to the serializer
    return Response(serializer.data)  # return the serialized data
urls.py の設定は下記の様にします。
from django.urls import path
from . import views
# URL configuration for the function-based view.
app_name = 'api'
urlpatterns = [
    # The view function is passed directly; no as_view() call is involved.
    path('data-list/', views.dataList, name="data-list"),
]
==> Searching for similarly named formulae...
These similarly named formulae were found:
mysql-client mysql-client@5.7
To install one of them, run (for example):
brew install mysql-client
Error: No available formula or cask with the name "mysqlclient".
==> Searching for a previously deleted formula (in the last month)...
Error: No previously deleted formula found.
==> Searching taps on GitHub...
Error: No formulae found in taps.
とりあえず一番初めのエラー「Error: No available formula or cask with the name "mysqlclient".」が気になる。
(venv) % pip install mysql
Collecting mysql
Downloading https://files.pythonhosted.org/packages/bf/5f/b574ac9f70811df0540e403309f349a8b9fa1a25d3653824c32e52cc1f28/mysql-0.0.2.tar.gz
Collecting mysqlclient (from mysql)
Downloading https://files.pythonhosted.org/packages/3c/df/59cd2fa5e48d0804d213bdcb1acb4d08c403b61c7ff7ed4dd4a6a2deb3f7/mysqlclient-2.0.3.tar.gz (88kB)
|████████████████████████████████| 92kB 5.7MB/s
Installing collected packages: mysqlclient, mysql
Running setup.py install for mysqlclient ... done
Running setup.py install for mysql ... done
Successfully installed mysql-0.0.2 mysqlclient-2.0.3
WARNING: You are using pip version 19.2.3, however version 21.0.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.
(venv) %