学习Python第二天

上节复习

# Convert the counts dict into a list of (key, value) pairs; a list can be
# reordered in place with list.sort(), which is how items is used later on.
items = list(counts.items())

打开文件(两种方式)

  • 一定要进行关闭的操作
# Manual open/close variant: the file handle must be closed explicitly.
# try/finally guarantees close() runs even if read() raises.
f = open(file='./threekingdom.txt', mode='r', encoding='utf-8')
try:
    words = f.read()
finally:
    f.close()
print(words)
  • 该方式不用关闭
# Context-manager variant: the file is closed automatically when the
# with-block exits, so no explicit close() call is needed.
with open('./hongloumeng.txt', mode='r', encoding='utf-8') as f:
    words = f.read()

词云绘制

from wordcloud import WordCloud

# Space/comma separated text used as the word-cloud input.
txt = 'wo shi shuai ge,ni shi ge didi'

# Build a word cloud with default settings and save it as an image.
cloud = WordCloud().generate(txt)
cloud.to_file('python1.png')

三国词云绘制

from wordcloud import WordCloud
import jieba
import imageio

# Image passed to WordCloud as its mask.
mask = imageio.imread('china.jpg')

# Read the whole novel into memory; the file is closed when the block exits.
with open('./threekingdom.txt', mode='r', encoding='utf-8') as f:
    words = f.read()

# Segment the text with jieba, then join the pieces with spaces to form
# the single string handed to generate().
words_list = jieba.lcut(words)
words_text = ' '.join(words_list)

# NOTE(review): 'msyh.ttc' is presumably a CJK-capable font so Chinese
# glyphs render -- confirm the font file is available on the target machine.
cloud = WordCloud(
    background_color='white',
    width=800,
    height=600,
    font_path='msyh.ttc',
    mask=mask,
)
cloud.generate(words_text).to_file('sanguo.png')


sanguo.png

往字典里添加元素

     # Idea: counts[word] = <count already stored in the dictionary> + 1
     # Naive form -- raises KeyError the first time a word is seen:
     #     counts[word] = counts[word] + 1

# dict.get() returns 0 for a word that is not in the dictionary yet, so the
# first occurrence is stored as 1 instead of raising KeyError.
counts[word] = counts.get(word, 0) + 1

自定义函数与lambda的替换

def sort_by_count(x):
    """Return the sort key for a ``(word, count)`` pair.

    Args:
        x: An indexable item whose element at index 1 is the value to
            sort by.

    Returns:
        ``x[1]``.
    """
    return x[1]


# Sort with the named key function, largest count first.
items.sort(key=sort_by_count, reverse=True)
# Equivalent: the same key expressed inline as a lambda.
items.sort(key=lambda x: x[1], reverse=True)
print(items)

常用的排序

from random import shuffle

# Build the list 0..9 directly from the range instead of appending in a loop.
li = list(range(10))
print(li)

# Shuffle in place so the sort below has something to reorder.
shuffle(li)
print('顺序打乱后', li)

# First way to sort: list.sort() reorders the list in place;
# reverse=True puts the largest value first.
li.sort(reverse=True)
print('排序后的列表', li)
def sort_by_age(dict_info):
    """Return the ``'age'`` value of a student record, for use as a sort key."""
    return dict_info['age']


def sort_by_name(dict_info):
    """Return the ``'name'`` value of a student record, for use as a sort key."""
    return dict_info['name']


# Student records: one dict per student.
stu_info = [
    {'name': 'zhangsan', 'age': 33},
    {'name': 'lisi', 'age': 8},
    {'name': 'wangwu', 'age': 66},
    {'name': 'tianqi', 'age': 17},
]
print(stu_info)

# ``key`` receives a function that picks the value to sort by.
# reverse=True sorts in descending order, so the oldest student comes first.
stu_info.sort(reverse=True, key=sort_by_age)
print('排序后', stu_info)

# Same pattern keyed on the name: descending alphabetical order.
stu_info.sort(reverse=True, key=sort_by_name)
print('按照名字排序后', stu_info)
def sort_by_tuple(x):
    """Return the element at index 1 of *x*, for use as a sort key."""
    return x[1]


# (name, score) tuples; sorted below by the second element, smallest first.
stu_info = [('zhangsan', 89), ('lisi', 7), ('wangwu ', 222), ('zhaoliu', 44)]
print(stu_info)

stu_info.sort(key=sort_by_tuple)
print('排序后', stu_info)
# Swap two variables: the right-hand side is packed into a tuple first and
# then unpacked into the targets, so no temporary variable is needed.
a = 10
b = 100
a, b = b, a
print('a = ', a)
print('b = ', b)

# Bind several variables in one statement; the values may differ in type.
c, d, e = 100, [90, 33], 'hehe'
print(c)
print(d)
print(e)

top10

import jieba
from wordcloud import WordCloud

# Frequent words that are not character names, plus the alias spellings
# whose counts are merged into a canonical name below; all are removed
# before ranking.
excludes = {"将军", "却说", "丞相", "二人", "不可", "荆州", "不能", "如此", "商议",
            "如何", "主公", "军士", "军马", "左右", "次日", "引兵", "大喜", "天下",
            "东吴", "于是", "今日", "不敢", "魏兵", "陛下", "都督", "人马", "不知",
            '孔明曰', '玄德曰', '刘备', '云长'}

# How many of the most frequent words to show.
top_n = 10

# 1. Read the file. Only the read needs the open handle, so the rest of the
#    processing happens after the with-block has closed it.
with open('./threekingdom.txt', 'r', encoding='utf-8') as f:
    words = f.read()

# 2. Segment the text into words.
word_list = jieba.lcut(words)
print(len(word_list))

# 3. Count every word longer than one character,
#    e.g. {'夏侯渊': 788, '不来': 56}.
counts = {}
for word in word_list:
    if len(word) == 1:
        continue
    # get() yields 0 for a word not seen yet, avoiding a KeyError.
    counts[word] = counts.get(word, 0) + 1

# 4. Fold alias spellings into one entry per character. get(..., 0) keeps
#    this working when a spelling never occurs in the text.
counts['孔明'] = counts.get('孔明', 0) + counts.get('孔明曰', 0)
counts['玄德'] = (counts.get('玄德', 0) + counts.get('玄德曰', 0)
                + counts.get('刘备', 0))
counts['关公'] = counts.get('关公', 0) + counts.get('云长', 0)

# 5. Drop the excluded words; pop() with a default tolerates words that
#    are not present in the counts.
for word in excludes:
    counts.pop(word, None)

# 6. Rank the (word, count) pairs, highest count first.
items = list(counts.items())
items.sort(key=lambda x: x[1], reverse=True)

# 7. Show the top_n words and repeat each name `count` times so that its
#    weight in the word cloud matches its frequency. Slicing (rather than
#    indexing 0..top_n-1) is safe when fewer than top_n words remain.
role_list = []
for role_name, count in items[:top_n]:
    print(role_name, count)
    role_list.extend([role_name] * count)
print(role_list)

text = ' '.join(role_list)
WordCloud(
    background_color='white',
    width=800,
    height=600,
    font_path='msyh.ttc',
    collocations=False,
).generate(text).to_file('top10.png')


top10.png

切片 python中的特色

name = 'abcdefg'
# Slices are half-open: the start index is included, the stop index is not.
# Get 'abc' -> indices 0, 1, 2, so the stop index is 3.
print(name[0:3])
# Every second character from index 0: 'aceg'
# (a stop index past the end of the string is allowed).
print(name[0:8:2])
# Start and stop may be omitted: also 'aceg'
print(name[::2])
# The whole string: 'abcdefg'
print(name[:])
# Reversed: 'gfedcba'
print(name[::-1])
# Reversed, every second character: 'geca'
print(name[::-2])
# Start at the second-to-last character, step backwards by two: 'fdb'
print(name[-2::-2])
# Start at index 1 and step backwards by two: 'b'
print(name[1::-2])

对分析的数据进行可视化展示

import string
from random import randint

# Author's note: the name comes from MATLAB.
from matplotlib import pyplot as plt

# Font settings; per the note that follows this snippet, these two
# assignments prevent garbled characters in the figure.
plt.rcParams["font.sans-serif"] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# x axis: five vendor labels built from the first five uppercase letters.
print(string.ascii_uppercase)
x = ["厂商{}".format(letter) for letter in string.ascii_uppercase[:5]]
print(x)

# y axis: one random integer in [200, 300] per label.
y = [randint(200, 300) for _ in range(5)]
print(y)

# Draw the bar chart and display it.
plt.bar(x, y)
plt.show()
  • plt.rcParams["font.sans-serif"] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    这两个语句可以防止图表中的中文和负号显示为乱码

你或许想:《去原作者写文章的地方》

「点点赞赏,手留余香」

    还没有人赞赏,快来当第一个赞赏的人吧!
Python
0 条回复 A 作者 M 管理员
    所有的伟大,都源于一个勇敢的开始!
欢迎您,新朋友,感谢参与互动!欢迎您 {{author}},您在本站有{{commentsCount}}条评论