whenever you make a mess like this, eventually you have clean it upend restore everything to its proper order
python遍历文件数
import sys
import os
count = 0
# 遍历文件夹
def walkFile(file):
for root, dirs, files in os.walk(file):
# root 表示当前正在访问的文件夹路径
# dirs 表示该文件夹下的子目录名list
# files 表示该文件夹下的文件list
# 遍历文件
for f in files:
global count
count += 1
#print(os.path.join(root, f))
os.path.join(root, f)
# 遍历所有的文件夹
for d in dirs:
#print(os.path.join(root, d))
os.path.join(root, d)
print("文件数量一共为:", count)
if __name__ == '__main__':
walkFile(r"/data/dfs/data/")
开始时间: 1636442653.8381789
此次分析的目录: /data/dfs/data/
文件数量一共为: 87416186
结束时间: 1636444145.756604
计算时间: 1491.9184250831604
[root@host-40 scripts]# python3 test.py
文件数量一共为: 87423691
pg优化方法
优化具体方法
1、SQL后面添加limit
2、禁用select *
3、优化like语句
4、避免在索引列上使用内置函数和表达式操作
5、对查询进行优化,应考虑在 where 及 order by 涉及的列上建立索引,尽量避免全表扫描
6、在适当的时候,使用only indexscan
7、避免排序
7.1、灵活使用集合运算符的 ALL 可选项
7.2、使用 EXISTS 代替 DISTINCT
7.3、在极值函数中使用索引(MAX/MIN)
7.4、在 GROUP BY 子句和 ORDER BY 子句中使用索引
8、删除冗余和重复索引
9、关于大量DELETE/UPDATE操作
10、where 子句中考虑使用默认值代替 null
11、合理使用exists&in
12、能写在 WHERE 子句里的条件不要写在 HAVING 子句里
13、用varchar代替char,合理设置varchar可变字段长度
14、where后字段值注意引号使用,易导致索引失效
15、当在 SQL 语句中连接多个表时,请使用表的别名,并把别名前缀于每一列上,这样语义更加清晰
16、索引不适合建在有大量重复数据的字段上,如性别这类型数据库字段
17、表关联不要太多
18、Inner join 、left join、right join,优先使用 Inner join,如果是 left join,左边表结果尽量小
18.1、分解关联查询
18.2、改写关联查询
19、减少中间表
19.1、灵活使用 HAVING 子句
19.2、需要对多个字段使用 IN 谓词时,将它们汇总到一处
20、应尽量避免在 where 子句中使用!=或<>操作符,否则将引擎放弃使用索引而进行全表扫描
21、使用多列索引时,注意索引列的顺序,一般遵循最左匹配原则
22、字段类型能用数值尽量用数值类型
23、长度很长的多字段联合主键用hash
24、禁用UUID作为主键
25、善用set、explain查看&调试执行计划
PG建立分区
# 建父表
create table sales_detail (
product_id int not null,
price numeric(12,2),
amount int not null,
sale_date date not null,
buyer varchar(40),
buyer_contarct text
);
# 建子表
create table sales_detail_y2014m01(check (sale_date >= date '2014-01-01' and sale_date < date '2014-02-01')) inherits (sales_detail);
create table sales_detail_y2014m02(check (sale_date >= date '2014-02-01' and sale_date < date '2014-03-01')) inherits (sales_detail);
create table sales_detail_y2014m03(check (sale_date >= date '2014-03-01' and sale_date < date '2014-04-01')) inherits (sales_detail);
create table sales_detail_y2014m04(check (sale_date >= date '2014-04-01' and sale_date < date '2014-05-01')) inherits (sales_detail);
create table sales_detail_y2014m05(check (sale_date >= date '2014-05-01' and sale_date < date '2014-06-01')) inherits (sales_detail);
create table sales_detail_y2014m06(check (sale_date >= date '2014-06-01' and sale_date < date '2014-07-01')) inherits (sales_detail);
create table sales_detail_y2014m07(check (sale_date >= date '2014-07-01' and sale_date < date '2014-08-01')) inherits (sales_detail);
create table sales_detail_y2014m08(check (sale_date >= date '2014-08-01' and sale_date < date '2014-09-01')) inherits (sales_detail);
create table sales_detail_y2014m09(check (sale_date >= date '2014-09-01' and sale_date < date '2014-10-01')) inherits (sales_detail);
create table sales_detail_y2014m10(check (sale_date >= date '2014-10-01' and sale_date < date '2014-11-01')) inherits (sales_detail);
create table sales_detail_y2014m11(check (sale_date >= date '2014-11-01' and sale_date < date '2014-12-01')) inherits (sales_detail);
create table sales_detail_y2014m12(check (sale_date >= date '2014-12-01' and sale_date < date '2015-01-01')) inherits (sales_detail);
# 字表索引
create index sale_detail_y2014m04_sale_date on sales_detail_y2014m01 (sale_date);
create index sale_detail_y2014m02_sale_date on sales_detail_y2014m02 (sale_date);
create index sale_detail_y2014m03_sale_date on sales_detail_y2014m03 (sale_date);
create index sale_detail_y2014m04_sale_date on sales_detail_y2014m04 (sale_date);
create index sale_detail_y2014m05_sale_date on sales_detail_y2014m05 (sale_date);
create index sale_detail_y2014m06_sale_date on sales_detail_y2014m06 (sale_date);
create index sale_detail_y2014m07_sale_date on sales_detail_y2014m07 (sale_date);
create index sale_detail_y2014m08_sale_date on sales_detail_y2014m08 (sale_date);
create index sale_detail_y2014m09_sale_date on sales_detail_y2014m09 (sale_date);
create index sale_detail_y2014m10_sale_date on sales_detail_y2014m10 (sale_date);
create index sale_detail_y2014m12_sale_date on sales_detail_y2014m12 (sale_date);
# 建触发器
CREATE OR REPLACE FUNCTION sale_detail_insert_trigger()
RETURNS TRIGGER AS $$
BEGIN
IF ( NEW.sale_date >= DATE '2014-01-01' AND
NEW. sale_date < DATE '2014-02-01' ) THEN
INSERT INTO sales_detail_y2014m01 VALUES (NEW.*);
ELSIF (NEW.sale_date >= DATE '2014-02-01' AND
NEW.sale_date < DATE '2014-03-01' ) THEN
INSERT INTO sales_detail_y2014m02 VALUES (NEW.*);
ELSIF (NEW.sale_date >= DATE '2014-03-01' AND
NEW.sale_date < DATE '2014-04-01' ) THEN
INSERT INTO sales_detail_y2014m03 VALUES (NEW.*);
ELSIF (NEW.sale_date >= DATE '2014-04-01' AND
NEW.sale_date < DATE '2014-05-01' ) THEN
INSERT INTO sales_detail_y2014m04 VALUES (NEW.*);
ELSIF (NEW.sale_date >= DATE '2014-05-01' AND
NEW.sale_date < DATE '2014-06-01' ) THEN
INSERT INTO sales_detail_y2014m05 VALUES (NEW.*);
ELSIF (NEW.sale_date >= DATE '2014-06-01' AND
NEW.sale_date < DATE '2014-07-01' ) THEN
INSERT INTO sales_detail_y2014m06 VALUES (NEW.*);
ELSIF (NEW.sale_date >= DATE '2014-07-01' AND
NEW.sale_date < DATE '2014-08-01' ) THEN
INSERT INTO sales_detail_y2014m07 VALUES (NEW.*);
ELSIF (NEW.sale_date >= DATE '2014-08-01' AND
NEW.sale_date < DATE '2014-09-01' ) THEN
INSERT INTO sales_detail_y2014m08 VALUES (NEW.*);
ELSIF (NEW.sale_date >= DATE '2014-09-01' AND
NEW.sale_date < DATE '2014-10-01' ) THEN
INSERT INTO sales_detail_y2014m09 VALUES (NEW.*);
ELSIF (NEW.sale_date >= DATE '2014-10-01' AND
NEW.sale_date < DATE '2014-11-01' ) THEN
INSERT INTO sales_detail_y2014m10 VALUES (NEW.*);
ELSIF (NEW.sale_date >= DATE '2014-11-01' AND
NEW.sale_date < DATE '2014-12-01' ) THEN
INSERT INTO sales_detail_y2014m11 VALUES (NEW.*);
ELSIF ( NEW.sale_date >= DATE '2014-12-01' AND
NEW.sale_date < DATE '2015-01-01' ) THEN
INSERT INTO sales_detail_y2014m12 VALUES (NEW.*);
ELSE
RAISE EXCEPTION 'Date out of range. Fix the
sale_detail_insert_trigger () function!';
END IF;
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
CREATE TRIGGER insert_sale_detail_trigger
BEFORE INSERT ON sales_detail
FOR EACH ROW EXECUTE PROCEDURE
sale_detail_insert_trigger ();
# 数据插入:
insert into sales_detail values(1,43.12,1,date '2014-01-02', '李四', '四川yy');
insert into sales_detail values(1,43.12,1,date '2014-02-02', '李四', '四川yy');
insert into sales_detail values(1,43.12,1,date '2014-03-02', '李四', '四川yy');
insert into sales_detail values(1,43.12,1,date '2014-04-02', '李四', '四川yy');
insert into sales_detail values(1,43.12,1,date '2014-05-02', '李四', '四川yy');
insert into sales_detail values(1,43.12,1,date '2014-06-02', '李四', '四川yy');
insert into sales_detail values(1,43.12,1,date '2014-07-02', '李四', '四川yy');
insert into sales_detail values(1,43.12,1,date '2014-08-02', '李四', '四川yy');
insert into sales_detail values(1,43.12,1,date '2014-09-02', '李四', '四川yy');
insert into sales_detail values(1,43.12,1,date '2014-10-02', '李四', '四川yy');
insert into sales_detail values(1,43.12,1,date '2014-11-02', '李四', '四川yy');
insert into sales_detail values(1,43.12,1,date '2014-12-02', '李四', '四川yy');
insert into sales_detail values(1,43.12,1,date '2014-01-02', '李四', '四川yy');
# 数据测试
select * from sales_detail limit 10;
select * from sales_detail_y2014m01 limit 1;
规则实现触发器功能
CREATE RULE sales_detail_insert_y2014m01 AS
ON INSERT TO sales_detail WHERE
( sale_date >= DATE '2014-01-01' AND sale_date < DATE '2014-02-01' ) DO INSTEAD INSERT INTO sales_detail_y2014m01 VALUES (NEW.); CREATE RULE sales_detail_insert_y2014m02 AS ON INSERT TO sales_detail WHERE ( sale_date >= DATE '2014-02-01' AND sale_date < DATE '2014-03-01' ) DO INSTEAD INSERT INTO sales_detail_y2014m01 VALUES (NEW.); …. CREATE RULE sales_detail_insert_y2014m12 AS ON INSERT TO sales_detail WHERE ( sale_date >= DATE '2014-12-01' AND sale_date < DATE
'2015-01-01' )
DO INSTEAD
INSERT INTO sales_detail_y2014m12 VALUES (NEW.*);
case when
建表插入数据
create table score(
stu_code varchar(8) not null,
stu_name varchar(8) not null,
stu_sex int not null,
stu_score int not null)
;
insert into score values('xm','小明',0,88);
insert into score values('xl','小磊',0,55);
insert into score values('xf','小峰',0,45);
insert into score values('xh','小红',0,66);
insert into score values('xn','晓妮',0,77);
insert into score values('xy','小伊',0,99);
SELECT
SUM (CASE WHEN STU_SEX = 0 THEN 1 ELSE 0 END) AS MALE_COUNT, --男生数量统计
SUM (CASE WHEN STU_SEX = 1 THEN 1 ELSE 0 END) AS FEMALE_COUNT, --女生数量统计
SUM (CASE WHEN STU_SCORE >= 60 AND STU_SEX = 0 THEN 1 ELSE 0 END) AS MALE_PASS, --男生通过人数统计
SUM (CASE WHEN STU_SCORE >= 60 AND STU_SEX = 1 THEN 1 ELSE 0 END) AS FEMALE_PASS --女生数量统计
FROM score

postgres=# select (case when stu_sex =0 then 1 else 0 end) from thtf_students;
case
1
1
1
1
1
1
(6 rows)
postgres=# select case when stu_sex =0 then 1 else 0 end from thtf_students;
case
1
1
1
1
1
1
(6 rows)
select 学习
关联表
CREATE TABLE class(no int primary key, class_name varchar(40));
INSERT INTO class VALUES(1,'初二(1)班');
INSERT INTO class VALUES(2,'初二(2)班');
INSERT INTO class VALUES(3,'初二(3)班');
INSERT INTO class VALUES(4,'初二(4)班');
CREATE TABLE student(no int primary key, student_name varchar(40), age int, class_no int);
INSERT INTO student VALUES(1, '张三', 14, 1);
INSERT INTO student VALUES(2, '吴二', 15, 1);
INSERT INTO student VALUES(3, '李四', 13, 2);
INSERT INTO student VALUES(4, '吴三', 15, 2);
INSERT INTO student VALUES(5, '王二', 15, 3);
INSERT INTO student VALUES(6, '李三', 14, 3);
INSERT INTO student VALUES(7, '吴二', 15, 4);
INSERT INTO student VALUES(8, '张四', 14, 4);
1、标准
SELECT student_name, class_name FROM student,
class
WHERE student.class_no = class.no;
2、distinct
select distinct student_name, class_no from student, class;
3、
select student_name, class_name from student, class;
oldboy=# select * from student a, class b where a.class_no=b.no and b.class_name='初二(2)班';
no | student_name | age | class_no | no | class_name
----+--------------+-----+----------+----+------------
3 | 李四 | 13 | 2 | 2 | 初二(2)班
4 | 吴三 | 15 | 2 | 2 | 初二(2)班
(2 rows)
oldboy=# explain analyze select * from student a, class b where a.class_no=b.no and b.class_name='初二(2)班';
QUERY PLAN
----------------------------------------------------------------------------------------------------------------
Hash Join (cost=17.79..35.25 rows=3 width=212) (actual time=0.020..0.022 rows=2 loops=1)
Hash Cond: (a.class_no = b.no)
-> Seq Scan on student a (cost=0.00..15.90 rows=590 width=110) (actual time=0.007..0.008 rows=8 loops=1)
-> Hash (cost=17.75..17.75 rows=3 width=102) (actual time=0.008..0.008 rows=1 loops=1)
Buckets: 1024 Batches: 1 Memory Usage: 9kB
-> Seq Scan on class b (cost=0.00..17.75 rows=3 width=102) (actual time=0.005..0.006 rows=1 loops=1)
Filter: ((class_name)::text = '初二(2)班'::text)
Rows Removed by Filter: 3
Planning Time: 0.080 ms
Execution Time: 0.038 ms
(10 rows)
oldboy=# explain analyze select * from student where class_no in (select no from class where class_name='初二(2)班');
QUERY PLAN
------------------------------------------------------------------------------------------------------------
Hash Join (cost=17.79..35.25 rows=3 width=110) (actual time=0.019..0.021 rows=2 loops=1)
Hash Cond: (student.class_no = class.no)
-> Seq Scan on student (cost=0.00..15.90 rows=590 width=110) (actual time=0.006..0.007 rows=8 loops=1)
-> Hash (cost=17.75..17.75 rows=3 width=4) (actual time=0.007..0.008 rows=1 loops=1)
Buckets: 1024 Batches: 1 Memory Usage: 9kB
-> Seq Scan on class (cost=0.00..17.75 rows=3 width=4) (actual time=0.005..0.005 rows=1 loops=1)
Filter: ((class_name)::text = '初二(2)班'::text)
Rows Removed by Filter: 3
Planning Time: 0.089 ms
Execution Time: 0.038 ms
(10 rows)
编码学习
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from urllib import request
import json
from datetime import datetime, timedelta
import time
def get_data():
url = 'https://search.51job.com/list/070200,000000,0000,00,9,99,java%25E5%25BC%2580%25E5%258F%2591,2,1.html'
headers = {
'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'
}
req = request.Request(url, headers=headers)
response = request.urlopen(req)
print('===============', response.getcode(), type(response))
if response.getcode() == 200:
# 字节码
data = response.read()
print(type(data))
# 字节码转字符串
data = data.decode('utf-8')
print(type(data))
# print(data)
with open('index.html', mode='w', encoding='utf-8') as f:
f.write(data)
if __name__ == '__main__':
get_data()
收集linux数据
通过python收集linux主机的cpu、硬盘、内存等
root@ubuntu16:/server/scripts# cat collect_linux_info.py
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import subprocess
def collect():
filter_keys = ['Manufacturer', 'Serial Number', 'Product Name', 'UUID', 'Wake-up Type']
raw_data = {}
for key in filter_keys:
try:
res = subprocess.Popen("sudo dmidecode -t system|grep '%s'" % key, stdout=subprocess.PIPE, shell=True)
# result = res.stdout.read().decode()等于下面这两句
result_bytes = res.stdout.read()
result = result_bytes.decode()
# split分隔字符,变成列表
data_list = result.split(':')
if len(data_list) > 1:
raw_data[key] = data_list[1].strip()
else:
raw_data[key] = ''
except Exception as e:
raw_data[key] = ''
# 构造符合django的数据模型字段
data = {}
data['asset_type'] = 'server'
data['manufacturer'] = raw_data['Manufacturer']
data['sn'] = raw_data['Serial Number']
data['model'] = raw_data['Product Name']
data['uuid'] = raw_data['UUID']
data['wake_up_type'] = raw_data['Wake-up Type']
data.update(get_os_info())
data.update(get_cpu_info())
data.update(get_ram_info())
data.update(get_nic_info())
data.update(get_disk_info())
return data
def get_os_info():
"""
获取操作系统信息
:return:
"""
distributor = subprocess.Popen("lsb_release -a|grep 'Distributor ID'", stdout=subprocess.PIPE, shell=True)
distributor = distributor.stdout.read().decode().split(":")
release = subprocess.Popen("lsb_release -a|grep 'Description'", stdout=subprocess.PIPE, shell=True)
release = release.stdout.read().decode().split(":")
data_dic = {
"os_distribution": distributor[1].strip() if len(distributor) > 1 else "",
"os_release": release[1].strip() if len(release) > 1 else"",
"os_type": "Linux",
}
return data_dic
def get_cpu_info():
"""
获取cpu信息
:return
"""
raw_cmd = 'cat /proc/cpuinfo'
raw_data = {
'cpu_model': "%s |grep 'model name' |head -1 " % raw_cmd,
'cpu_count': "%s | grep 'processor'|wc -l" % raw_cmd,
'cpu_core_count': "%s | grep 'cpu cores' |awk -F : '{SUM +=$2} END {print SUM}'" % raw_cmd,
}
for key, cmd in raw_data.items():
try:
result = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
raw_data[key] = result.stdout.read().decode().strip()
except ValueError as e:
print(e)
raw_data[key] = ""
data = {
"cpu_count": raw_data["cpu_count"],
"cpu_core_count": raw_data["cpu_core_count"]
}
cpu_model = raw_data["cpu_model"].split(":")
return data
def get_ram_info():
"""
获取内存信息
:return:
"""
raw_data = subprocess.Popen("sudo dmidecode -t memory", stdout=subprocess.PIPE, shell=True)
raw_list = raw_data.stdout.read().decode().split("\n")
raw_ram_list = []
item_list = []
for line in raw_list:
if line.startswith("Memory Device"):
raw_ram_list.append(item_list)
item_list = []
else:
item_list.append(line.strip())
ram_list = []
for item in raw_ram_list:
item_ram_size = 0
ram_item_to_dic = {}
for i in item:
data = i.split(":")
if len(data) == 2:
key, v = data
if key == 'Size':
if v.strip() != "No Module Installed":
ram_item_to_dic['capacity'] = v.split()[0].strip()
item_ram_size = round(int(v.split()[0]))
else:
ram_item_to_dic['capacity'] = 0
if key == 'Type':
ram_item_to_dic['model'] = v.strip()
if key == 'Manufacturer':
ram_item_to_dic['manufacturer'] = v.strip()
if key == 'Serial Number':
ram_item_to_dic['sn'] = v.strip()
if key == 'Asset Tag':
ram_item_to_dic['asset_tag'] = v.strip()
if key == 'Locator':
ram_item_to_dic['slot'] = v.strip()
if item_ram_size == 0:
pass
else:
raw_list.append(ram_item_to_dic)
raw_total_size = subprocess.Popen("cat /proc/meminfo|grep MemTotal", stdout=subprocess.PIPE, shell=True)
raw_total_size = raw_total_size.stdout.read().decode().split(":")
ram_data = {'ram': ram_list}
if len(raw_total_size) == 2:
total_gb_size = int(raw_total_size[1].split()[0]) / 1024**2
ram_data['ram_size'] = total_gb_size
return ram_data
def get_nic_info():
"""
获取网卡信息
:return:
"""
raw_data = subprocess.Popen("ifconfig -a", stdout=subprocess.PIPE, shell=True)
raw_data = raw_data.stdout.read().decode().split("\n")
nic_dic = dict()
next_ip_line = False
last_mac_addr = None
for line in raw_data:
if next_ip_line:
next_ip_line = False
print(line)
nic_name = last_mac_addr.split()[0]
print(nic_name)
mac_addr = last_mac_addr.split("HWaddr")[1].strip()
raw_ip_addr = line.split("inet addr:")
raw_bcast = line.split("Bcast:")
raw_netmask = line.split("Mask:")
if len(raw_ip_addr) > 1:
ip_addr = raw_ip_addr[1].split()[0]
network = raw_bcast[1].split()[0]
netmask = raw_netmask[1].split()[0]
else:
ip_addr = None
network = None
netmask = None
if mac_addr not in nic_dic:
nic_dic[mac_addr] = {'name': nic_name,
'mac': mac_addr,
'net_mask': netmask,
'network': network,
'bonding': 0,
'model': 'unknown',
'ip_address': ip_addr,
}
else:
if '%s_bonding_addr' % (mac_addr,) not in nic_dic:
random_mac_addr = '%s_bonding_addr' % (mac_addr,)
else:
random_mac_addr = '%s_bonding_addr2' % (mac_addr,)
nic_dic[random_mac_addr] = {'name': nic_name,
'mac': random_mac_addr,
'net_mask': netmask,
'network': network,
'bonding': 1,
'model': 'unknown',
'ip_address': ip_addr,
}
if "HWaddr" in line:
next_ip_line = True
last_mac_addr = line
nic_list = []
for k, v in nic_dic.items():
nic_list.append(v)
return {'nic': nic_list}
def get_disk_info():
"""
获取存储信息。
本脚本只针对ubuntu中使用sda,且只有一块硬盘的情况。
具体查看硬盘信息的命令,请根据实际情况,实际调整。
如果需要查看Raid信息,可以尝试MegaCli工具。
:return:
"""
# raw_data = subprocess.Popen("sudo hdparm -i /dev/sda | grep Model", stdout=subprocess.PIPE, shell=True)
# raw_data = raw_data.stdout.read().decode()
# data_list = raw_data.split(",")
# print("======test%s=====" % data_list)
# model = data_list[0].split("=")[1]
# sn = data_list[2].split("=")[1].strip()
model = "自定ts牌子"
sn = "1870871010997809870960869"
size_data = subprocess.Popen("sudo fdisk -l /dev/sda | grep Disk|head -1", stdout=subprocess.PIPE, shell=True)
size_data = size_data.stdout.read().decode()
size = size_data.split(":")[1].strip().split(" ")[0]
result = {'physical_disk_driver': []}
disk_dict = dict()
disk_dict["model"] = model
disk_dict["size"] = size
disk_dict["sn"] = sn
result['physical_disk_driver'].append(disk_dict)
return result
if __name__ == "__main__":
data = collect()
print(data)
ER模型
亿图、Visio
curl发起连接
curl -v 远端IP:5222 -k -x “本机IP:10080”