# Read a text file and echo its lines. If the file does not exist, open()
# raises an exception and execution jumps to the except clause.
try:
    with open("c:\\tmp\\ts.txt", "r") as f:  # closed even if reading fails
        lines = f.readlines()
    for x in lines:
        print(x, end="")  # each line already ends with its own newline
except Exception as e:
    print(e)  # >> [Errno 2] No such file or directory: 'c:\\tmp\\ts.txt'
添加文件内容
1 2 3 4
# Mode "a" opens the file for appending; the file is created when it does
# not exist yet. The with-statement closes it even if a write fails.
with open("c:\\tmp\\t.txt", "a") as f:
    f.write("新增行\n")
    f.write("ok\n")
import os


def powerRmDir(path):
    """Delete the folder ``path`` together with everything inside it.

    Args:
        path: Folder to remove.

    Raises:
        OSError: If ``path`` cannot be listed or an entry cannot be removed.
    """
    for x in os.listdir(path):
        # os.listdir yields bare names (e.g. "a.txt"), so rebuild the path.
        actualFileName = os.path.join(path, x)
        if os.path.isdir(actualFileName) and not os.path.islink(actualFileName):
            powerRmDir(actualFileName)  # a real sub-folder: empty it first
        else:
            # Files and symbolic links are unlinked. A link that points at a
            # folder is removed without touching the folder it points to.
            os.remove(actualFileName)
    os.rmdir(path)  # the folder is empty now
import os


def getTotalSize(path):
    """Return the combined size in bytes of every file under folder ``path``.

    Sub-folders are visited recursively. Entries that are neither a file nor
    a folder (for example a dangling symbolic link) contribute nothing.

    Args:
        path: Folder to measure.

    Returns:
        Total size in bytes; 0 for an empty folder.

    Raises:
        OSError: If ``path`` or one of its sub-folders cannot be listed.
    """
    total = 0
    for x in os.listdir(path):
        # os.listdir yields bare names, so rebuild the full path.
        actualFileName = os.path.join(path, x)
        if os.path.isfile(actualFileName):
            total += os.path.getsize(actualFileName)
        elif os.path.isdir(actualFileName):
            total += getTotalSize(actualFileName)
    return total
When many couples decide to expand their family, they often take into consideration the different genetic traits that they may pass on to their children. For example, if someone has a history of heart problems, they might be concerned about passing that on to their children as well. Treacher Collins syndrome, or TCS, is a rare facial disfigurement that greatly slows the development of bones and other tissues that make up the human face. As a result, most people living with TCS have underdeveloped cheek bones, a small jaw, and an undersized chin.
统计的结果:结果文件r1.txt格式
1 2 3 4 5 6 7 8 9
a 8 about 2 an 1 and 4 are 1 around 1 as 2 backlash 1 be 4
import os  # the os library ships with Python
# os.listdir() returns a list of all files and folders in the current folder;
#   each element is a bare file or folder name with no path (directory) part.
# os.path.isfile(x) tells whether x is a file (a folder is not a file).
os.listdir示例
假设c:\tmp文件夹下有文件t.py,a.txt,b.txt和文件夹hello
程序t.py如下:
1 2
import os

print(os.listdir())  # names of the files and folders in the current folder
则运行t.py输出结果为: ['a.txt', 'b.txt', 'hello', 't.py']
实现
1 2 3 4 5 6
import os

result = {}
for x in os.listdir():  # names of every file and folder in the current folder
    name = x.lower()
    # Only regular files whose name starts with 'a' and ends with '.txt'
    # (case-insensitive) are counted.
    if os.path.isfile(x) and name.endswith(".txt") and name.startswith("a"):
        countFile(x, result)  # countFile: the single-file counter from program 1
程序3:准确统计文章中的单词词频
程序名:countfile_novary.py
用法:
python countfile_novary.py 源文件 结果文件
对“源文件”进行单词词频分析,分析结果写入“结果文件”。如果碰到单词的变化形式,则转换成原型再统计。
单词原型-变化 词汇表在文件word_varys.txt里面,格式:
1 2 3 4 5 6
act acted|acting|acts action actions active actively|activeness
def makeVaryWordsDict():
    """Build the variation -> base-form lookup table from word_varys.txt.

    The file (GBK encoded, in the current folder) holds two lines per word:
    the base form, then its variations separated by ``|``.

    Returns:
        A dict mapping each variation to its base form, for example
        ``{"acts": "act", "acting": "act", "boys": "boy"}``.

    Raises:
        OSError: If word_varys.txt cannot be opened.
    """
    with open("word_varys.txt", "r", encoding="gbk") as f:
        lines = f.readlines()
    vary_words = {}
    # Every two lines describe one word. Stopping at len(lines) - 1 ignores a
    # dangling last line instead of raising IndexError on lines[i + 1].
    for i in range(0, len(lines) - 1, 2):
        word = lines[i].strip()  # base form
        for w in lines[i + 1].strip().split("|"):  # variations
            if w:  # a blank variation line would otherwise register ""
                vary_words[w] = word
    return vary_words
def makeSplitStr(txt):
    """Build a regular expression for ``re.split`` that splits ``txt`` into words.

    Every character of ``txt`` that is not an ASCII letter becomes one
    alternative of the pattern; a plain space is always the last alternative.

    Args:
        txt: The text that is going to be split.

    Returns:
        The pattern string, e.g. ``"\\ |\\+| "`` for the text ``"a+b c"``.
    """
    import re  # local import keeps the function usable on its own

    # Collect every non-letter character that occurs in the text.
    splitChars = set()
    for c in txt:
        if not ('a' <= c <= 'z' or 'A' <= c <= 'Z'):
            splitChars.add(c)
    # re.escape neutralises every regex metacharacter. A hand-written escape
    # list is easy to get wrong: leaving out '+' makes re.split raise
    # "nothing to repeat" as soon as the text contains a plus sign.
    # Sorting only makes the resulting pattern deterministic.
    return "".join(re.escape(c) + "|" for c in sorted(splitChars)) + " "
def countFile(filename, vary_word_dict):
    """Count word frequencies in ``filename``, folding variations into base forms.

    Args:
        filename: Path of the GBK-encoded text file to analyse.
        vary_word_dict: Maps a word variation to its base form; words that
            are not in it are counted as they are.

    Returns:
        A dict mapping each lower-cased word (or its base form) to its
        number of occurrences, or None when the file cannot be read; the
        error is printed in that case.
    """
    import re  # local import keeps the function usable on its own

    try:
        # Reading sits inside the try as well, so a decoding error is
        # reported the same way as a missing file.
        with open(filename, "r", encoding="gbk") as f:
            txt = f.read()
    except Exception as e:
        print(e)
        return None
    splitStr = makeSplitStr(txt)
    words = {}
    for x in re.split(splitStr, txt):
        lx = x.lower()
        if lx == "":
            continue
        # Replace a variation by its base form; anything else stays as is.
        # One lookup only, so a base form is never mapped a second time.
        lx = vary_word_dict.get(lx, lx)
        words[lx] = words.get(lx, 0) + 1
    return words
import sys

# Usage: python countfile_novary.py <source file> <result file>
result = countFile(sys.argv[1], makeVaryWordsDict())
if result:  # None (file unreadable) or {} (no words): write nothing
    with open(sys.argv[2], "w", encoding="gbk") as f:
        for word, count in sorted(result.items()):  # alphabetical order
            f.write("%s\t%d\n" % (word, count))
def makeFilterSet():
    """Load the set of CET-4 words from cet4words.txt.

    The file (GBK encoded, in the current folder) marks each CET-4 word with
    a leading ``$``; blank lines and lines without the marker are ignored.

    Returns:
        A set with the text that follows the ``$`` on every marked line.

    Raises:
        OSError: If cet4words.txt cannot be opened.
    """
    cet4words = set()
    with open("cet4words.txt", "r", encoding="gbk") as f:
        for line in f:
            line = line.strip()
            if line.startswith("$"):  # False for "", so blank lines are skipped
                cet4words.add(line[1:])
    return cet4words
def makeSplitStr(txt):
    """Build a regular expression for ``re.split`` that splits ``txt`` into words.

    Every character of ``txt`` that is not an ASCII letter becomes one
    alternative of the pattern; a plain space is always the last alternative.

    Args:
        txt: The text that is going to be split.

    Returns:
        The pattern string, e.g. ``"\\ |\\+| "`` for the text ``"a+b c"``.
    """
    import re  # local import keeps the function usable on its own

    # Collect every non-letter character that occurs in the text.
    splitChars = set()
    for c in txt:
        if not ('a' <= c <= 'z' or 'A' <= c <= 'Z'):
            splitChars.add(c)
    # re.escape neutralises every regex metacharacter. A hand-written escape
    # list is easy to get wrong: leaving out '+' makes re.split raise
    # "nothing to repeat" as soon as the text contains a plus sign.
    # Sorting only makes the resulting pattern deterministic.
    return "".join(re.escape(c) + "|" for c in sorted(splitChars)) + " "
def countFile(filename, filterdict):
    """Count word frequencies in ``filename``, leaving out the words in ``filterdict``.

    Args:
        filename: Path of the GBK-encoded text file to analyse.
        filterdict: Collection of lower-case words that must not be counted.

    Returns:
        A dict mapping each remaining lower-cased word to its number of
        occurrences. When the file cannot be read the error is printed and
        an empty dict is returned, which is the value the calling script
        tests for before writing its result file.
    """
    import re  # local import keeps the function usable on its own

    words = {}
    try:
        # Reading sits inside the try as well, so a decoding error is
        # reported the same way as a missing file.
        with open(filename, "r", encoding="gbk") as f:
            txt = f.read()
    except Exception as e:
        print(e)
        return words  # still empty here
    splitStr = makeSplitStr(txt)
    for x in re.split(splitStr, txt):
        lx = x.lower()
        if lx == "" or lx in filterdict:  # drop blanks and filtered words
            continue
        words[lx] = words.get(lx, 0) + 1
    return words
import sys

# Usage: python <this script> <source file> <result file>
result = countFile(sys.argv[1], makeFilterSet())
# A truthiness test covers every "nothing to write" value (an empty dict as
# well as a failure marker such as 0 or None) without calling .items() on it.
if result:
    with open(sys.argv[2], "w", encoding="gbk") as f:
        for word, count in sorted(result.items()):  # alphabetical order
            f.write("%s\t%d\n" % (word, count))
-- Create table students unless it already exists.
CREATE TABLE IF NOT EXISTS students (id integer primary key, name text, gpa real, birthday date, age integer, picture blob)
创建了一张名为students的表,有以下字段:
字段名
数据类型
id
integer
primary key表示不可重复
name
text
字符串
gpa
real
小数
birthday
date
日期(本质上就是text)
age
integer
整数
picture
blob
二进制数据(如图片)
INSERT
1
-- Insert one record; VALUES lists one value per field, in table order.
INSERT INTO students VALUES(1000, '张三', 3.81, '2000-09-12', 18, null)
在表students中插入一个记录,该记录暂无照片(null)
VALUES(每个字段的值)
创建数据库
创建数据库并写入数据
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
import sqlite3
# Connect to the database; the file is created when it does not exist yet.
# The folder c:/tmp must already exist: connect() does not create folders.
db = sqlite3.connect("c:/tmp/test2.db")
cur = db.cursor()  # database operations generally go through a cursor

# Create table students unless it already exists.
sql = '''CREATE TABLE if not exists students (id integer primary key , name text, gpa real, birthday date, age integer, picture blob) '''
cur.execute(sql)  # run the SQL command
cur.execute("insert into students values (1600, '张三', 3.81, '2000-09-12', 18, null)")  # insert one record

mylist = [(1700, '李四', "3.25", '2001-12-01', 17, None),
          (1800, '王五', "3.35", '1999-01-01', 19, None)]
for s in mylist:  # insert every record of mylist in turn
    # Each ? is filled from the element at the same position in the tuple.
    cur.execute('INSERT INTO students VALUES(?,?,?,?,?,?) ', s)
db.commit()  # actually write the changes; every write operation needs this
cur.close()  # close the cursor
db.close()  # close the database
数据库查询和修改
SELECT
1 2 3 4 5 6 7 8 9 10
-- Fetch every record of table students.
SELECT * FROM students;
-- Fetch every record of table students, sorted by age.
SELECT * FROM students ORDER BY age;
-- Fetch every record, but keep only the name and age fields of each.
SELECT name, age FROM students;
-- Fetch the records whose name field is 张三; WHERE introduces the search condition.
SELECT * FROM students WHERE name = '张三';
-- Fetch everyone named 张三 who is older than 20, sorted by age in descending order.
SELECT * FROM students WHERE name = '张三' AND age > 20 ORDER BY age DESC;
检索数据库
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
import sqlite3

db = sqlite3.connect("C:/tmp/test2.db")
cur = db.cursor()
sql = 'select * from students'  # fetch every record
cur.execute(sql)
x = cur.fetchone()  # fetchone returns the first matching record, or None
print(x)  # => (1600, '张三', 3.81, '2000-09-12', 18, None)
if x is not None:  # an empty table yields None, which cannot be indexed
    print(x[1])  # => 张三
for x in cur.fetchall():  # fetchall returns all remaining matching records
    print(x[:-2])  # leave out the age and picture fields
cur.execute("SELECT * FROM students WHERE name= 'Jack'")
x = cur.fetchone()
if x is None:
    print("can't find Jack")
cur.close()
db.close()
# Query the database
import sqlite3

db = sqlite3.connect("c:/tmp/test2.db")
cur = db.cursor()
# Find the records with gpa > 3.3, keep three of their fields, and sort the
# result by age in descending order.
sql = 'select name, gpa, age from students where gpa > 3.3 order by age desc'

cur.execute(sql)
x = cur.fetchall()
if x:  # fetchall returns an empty list when nothing matches
    print("total: ", len(x))  # => 2
    for r in x:
        print(r)
cur.close()
db.close()
1 2 3
total:2 ('王五', 3.35, 19) ('张三', 3.81, 18)
UPDATE
1 2 3 4
-- Set the gpa of every record to 3.9.
UPDATE students SET gpa = 3.9;
-- Change the gpa and age of 李四.
UPDATE students SET gpa = 3.9, age = 18 WHERE name = '李四';
1 2 3 4 5 6 7 8 9
import sqlite3

db = sqlite3.connect("c:/tmp/test2.db")
cur = db.cursor()
sql = 'UPDATE students SET gpa =?, age = ? WHERE name = ?'
# The three tuple elements fill the three ? placeholders in order. This
# changes the gpa and age of 李四; if there is no such record nothing happens.
cur.execute(sql, (4.0, 20, '李四'))
db.commit()  # required after every write operation
cur.close()
db.close()
DELETE
1 2 3 4 5
-- Delete the records whose age is below 18.
DELETE FROM students WHERE age < 18;
-- Delete every record.
DELETE FROM students;
-- Do not forget to commit afterwards.
DROP TABLE
1 2
-- Delete table students if it exists.
DROP TABLE IF EXISTS students;
别忘了最后commit
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
import sqlite3

db = sqlite3.connect("c:/tmp/test2.db")
cur = db.cursor()
cur.execute("DROP TABLE IF EXISTS students")
db.commit()

try:
    cur.execute("select * from students")
    for r in cur.fetchall():
        print(r[:-1])
except sqlite3.OperationalError:
    # Raised by sqlite3 for "no such table"; catching only this error keeps
    # unrelated failures visible instead of hiding them behind a bare except.
    print("no table")  # => no table
cur.close()
db.close()
列出数据库中所有的表和表的结构
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
import sqlite3

db = sqlite3.connect("c:/tmp/test3.db")
cur = db.cursor()
sql = 'CREATE TABLE if not exists table2(id integer, name text)'
cur.execute(sql)  # run the SQL command
# Assign to the same variable name `sql`; a misspelt name here would make the
# next execute() re-run the table2 statement and never create table1.
sql = 'CREATE TABLE if not exists table1(id integer, schook text)'
cur.execute(sql)
db.commit()

# SQLITE_MASTER is the catalogue table that lists the objects of the database.
cur.execute('select name from SQLITE_MASTER where type="table" order by NAME')
x = cur.fetchall()
if x != []:
    print(x)
import sqlite3

import requests

with open('c:/tmp/tmp.jpg', 'rb') as f:  # open the picture in binary mode
    img = f.read()

db = sqlite3.connect("c:/tmp/test2.db")
cur = db.cursor()
sql = "UPDATE students SET picture=? WHERE name = '李四'"
cur.execute(sql, (img,))  # store the picture of 李四; img fills the ?

# Fetch a picture from the network.
imgUrl = "https://img5.duitang.com/uploads/item/201605/19/20160519224441_VfMWRjpeg"
imgStream = requests.get(imgUrl, stream=True)
sql = "UPDATE students SET picture=? WHERE name = '张三' "
cur.execute(sql, (imgStream.content, ))  # store the picture of 张三; the downloaded bytes fill the ?
db.commit()
cur.close()
db.close()
读取blob字段(二进制字段)的值
1 2 3 4 5 6 7 8 9 10 11 12 13 14
import sqlite3
import requests

db = sqlite3.connect("c:/tmp/test2.db")
cur = db.cursor()
sql = "select name, picture from students WHERE name = '张三' or name = '李四'"
cur.execute(sql)
x = cur.fetchall()

for r in x:  # r[0] is the name, r[1] is the picture data
    if r[1] is None:  # no picture stored for this record: nothing to write
        continue
    # The pictures are written to 张三.jpg and 李四.jpg.
    with open("c:/tmp/" + r[0] + ".jpg", "wb") as f:
        f.write(r[1])
cur.close()
db.close()