QQwry是國內很流行的一個中文IP數據庫,它使用特別的格式存儲IP記錄,壓縮率很高。正因如此,每次升級數據庫時都得重新下載整庫。我希望讓它更簡單一點,升級時只需要下載批量更新數據,所以我試着把它轉換成sqlite3。
先貼我的代碼
Python代碼:
'''
Created on 2010-4-17
@author: Ben
@mail: [email protected]
'''
import os
import sqlite3
import sys
def binToInt(data):
    """Convert a little-endian byte sequence to an integer.

    The byte at position 0 is the least-significant byte, so each following
    byte contributes 8 more bits of significance.  For example the four bytes
    ``0x00 0x01 0x02 0x03`` correspond to the binary value
    ``00000011 00000010 00000001 00000000``, i.e. decimal 50462976.

    Args:
        data: the bytes to decode (an empty sequence yields 0).

    Returns:
        The unsigned integer value of ``data``.
    """
    # The standard library already implements this decoding.
    return int.from_bytes(data, 'little')
def readRecord(index):
    """Read the complete IP record referenced by one index entry.

    ``index`` is a byte offset into the module-level ``data`` buffer.  The
    code reads 4 bytes of start IP at that offset (least-significant byte
    first), then a 3-byte little-endian offset to the record body.  The body
    starts with the 4-byte end IP, followed by the country and area fields.
    Each field is either an inline NUL-terminated GBK string or a redirect: a
    flag byte (0x01 or 0x02) followed by a 3-byte offset to the real data.

    Args:
        index: offset of the index entry inside ``data``.

    Returns:
        ``[start_ip, end_ip, country, area]`` - the two addresses as dotted
        quads (e.g. ``192.168.1.1``) and the two text fields as ``str``.

    Raises:
        ValueError: if a string field has no terminating NUL byte.
    """
    def readStr():
        """Decode the NUL-terminated GBK string starting at ``index``.

        On return the enclosing ``index`` points at the terminating NUL.
        """
        nonlocal index
        # The terminator is the single NUL byte b'\x00'.  Without the
        # backslash the search would look for the three characters "x00".
        end = data.find(b'\x00', index)
        if end < 0:
            # A missing terminator would otherwise yield a silently wrong
            # slice (data[start:-1]) and corrupt every later offset.
            raise ValueError(
                "unterminated string at offset {}".format(index))
        start, index = index, end
        return data[start:index].decode('gbk')

    # The IP bytes are stored lowest byte first; reverse them for display.
    start_ip = '{}.{}.{}.{}'.format(*reversed(data[index:index + 4]))
    index = binToInt(data[index + 4:index + 7])  # jump to the record body
    end_ip = '{}.{}.{}.{}'.format(*reversed(data[index:index + 4]))
    index += 4  # step past the end IP to the country/area section

    if data[index] == 0x01:
        # The whole country/area section is stored at another offset.
        index = binToInt(data[index + 1:index + 4])

    if data[index] == 0x02:
        # Only the country is redirected; the area field follows the
        # 4-byte redirect (flag + 3-byte offset) at the current position.
        redirect_pos = index
        index = binToInt(data[index + 1:index + 4])
        country = readStr()
        index = redirect_pos + 4
    else:
        country = readStr()
        index += 1  # skip the NUL terminator to reach the area field

    if data[index] in (0x01, 0x02):
        index = binToInt(data[index + 1:index + 4])
        # NOTE(review): per the QQwry format a zero area offset means "no
        # area information"; following it would decode the file header as
        # text.  Confirm against the format description.
        area = readStr() if index else ''
    else:
        area = readStr()

    return [start_ip, end_ip, country, area]
# Module-level state shared by the functions in this script.
data = b""             # in-memory copy of the database file contents
cur = None             # placeholder; convert() stores the sqlite3 cursor here
c_record = dict()      # country text -> id of its row in the country table
a_record = {"": None}  # area text -> row id; an empty area is stored as NULL
def convert(db):
    """Read every IP record from a QQwry database and store it in SQLite.

    The whole file is loaded into the module-level ``data`` buffer, the two
    4-byte header fields are decoded into ``firstIndex`` and ``lastIndex``,
    and every 7-byte index entry between them is resolved with
    ``readRecord`` and written with ``saveIpRecord``.  The output database is
    ``ipdb.sdb3`` in the current working directory.

    Args:
        db: path of the QQwry database file to convert.

    Returns:
        None.  If ``db`` does not exist a message is printed and nothing is
        written.
    """
    if not os.path.exists(db):
        print("Not found db:", db)
        return

    global data, firstIndex, lastIndex, cur
    with open(db, 'rb') as f:
        data = f.read()  # cache the whole database in memory

    firstIndex = binToInt(data[:4])
    lastIndex = binToInt(data[4:8])

    conn = sqlite3.connect("ipdb.sdb3")
    try:
        cur = conn.cursor()
        # The INSERT statements in saveIpRecord are positional and rely on an
        # auto-assigned ``id``.  Create the tables when they are missing so a
        # fresh output file works; an existing schema is left untouched.
        # NOTE(review): column names other than ``id`` are chosen here.
        cur.execute("CREATE TABLE IF NOT EXISTS country "
                    "(id INTEGER PRIMARY KEY, name TEXT)")
        cur.execute("CREATE TABLE IF NOT EXISTS area "
                    "(id INTEGER PRIMARY KEY, name TEXT)")
        cur.execute("CREATE TABLE IF NOT EXISTS ipaddr "
                    "(id INTEGER PRIMARY KEY, start_ip TEXT, end_ip TEXT, "
                    "country_id INTEGER, area_id INTEGER)")

        # NOTE(review): lastIndex is taken to be the offset of the last index
        # entry itself (QQwry header layout), so the stop value must lie
        # beyond it or the final record is skipped.
        for i in range(firstIndex, lastIndex + 1, 7):
            saveIpRecord(readRecord(i))
        conn.commit()
    finally:
        # Release the database handle even when a record fails to parse.
        conn.close()
    print("converted and saving to ipdb.sdb3")
def saveIpRecord(record):
    """Insert one IP record into the SQLite database.

    Country and area texts are stored once each; the module-level
    ``c_record`` and ``a_record`` dictionaries remember the row id assigned
    to every text already inserted, and the ``ipaddr`` row refers to those
    ids.

    Args:
        record: a ``[start_ip, end_ip, country, area]`` sequence as returned
            by ``readRecord``.
    """
    start_ip, end_ip, country, area = record

    if country not in c_record:
        cur.execute("INSERT INTO country VALUES(null, ?)", (country,))
        # lastrowid is the id SQLite just assigned, so no follow-up
        # "SELECT max(id)" query is needed.
        # NOTE(review): assumes ``id`` is the table's INTEGER PRIMARY KEY,
        # which inserting null into it implies.
        c_record[country] = cur.lastrowid

    if area not in a_record:
        cur.execute("INSERT INTO area VALUES(null, ?)", (area,))
        a_record[area] = cur.lastrowid

    cur.execute(
        "INSERT INTO ipaddr VALUES(null, ?, ?, ?, ?)",
        (start_ip, end_ip, c_record[country], a_record[area]),
    )
# Script entry point: expects exactly one argument, the database path.
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("usage: converter.py database")
    else:
        convert(cli_args[0])
你需要先了解QQwry.dat的格式,那麼應該看得懂我的代碼。生成的sqlite3數據庫很大,是原來的6倍。
不過這只是測試代碼,我還能改進優化存儲,應該能至少壓縮一半,並加快檢索速度。