Lesson-02
OOV ?
Out of Vocabulary
Lambda
import random
"""adj* => adj* adj | adj null"""
def adj ( ) : return random. choice( '藍色的 | 好看的 | 小小的' . split( '|' ) ) . split( ) [ 0 ]
give_adjs = lambda : adj( ) + adj_star_2( )
give_null = lambda : ''
def adj_star_2 ( ) :
return random. choice( [ give_null, give_adjs] ) ( )
def adj_star ( ) :
return random. choice( [ '' , adj( ) + adj_star( ) ] )
adj_star_2( )
def simple_func ( x, y) :
return x + y
results = [ ]
for e in [ 1 , 3 , 5 , 7 , 9 , 10 ] :
results. append( operate( e) )
results
for i in map ( lambda x: x * 2 + 2 - 9 , [ 1 , 3 , 5 , 7 , 9 , 10 ] ) :
print ( i)
operator_2 = lambda x: x * 2 + 2 - 9
def absolutly ( x) :
if x < 0 : return - 1 * x
else :
return x
def mod_3 ( x) : return x % 3
mod_3( 13 )
sorted ( [ 1 , 3 , 4 , 1 , 5 , 1 , 1 , 4 , 9 , - 1 , 9 , - 19 , 1 ] , key= mod_3)
sorted ( [ 1 , 3 , 4 , 1 , 5 , 1 , 1 , 4 , 9 , - 1 , 9 , - 19 , 1 ] , key= lambda x: x % 3 )
Search Policy
coordination_source = """
{name:'Lanzhou', geoCoord:[103.73, 36.03]},
{name:'Jiayuguan', geoCoord:[98.17, 39.47]},
{name:'Xining', geoCoord:[101.74, 36.56]},
{name:'Chengdu', geoCoord:[104.06, 30.67]},
{name:'Shijiazhuang', geoCoord:[114.48, 38.03]},
{name:'Lasa', geoCoord:[102.73, 25.04]},
{name:'Guiyang', geoCoord:[106.71, 26.57]},
{name:'Wuhan', geoCoord:[114.31, 30.52]},
{name:'Zhengzhou', geoCoord:[113.65, 34.76]},
{name:'Jinan', geoCoord:[117, 36.65]},
{name:'Nanjing', geoCoord:[118.78, 32.04]},
{name:'Hefei', geoCoord:[117.27, 31.86]},
{name:'Hangzhou', geoCoord:[120.19, 30.26]},
{name:'Nanchang', geoCoord:[115.89, 28.68]},
{name:'Fuzhou', geoCoord:[119.3, 26.08]},
{name:'Guangzhou', geoCoord:[113.23, 23.16]},
{name:'Changsha', geoCoord:[113, 28.21]},
//{name:'海口', geoCoord:[110.35, 20.02]},
{name:'Shengyang', geoCoord:[123.38, 41.8]},
{name:'Changchun', geoCoord:[125.35, 43.88]},
{name:'Haerbing', geoCoord:[126.63, 45.75]},
{name:'Taiyuan', geoCoord:[112.53, 37.87]},
{name:'Xian', geoCoord:[108.95, 34.27]},
//{name:'Taiwan', geoCoord:[121.30, 25.03]},
{name:'Beijing', geoCoord:[116.46, 39.92]},
{name:'Shanghai', geoCoord:[121.48, 31.22]},
{name:'Chongqing', geoCoord:[106.54, 29.59]},
{name:'Tianjing', geoCoord:[117.2, 39.13]},
{name:'Huhehaote', geoCoord:[111.65, 40.82]},
{name:'Nanning', geoCoord:[108.33, 22.84]},
//{name:'西藏', geoCoord:[91.11, 29.97]},
{name:'Yingchuan', geoCoord:[106.27, 38.47]},
{name:'Wulumuqi', geoCoord:[87.68, 43.77]},
{name:'Xianggang', geoCoord:[114.17, 22.28]},
{name:'Aomen', geoCoord:[113.54, 22.19]}
"""
geo_pattern = "{name:'(\w+)',\s+geoCoord:\[(\d+\.\d+),\s+(\d+\.\d+)\]}"
city_location = { }
for city, x, y in re. findall( geo_pattern, coordination_source) :
city_location[ city] = ( float ( x) , float ( y) )
city_location
cities = { c : [ ] for c in city_location}
nx. draw( nx. Graph( cities) , city_location, with_labels= True , font_size= 10 , node_size= 10 )
import math
def geo_distance ( origin, destination) :
"""
Calculate the Haversine distance.
Parameters
----------
origin : tuple of float
(lat#緯度-北緯南緯-y, long#經度-東經西經-x)
destination : tuple of float
(lat, long)
Returns
-------
distance_in_km : float
Examples
--------
>>> origin = (48.1372, 11.5756) # Munich
>>> destination = (52.5186, 13.4083) # Berlin
>>> round(distance(origin, destination), 1)
504.2
"""
lon1, lat1 = origin
lon2, lat2 = destination
radius = 6371
dlat = math. radians( lat2 - lat1)
dlon = math. radians( lon2 - lon1)
a = ( math. sin( dlat / 2 ) * math. sin( dlat / 2 ) +
math. cos( math. radians( lat1) ) * math. cos( math. radians( lat2) ) *
math. sin( dlon / 2 ) * math. sin( dlon / 2 ) )
c = 2 * math. atan2( math. sqrt( a) , math. sqrt( 1 - a) )
d = radius * c
return d
city_location[ 'Beijing' ]
city_location[ 'Tianjing' ]
city_location[ 'Shengyang' ]
city_location[ 'Shanghai' ]
geo_distance( ( 116.46 , 39.92 ) , ( 117.2 , 39.13 ) )
平面上: P1: [123.38, 41.8] P2: [121.48, 31.22]
P1: [41.8, 123.38] P2: [31.22, 121.48]
geo_distance( ( 123.38 , 41.8 ) , ( 121.48 , 31.22 ) )
geo_distance( ( 41.8 , 123.38 ) , ( 31.22 , 121.48 ) )
def get_city_distance ( city1, city2) :
return geo_distance( city_location[ city1] , city_location[ city2] )
get_city_distance( 'Beijing' , 'Shanghai' )
get_city_distance( 'Hangzhou' , 'Shanghai' )
from collections import defaultdict
threshold = 700
city_connection = defaultdict( list )
for city1 in city_location:
for city2 in city_location:
if city1 is city2: continue
distance = get_city_distance( city1, city2)
if distance < threshold:
city_connection[ city1] . append( city2)
city_connection
import re
some_text = 'this is a test text, I want some more tests; And I have some tested cases'
'test' , 'tests' , 'testedededed'
+ 一個或者多個字符
* 指的是0個或者多個字符
re == > regular expresson
pattern = 'test\w*'
pattern_2 = 'test\w+'
re. findall( pattern_2, some_text)
with_numbers_text = """
電話:18910451234 年齡:32 性別:男 ID:90
電話:18910451234 年齡:32 性別:男
電話:18910451234 年齡:32 性別:男
電話:18910451234 年齡:32 性別:男
電話:189104513123234 年齡:32 性別:男 ID: 312
電話:18910451234 年齡:32 性別:男
電話:18989810451234 年齡:32 性別:男 STU_OD: 312
電話:01018910451234 年齡:3 性別:男
電話:18910451234 年齡:32 性別:男
電話:18910451234 年齡:32 性別:男
電話:18910451234 年齡:32 性別:男
"""
with_constraint_num_pattern = '電話:(\d+) 年齡:(\d+)'
re. findall( with_constraint_num_pattern, with_numbers_text)
import networkx as nx
relatinship = {
'Zhangsan' : [ ] ,
'Li' : [ ] ,
'Huang' : [ ]
}
simple_graph = nx. Graph( relatinship)
% matplotlib inline
locations = {
'Zhangsan' : ( 0 , 0 ) ,
'Li' : ( 1 , 4 ) ,
'Wang' : ( 9 , 0 ) ,
'Liu' : ( 19 , 1 ) ,
'Gao' : ( 10 , 10 ) ,
'Huang' : ( 11 , 19 ) ,
'Hu' : ( 18 , - 10 )
}
nx. draw( simple_graph, locations, with_labels= True )
city_graph = nx. Graph( city_connection)
nx. draw( city_graph, city_location, with_labels= True )
五子棋 -->
象棋
從鄭州到嘉峪關爲例
廣度優先搜索
def bfs ( graph, start, destination) :
pathes = [ [ start] ]
seen = set ( )
visited_cities = [ ]
while pathes:
path = pathes. pop( 0 )
froniter = path[ - 1 ]
if froniter in seen: continue
for successor in graph[ froniter] :
if successor in path: continue
new_path = path + [ successor]
if successor == destination:
visited_cities. append( froniter)
visited_cities. append( successor)
return new_path, visited_cities, pathes
pathes. append( new_path)
seen. add( froniter)
visited_cities. append( froniter)
return seen, visited_cities, pathes
def best_fs ( graph, start, destination, strategy) :
pathes = [ [ start] ]
seen = set ( )
visited_cities = [ ]
while pathes:
path = pathes. pop( 0 )
froniter = path[ - 1 ]
if froniter in seen: continue
for successor in graph[ froniter] :
if successor in path: continue
new_path = path + [ successor]
if successor == destination:
visited_cities. append( froniter)
visited_cities. append( successor)
return new_path, visited_cities, pathes
pathes. append( new_path)
seen. add( froniter)
visited_cities. append( froniter)
pathes = sorted ( pathes, key= strategy)
return seen, visited_cities, pathes
p, v, ps = best_fs( city_graph, 'Haerbing' , 'Hefei' , get_total_length)
pretty_print( p)
p, v, ps = best_fs( city_graph, 'Haerbing' , 'Hefei' , lambda x: - len ( x) )
pretty_print( p)
get_total_length( p)
p, v, ps = bfs( city_graph, 'Haerbing' , 'Hefei' )
pretty_print( p)
get_total_length( p)
def pretty_print ( cities) :
print ( '🚘->' . join( cities) )
p, v, ps = bfs( city_graph, 'Haerbing' , 'Hefei' )
pretty_print( p)
def get_total_length ( path) :
distance = 0
for i, city in enumerate ( path[ : - 1 ] ) :
current, next_c = city, path[ i+ 1 ]
distance += get_city_distance( current, next_c)
return distance
depth_first_search( city_graph, 'Beijing' , 'Hefei' )
深度優先算法
def depth_first_search ( graph, start, destination) :
pathes = [ [ start] ]
seen = set ( )
visited_cities = [ ]
while pathes:
path = pathes. pop( - 1 )
froniter = path[ - 1 ]
if froniter in seen: continue
for successor in graph[ froniter] :
if successor in path: continue
new_path = path + [ successor]
if successor == destination:
visited_cities. append( froniter)
visited_cities. append( successor)
return new_path, visited_cities, pathes
pathes. append( new_path)
seen. add( froniter)
visited_cities. append( froniter)
return seen, visited_cities, pathes
path, visited_cities, pathes = search( city_connection, 'Zhengzhou' , 'Jiayuguan' )
path
pathes
color_map = [ 'green' for _ in city_graph]
def visited_procedure ( graph, postion, visited_order, step) :
changed = visited_order[ : step] if step else visited_order
color_map = [ 'green' if c in changed else 'red' for c in graph]
nx. draw( graph, postion, node_color= color_map, with_labels= True )
path, visited_cities_bfs, pathes = search( city_connection, 'Zhengzhou' , 'Jiayuguan' )
visited_procedure( city_graph, city_location, visited_cities_bfs, step= 20 )
path, visited_cities_dfs, pathes = depth_first_search( city_connection, 'Zhengzhou' , 'Jiayuguan' )
visited_procedure( city_graph, city_location, visited_cities_dfs, step= 18 )
path
找到了從鄭州到嘉峪關的路
path, visited_cities, pathes = search( city_connection, 'Zhengzhou' , 'Jiayuguan' )
visited_procedure( city_graph, city_location, visited_cities, step= None )
爬蟲、搜索引擎與BFS,DFS的關係
如果你要做一個爬蟲程序,你應該用BFS還是DFS?
import requests
https: // github. com/ Autobicycle- Never- Blocking/ Beloved- Little- Moto/ blob/ master/ crawl. py
作業:從https://ditie.mapbar.com/beijing/開始
爬蟲
正則表達式
數據預處理
BFS
Best-First-Search
-> 實現北京地鐵的換乘方案
url = 'https://ditie.mapbar.com/beijing/'
response = requests. get( url)
pattern = """<a href="(http://ditie\.mapbar\.com/beijing/\w+/)" style="background-color:#\w+" target="_blank">(\w+)</a>"""
re. findall( pattern, response. text)
a_station = 'https://ditie.mapbar.com/beijing/line_ditiebatongxian/'
response = requests. get( a_station)
pattern_s = '<li\s*.*><a class="cl\-station" href="http://ditie\.mapbar\.com/beijing/station_\w+/" target="_blank" title="\w+">(\w+)</a></li>'
re. findall( pattern_s, response. text)
{
'八通線' :[ ] ,
'一號線' :[ ] ,
}