2019.09.17 16:30:00
- 創建索引/修改配置
/**
 * Create the index "my_index" with 2 primary shards and 0 replicas,
 * then dump the raw client response.
 */
public function create_index()
{
    $client = ClientBuilder::create()->build();

    $settings = [
        'number_of_shards' => 2,
        'number_of_replicas' => 0,
    ];

    $response = $client->indices()->create([
        'index' => 'my_index',
        'body' => ['settings' => $settings],
    ]);

    var_dump($response);
}

/**
 * Update the settings of the "person" index, setting
 * number_of_replicas to 10, then dump the raw client response.
 */
public function put_setting()
{
    $request = [
        'index' => 'person',
        'body' => [
            'settings' => [
                'number_of_replicas' => 10,
            ],
        ],
    ];

    $indices = ClientBuilder::create()->build()->indices();
    var_dump($indices->putSettings($request));
}
創建好的索引,其分片數(number_of_shards)是無法通過put_setting來修改的,這個是一個坑,要求我們在創建索引之初就要好好規劃這個結構及容量,否則之後的擴容過程會比較辛苦
- 修改mapping
/**
 * Put a mapping for type "doc" on the "person" index that declares
 * "address" and "email" as keyword fields, then dump the client response.
 */
public function put_mapping()
{
    $keywordField = ['type' => 'keyword'];

    $request = [
        'index' => 'person',
        'type' => 'doc',
        'body' => [
            'properties' => [
                'address' => $keywordField,
                'email' => $keywordField,
            ],
        ],
    ];

    $indices = ClientBuilder::create()->build()->indices();
    var_dump($indices->putMapping($request));
}
如果要對已存在的索引進行修改,與創建時有所不同,要指出修改的mapping類型,這裏還要有一個地方要注意,那就是修改的mapping,新增的字段是追加的形式放入es裏的,之前存在的並不會消失。
- bulk批量操作
/**
 * Bulk-create documents with ids 1..10 in index "person", type "doc".
 *
 * Each document contributes two consecutive body entries: the action
 * line (`create` + `_id`) followed by the document source.
 */
public function bulk_create_another()
{
    $request = [
        'index' => 'person',
        'type' => 'doc',
        'body' => [],
    ];

    foreach (range(1, 10) as $id) {
        // Action line. The original note says `index` behaves like `create`
        // here: both create the document.
        $request['body'][] = ['create' => ['_id' => $id]];
        // Source line for the document announced by the action line above.
        $request['body'][] = [
            'name' => 'PHPerJiang' . $id,
            'age' => $id,
            'sex' => $id % 2,
        ];
    }

    $client = ClientBuilder::create()->build();
    var_dump($client->bulk($request));
}

/**
 * Bulk-update documents with ids 1..10 in index "person", type "doc".
 *
 * Each document contributes an `update` action line followed by a data
 * line whose fields are wrapped in a `doc` key.
 */
public function bulk_update_another()
{
    $request = [
        'index' => 'person',
        'type' => 'doc',
        'body' => [],
    ];

    foreach (range(1, 10) as $id) {
        $request['body'][] = ['update' => ['_id' => $id]];
        $request['body'][] = [
            'doc' => [
                // `*` binds tighter than `.`, so the doubled id is what gets appended.
                'name' => 'PHPerJiang' . ($id * 2),
                'age' => $id * 3,
                'sex' => $id % 2,
            ],
        ];
    }

    $client = ClientBuilder::create()->build();
    var_dump($client->bulk($request));
}

/**
 * Bulk-delete documents with ids 1..10 in index "person", type "doc".
 *
 * A delete contributes only an action line; there is no data line.
 */
public function bluk_delete_another()
{
    $request = [
        'index' => 'person',
        'type' => 'doc',
        'body' => [],
    ];

    foreach (range(1, 10) as $id) {
        $request['body'][] = ['delete' => ['_id' => $id]];
    }

    $client = ClientBuilder::create()->build();
    var_dump($client->bulk($request));
}
批量增刪改,要注意批量參數中body的寫法:要指出索引(index)、類型(type)和body,body中的每個操作分爲兩部分,一部分是條件(操作行),一部分是數據。另外要注意的是修改(update)操作:body的第二部分(數據部分)要放在 doc 鍵之下,否則es會報錯;而新增操作的第二部分直接寫字段即可,不需要 doc 鍵;刪除操作則只有條件部分,沒有數據部分。
-
部分修改文檔
/**
 * Partially update document 2 in index "person", type "doc".
 *
 * The body carries a `doc` entry; per the original note, the fields
 * inside `doc` are merged with the document's existing fields.
 */
public function update_doc()
{
    $partial = ['bbb' => '3'];

    $request = [
        'index' => 'person',
        'type' => 'doc',
        'id' => 2,
        'body' => ['doc' => $partial],
    ];

    $client = ClientBuilder::create()->build();
    var_dump($client->update($request));
}
body參數中若指出doc參數,則會將es現有的字段與doc中的字段合併,相當於php的array_merge()函數,即es中如果沒有這個字段則會創建。
2019-09-19更新
- 使用腳本script更新doc
// Scripted update as quoted from the client documentation: `script` is a
// bare string and `params` sits next to it directly under `body`.
$params = [
    'index' => 'my_index',
    'type' => 'my_type',
    'id' => 'my_id',
    'body' => [
        'script' => 'ctx._source.counter += count',
        'params' => [
            'count' => 4,
        ],
    ],
];

$response = $client->update($params);
PHP-ElasticSearch文檔中是如上寫的,經過我實際應用發現是個坑,按照以上寫法會報錯找不到參數count,正確的寫法應該是如下
/**
 * Update document 2 in index "person", type "doc" with a Painless script
 * that adds `params.count` (1) to the `age` field.
 *
 * The script parameters are nested inside the `script` entry itself.
 */
public function update_doc_by_script()
{
    $script = [
        'lang' => 'painless',
        'source' => 'ctx._source.age += params.count',
        'params' => ['count' => 1],
    ];

    $request = [
        'index' => 'person',
        'type' => 'doc',
        'id' => 2,
        'body' => ['script' => $script],
    ];

    $client = ClientBuilder::create()->build();
    var_dump($client->update($request));
}
將參數放入script參數內纔可以,表示開始對文檔有深深的懷疑了。
2019-09-20 更新
php-es的官方文檔有很多錯誤,希望大家選擇性使用
- 使用腳本更新數據,若數據中沒有這個字段則設定默認值。文檔中是這麼用的
// Scripted update with an `upsert` entry, as quoted from the client
// documentation: `script` is a bare string, with `params` and `upsert`
// as its siblings under `body`.
$params = [
    'index' => 'my_index',
    'type' => 'my_type',
    'id' => 'my_id',
    'body' => [
        'script' => 'ctx._source.counter += count',
        'params' => [
            'count' => 4,
        ],
        'upsert' => [
            'counter' => 1,
        ],
    ],
];

$response = $client->update($params);
第一點,文檔中的script使用方法不對,首先我們先把script修正一下,如下代碼,注意下列代碼中的age1字段在es中是沒有的。
// Scripted update of document 8 with the parameters nested inside `script`,
// plus an `upsert` entry. The script increments `age1`, a field the
// surrounding text says the stored document does not have.
$params = [
    'index' => 'person',
    'type' => 'doc',
    'id' => 8,
    'body' => [
        'script' => [
            'lang' => 'painless',
            'source' => 'ctx._source.age1 += params.count',
            'params' => [
                'count' => 5,
            ],
        ],
        'upsert' => [
            'count' => 1,
        ],
    ],
];
當我們執行如上腳本的時候,會報錯找不到這個字段
Message: {"error":{"root_cause":[{"type":"remote_transport_exception","reason":"[first-node][127.0.0.1:9300][indices:data/write/update[s]]"}],"type":"illegal_argument_exception","reason":"failed to execute script","caused_by":{"type":"script_exception","reason":"runtime error","script_stack":["ctx._source.age1 += params.count"," ^---- HERE"],"script":"ctx._source.age1 += params.count","lang":"painless","caused_by":{"type":"null_pointer_exception","reason":null}}},"status":400}
實際上就是這個upsert參數在這裏沒有生效:upsert只會在該id的文檔不存在時用來創建文檔,而這裏id爲8的文檔已經存在、只是缺少age1字段,所以腳本仍會執行,並因age1爲null而報錯。這是文檔裏容易誤導人的第二個地方。正確的寫法應該如下
// Scripted update of document 8 that supplies a fallback inside the script:
// `ctx._source.age1 ?: 2` uses 2 when `age1` is null, then `params.count`
// (5) is added and the result is stored back into `age1`.
$params = [
    'index' => 'person',
    'type' => 'doc',
    'id' => 8,
    'body' => [
        'script' => [
            'lang' => 'painless',
            'source' => 'ctx._source.age1 = (ctx._source.age1 ?: 2) + params.count',
            'params' => [
                'count' => 5,
            ],
        ],
    ],
];
我們在script腳本中判斷是否存在這個age1字段,如果存在則執行後面的累加,如果不存在則給它一個默認值2,並且此時會在es的索引中加入此字段。這裏要注意 script中出現的 ?: 是painless中特定的語法,詳情看https://www.elastic.co/guide/en/elasticsearch/reference/5.4/modules-scripting-painless-syntax.html
-
搜索的bool查詢:filter\should\must\must_not
/**
 * Run a bool query against index "person", type "doc" and echo the
 * JSON-encoded response.
 *
 * The query filters on age1 = 22 and additionally requires both
 * age = 8 and sex = 0 via `must` term clauses.
 */
public function search_complex()
{
    $filterClause = [
        'term' => ['age1' => 22],
    ];
    $mustClauses = [
        ['term' => ['age' => 8]],
        ['term' => ['sex' => 0]],
    ];

    $request = [
        'index' => 'person',
        'type' => 'doc',
        'body' => [
            'query' => [
                'bool' => [
                    'filter' => $filterClause,
                    'must' => $mustClauses,
                ],
            ],
        ],
    ];

    $client = ClientBuilder::create()->build();
    $result = $client->search($request);

    echo json_encode($result);
}
搜索分爲過濾filter 和查詢 must\must_not\should,其中在bool參數下單獨使用filter則不會打分;單獨使用must\must_not\should,或將filter與這三種方式組合查詢,則會返回得分。如果想使用filter查詢又想獲取相關性的得分,有以下兩種方式可以實現:
// Option 1: a bool query combining the `filter` term clause with a `must`
// clause that is a `match_all` (encoded as an empty object).
$params = [
    'index' => 'person',
    'type' => 'doc',
    'body' => [
        'query' => [
            'bool' => [
                'filter' => [
                    'term' => ['age1' => 22],
                ],
                'must' => [
                    'match_all' => new stdClass(),
                ],
            ],
        ],
    ],
];

// Option 2: a `constant_score` query wrapping the filter, with a boost of 2.
$params = [
    'index' => 'person',
    'type' => 'doc',
    'body' => [
        'query' => [
            'constant_score' => [
                'boost' => 2,
                'filter' => [
                    'term' => ['sex' => 0],
                ],
            ],
        ],
    ],
];
方式一是使用的must與filter組合查詢,must中使用match_all匹配全部,相當於過濾filter後文檔的全體。方式二是用的constant_score,它取代了bool,這樣過濾後的文檔得分會被置爲1,配合boost權重,可以給某一個查詢過濾增加權重來分配不同的得分。