Swift源碼分析----swift-proxy與swift-object(2)

感謝朋友支持本博客，歡迎共同探討交流，由於能力和時間有限，錯誤之處在所難免，歡迎指正！

如果轉載，請保留作者信息。
博客地址：http://blog.csdn.net/gaoxingnengjisuan
郵箱地址：[email protected]

PS：最近沒有登錄博客，很多朋友的留言沒有看見，這裏道歉！還有就是本人較少上QQ，可以郵件交流。

接續上一篇博客：

PUT

/swift/proxy/controllers/obj.py----class ObjectController(Controller)----def PUT

def PUT(self, req):
    """
    Handle an HTTP PUT request for an object on the proxy server.

    This is an abridged excerpt: lines consisting of "......" mark code
    that was cut from the original method, so names such as chunked,
    data_source, node_iter, etag and the delete_at_* values are defined
    in the omitted parts.
    """
    ......

    # Look up the container info and pull out the fields used below.
    container_info = self.container_info(self.account_name, self.container_name, req)
    container_partition = container_info['partition']
    containers = container_info['nodes']
    req.acl = container_info['write_acl']
    req.environ['swift_sync_key'] = container_info['sync_key']
    object_versions = container_info['versions']

     ......


    # Get the partition and the nodes for this object from the object ring.
    partition, nodes = self.app.object_ring.get_nodes(self.account_name, self.container_name, self.object_name)

     ......
        
    pile = GreenPile(len(nodes))

     ......

    outgoing_headers = self._backend_requests(
        req, len(nodes), container_partition, containers,
        delete_at_container, delete_at_part, delete_at_nodes)

    # Spawn one _connect_put_node call per set of backend headers; that
    # method opens the PUT connection to a node.
    for nheaders in outgoing_headers:
        # RFC2616:8.2.3 disallows 100-continue without a body
        if (req.content_length > 0) or chunked:
            nheaders['Expect'] = '100-continue'
        pile.spawn(self._connect_put_node, node_iter, partition,
                   req.swift_entity_path, nheaders,
                   self.app.logger.thread_locals)

    # Collect the connections that were established (falsy results dropped).
    conns = [conn for conn in pile if conn]
     ......
     
    bytes_transferred = 0
    try:
        with ContextPool(len(nodes)) as pool:
            # Give each connection a bounded queue and a sender spawned in
            # the pool running self._send_file.
            for conn in conns:
                conn.failed = False
                conn.queue = Queue(self.app.put_queue_depth)
                pool.spawn(self._send_file, conn, req.path)
            while True:
                 # Read the next chunk from the client under client_timeout.
                 with ChunkReadTimeout(self.app.client_timeout):
                    try:
                        chunk = next(data_source)
                    except StopIteration:
                        # End of input: for chunked transfers queue the
                        # terminating zero-length chunk, then stop reading.
                        if chunked:
                            for conn in conns:
                                conn.queue.put('0\r\n\r\n')
                        break
                bytes_transferred += len(chunk)
                # Reject uploads that exceed MAX_FILE_SIZE.
                if bytes_transferred > MAX_FILE_SIZE:
                    return HTTPRequestEntityTooLarge(request=req)
                # Queue the chunk on every healthy connection and drop the
                # failed ones; iterate over a copy because conns is mutated.
                for conn in list(conns):
                    if not conn.failed:
                        conn.queue.put('%x\r\n%s\r\n' % (len(chunk), chunk) if chunked else chunk)
                    else:
                        conns.remove(conn)
                ......

            # Wait until every connection's queue has been fully consumed.
            for conn in conns:
                if conn.queue.unfinished_tasks:
                    conn.queue.join()

        conns = [conn for conn in conns if not conn.failed]

    ......
        
    # Gather the responses from the remaining connections.
    statuses, reasons, bodies, etags = self._get_put_responses(req, conns, nodes)

    ......

    # Choose the response to return from the collected backend responses
    # (the article describes best_response as a voting mechanism).
    resp = self.best_response(req, statuses, reasons, bodies, _('Object PUT'), etag=etag)

     ......
      
    return resp

注：上述方法爲裁剪後剩餘關鍵部分的代碼，各部分具體實現已經在代碼註釋中標註出來；

來看方法_connect_put_node的實現：

def _connect_put_node(self, nodes, part, path, headers, logger_thread_locals):
    """
    實現PUT方法的推送；
    """
    self.app.logger.thread_locals = logger_thread_locals
    for node in nodes:
        try:
            start_time = time.time()
                
            with ConnectionTimeout(self.app.conn_timeout):
                conn = http_connect(node['ip'], node['port'], node['device'], part, 'PUT', path, headers)
                
            self.app.set_node_timing(node, time.time() - start_time)
            with Timeout(self.app.node_timeout):
                resp = conn.getexpect()
            if resp.status == HTTP_CONTINUE:
                conn.resp = None
                conn.node = node
                return conn
            elif is_success(resp.status):
                conn.resp = resp
                conn.node = node
                return conn
            elif headers['If-None-Match'] is not None and resp.status == HTTP_PRECONDITION_FAILED:
                conn.resp = resp
                conn.node = node
                return conn
            elif resp.status == HTTP_INSUFFICIENT_STORAGE:
                self.app.error_limit(node, _('ERROR Insufficient Storage'))
        except (Exception, Timeout):
            self.app.exception_occurred(node, _('Object'), _('Expect: 100-continue on %s') % path)

/swift/obj/server.py----class ObjectController(object)----def PUT

def PUT(self, request):
    """
    Handle an HTTP PUT request that creates or overwrites an object.

    Steps visible in this handler:
    1. Validate the path, the X-Timestamp header, the object-creation
       constraints and X-Delete-At.
    2. Determine the expected body length (fsize) from the request.
    3. Obtain the DiskFile instance for the object and read any existing
       metadata.
    4. Apply the If-None-Match and timestamp-conflict checks.
    5. Open a writer with disk_file.create(size=fsize) and copy the request
       body into it in reads of network_chunk_size, updating an MD5 digest.
    6. Verify the received size and the optional ETag header.
    7. Build the metadata dict from the request headers and hand it to
       writer.put().
    8. Call delete_at_update when the X-Delete-At value changed.
    9. Call container_update to report the object to its container.

    :param request: the incoming request object
    :returns: an HTTP response object; HTTPCreated on success
    """
    # Extract device, partition, account, container and obj from the path.
    device, partition, account, container, obj = split_and_validate_path(request, 5, 5, True)

    if 'x-timestamp' not in request.headers or not check_float(request.headers['x-timestamp']):
         return HTTPBadRequest(body='Missing timestamp', request=request, content_type='text/plain')

    # Check the request against the object-creation constraints.
    error_response = check_object_creation(request, obj)

    if error_response:
        return error_response

    new_delete_at = int(request.headers.get('X-Delete-At') or 0)
    if new_delete_at and new_delete_at < time.time():
        return HTTPBadRequest(body='X-Delete-At in past', request=request, content_type='text/plain')

    # Expected body length as declared by the request.
    try:
        fsize = request.message_length()
    except ValueError as e:
        return HTTPBadRequest(body=str(e), request=request, content_type='text/plain')

    # Get the DiskFile instance that manages this object's on-disk files.
    try:
        disk_file = self.get_diskfile(device, partition, account, container, obj)
    except DiskFileDeviceUnavailable:
        return HTTPInsufficientStorage(drive=device, request=request)

    # Read the metadata of the existing object, if there is one.
    try:
        orig_metadata = disk_file.read_metadata()
    except (DiskFileNotExist, DiskFileQuarantined):
        orig_metadata = {}

    # Checks for If-None-Match
    if request.if_none_match is not None and orig_metadata:
        if '*' in request.if_none_match:
            # File exists already so return 412
            return HTTPPreconditionFailed(request=request)
        if orig_metadata.get('ETag') in request.if_none_match:
            # The current ETag matches, so return 412
            return HTTPPreconditionFailed(request=request)

    # Refuse the write when the stored object is at least as new.
    orig_timestamp = orig_metadata.get('X-Timestamp')
    if orig_timestamp and orig_timestamp >= request.headers['x-timestamp']:
        return HTTPConflict(request=request)

    orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
    upload_expiration = time.time() + self.max_upload_time
    etag = md5()
    elapsed_time = 0

    try:
        # Open a writer for the new object, passing the expected size.
        with disk_file.create(size=fsize) as writer:
            upload_size = 0
            def timeout_reader():
                # One read of at most network_chunk_size under client_timeout.
                with ChunkReadTimeout(self.client_timeout):
                    return request.environ['wsgi.input'].read(self.network_chunk_size)

            # Copy the body chunk by chunk until an empty read.
            try:
                for chunk in iter(lambda: timeout_reader(), ''):
                    start_time = time.time()
                    if start_time > upload_expiration:
                        self.logger.increment('PUT.timeouts')
                        return HTTPRequestTimeout(request=request)
                    etag.update(chunk)
                    # The value returned by write() is kept as the upload
                    # size that is checked against fsize below.
                    upload_size = writer.write(chunk)
                    elapsed_time += time.time() - start_time
            except ChunkReadTimeout:
                return HTTPRequestTimeout(request=request)
            if upload_size:
                self.logger.transfer_rate('PUT.' + device + '.timing', elapsed_time, upload_size)

            # The upload is accepted only when the received size matches
            # the declared length and the digest matches any ETag header.
            if fsize is not None and fsize != upload_size:
                return HTTPClientDisconnect(request=request)
            etag = etag.hexdigest()
            if 'etag' in request.headers and request.headers['etag'].lower() != etag:
                return HTTPUnprocessableEntity(request=request)

            # Build the object's metadata from the request headers.
            metadata = {
                    'X-Timestamp': request.headers['x-timestamp'],
                    'Content-Type': request.headers['content-type'],
                    'ETag': etag,
                    'Content-Length': str(upload_size),
                }
            # Copy the headers that is_user_meta('object', name) accepts.
            metadata.update(val for val in request.headers.iteritems() if is_user_meta('object', val[0]))
            # X-Backend-Replication-Headers arrives as one header string;
            # split it into names so that header names, not single
            # characters, are iterated (assumes a whitespace-separated
            # list -- TODO confirm against the sender).  When the header is
            # absent or empty, self.allowed_headers is used instead.
            replication_headers = (
                request.headers.get('X-Backend-Replication-Headers') or '')
            for header_key in (
                    replication_headers.split() or self.allowed_headers):
                if header_key in request.headers:
                    header_caps = header_key.title()
                    metadata[header_caps] = request.headers[header_key]

            # Hand the metadata to the writer to finish the write.
            writer.put(metadata)
    except DiskFileNoSpace:
        return HTTPInsufficientStorage(drive=device, request=request)

    # When X-Delete-At changed, register the new value and remove the old.
    if orig_delete_at != new_delete_at:
        if new_delete_at:
            self.delete_at_update('PUT', new_delete_at, account, container, obj, request, device)
        if orig_delete_at:
            self.delete_at_update('DELETE', orig_delete_at, account, container, obj, request, device)

    # Report the new object to its container.
    self.container_update(
            'PUT', account, container, obj, request,
            HeaderKeyDict({
                'x-size': metadata['Content-Length'],
                'x-content-type': metadata['Content-Type'],
                'x-timestamp': metadata['X-Timestamp'],
                'x-etag': metadata['ETag']}),
            device)

    return HTTPCreated(request=request, etag=etag)

DELETE

/swift/proxy/controllers/obj.py----class ObjectController(Controller)----def DELETE

def DELETE(self, req):
    """
    Handle an HTTP DELETE request for an object on the proxy server.

    When the container has a versions location and an older version of the
    object is listed there, that version is first copied back over the
    current object and the delete is redirected to the older version.
    The DELETE is then sent to the object nodes through make_requests.

    :param req: the incoming request object
    :returns: an HTTP response object
    """
    container_info = self.container_info(self.account_name, self.container_name, req)


    container_partition = container_info['partition']
    containers = container_info['nodes']
    req.acl = container_info['write_acl']
    req.environ['swift_sync_key'] = container_info['sync_key']
    object_versions = container_info['versions']

    if object_versions:
        # this is a version manifest and needs to be handled differently
        object_versions = unquote(object_versions)
        lcontainer = object_versions.split('/')[0]
        # Listing prefix: 3-digit hex length of the name, the name, then '/'.
        prefix_len = '%03x' % len(self.object_name)
        lprefix = prefix_len + self.object_name + '/'
        last_item = None
        try:
            # Exhaust the listing; only the final entry is kept.
            for last_item in self._listing_iter(lcontainer, lprefix, req.environ):
                pass
        except ListingIterNotFound:
            # no worries, last_item is None
            pass
        except ListingIterNotAuthorized as err:
            return err.aresp
        except ListingIterError:
            return HTTPServerError(request=req)

        if last_item:
            # there are older versions so copy the previous version to the
            # current object and delete the previous version
            orig_container = self.container_name
            orig_obj = self.object_name
            self.container_name = lcontainer
            self.object_name = last_item['name'].encode('utf-8')
            copy_path = '/v1/' + self.account_name + '/' + self.container_name + '/' + self.object_name

            # The copy destination is the original container and object.
            copy_headers = {'X-Newest': 'True', 'Destination': orig_container + '/' + orig_obj}
            copy_environ = {'REQUEST_METHOD': 'COPY', 'swift_versioned_copy': True}
            # Build a new request object for the COPY.
            creq = Request.blank(copy_path, headers=copy_headers, environ=copy_environ)
            copy_resp = self.COPY(creq)

            if is_client_error(copy_resp.status_int):
                # some user error, maybe permissions
                return HTTPPreconditionFailed(request=req)
            elif not is_success(copy_resp.status_int):
                # could not copy the data, bail
                return HTTPServiceUnavailable(request=req)

            # reset these because the COPY changed them
            self.container_name = lcontainer
            self.object_name = last_item['name'].encode('utf-8')
            # From here on the request being served is a delete of the
            # older version in the versions container.
            new_del_req = Request.blank(copy_path, environ=req.environ)
            container_info = self.container_info(self.account_name, self.container_name, req)
            container_partition = container_info['partition']
            containers = container_info['nodes']
            new_del_req.acl = container_info['write_acl']
            new_del_req.path_info = copy_path
            req = new_del_req
            # remove 'X-If-Delete-At', since it is not for the older copy
            if 'X-If-Delete-At' in req.headers:
                del req.headers['X-If-Delete-At']
    # Run the authorization callback, if one is installed in the environ.
    if 'swift.authorize' in req.environ:
        aresp = req.environ['swift.authorize'](req)
        if aresp:
            return aresp

    if not containers:
        return HTTPNotFound(request=req)

    # Get the partition and the nodes for this object from the object ring.
    partition, nodes = self.app.object_ring.get_nodes(self.account_name, self.container_name, self.object_name)
    # Used by container sync feature
    if 'x-timestamp' in req.headers:
        try:
            req.headers['X-Timestamp'] = normalize_timestamp(req.headers['x-timestamp'])
        except ValueError:
            return HTTPBadRequest(request=req, content_type='text/plain',
                    body='X-Timestamp should be a UNIX timestamp float value; '
                    'was %r' % req.headers['x-timestamp'])
    else:
        req.headers['X-Timestamp'] = normalize_timestamp(time.time())

    headers = self._backend_requests(req, len(nodes), container_partition, containers)

    # Send the DELETE to the object nodes and return the response that
    # make_requests produces from their answers (the article describes
    # the selection as a voting mechanism).
    resp = self.make_requests(req, self.app.object_ring,
                              partition, 'DELETE', req.swift_entity_path,
                              headers)

    return resp

/swift/obj/server.py----class ObjectController(object)----def DELETE

def DELETE(self, request):
    """
    Handle an HTTP DELETE request for an object on the object server.

    The response class is chosen from the state of the existing object:
    HTTPNotFound when its metadata cannot be read, HTTPNoContent when the
    stored timestamp is older than the request's, HTTPConflict otherwise.
    When the stored timestamp is older than the request's, the object is
    deleted through disk_file.delete and the container is notified through
    container_update.

    :param request: the incoming request object
    :returns: an HTTP response object
    """
    device, partition, account, container, obj = split_and_validate_path(request, 5, 5, True)

    if 'x-timestamp' not in request.headers or not check_float(request.headers['x-timestamp']):
        return HTTPBadRequest(body='Missing timestamp', request=request, content_type='text/plain')

    try:
        disk_file = self.get_diskfile(device, partition, account, container, obj)
    except DiskFileDeviceUnavailable:
        return HTTPInsufficientStorage(drive=device, request=request)

    # Read the metadata of the object to delete.
    try:
        orig_metadata = disk_file.read_metadata()

    except DiskFileExpired as e:
        # Expired: keep the timestamp and metadata carried by the exception.
        orig_timestamp = e.timestamp
        orig_metadata = e.metadata
        response_class = HTTPNotFound
    except DiskFileDeleted as e:
        # Already deleted: keep only the timestamp carried by the exception.
        orig_timestamp = e.timestamp
        orig_metadata = {}
        response_class = HTTPNotFound
    except (DiskFileNotExist, DiskFileQuarantined):
        orig_timestamp = 0
        orig_metadata = {}
        response_class = HTTPNotFound


    else:
        # Metadata was read: compare the stored and request timestamps.
        orig_timestamp = orig_metadata.get('X-Timestamp', 0)
        if orig_timestamp < request.headers['x-timestamp']:
            response_class = HTTPNoContent
        else:
            response_class = HTTPConflict

    orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
    # When X-If-Delete-At is supplied it must be an integer equal to the
    # stored X-Delete-At value; a missing header skips the check.
    try:
        req_if_delete_at_val = request.headers['x-if-delete-at']
        req_if_delete_at = int(req_if_delete_at_val)
    except KeyError:
        pass
    except ValueError:
        return HTTPBadRequest(request=request, body='Bad X-If-Delete-At header value')
    else:
        if orig_delete_at != req_if_delete_at:
            return HTTPPreconditionFailed(request=request, body='X-If-Delete-At and X-Delete-At do not match')

    # Issue a delete_at_update DELETE for the stored X-Delete-At value.
    # NOTE(review): the article's author states that delete_at_update
    # performs no operation in this code path -- confirm against its
    # implementation.
    if orig_delete_at:
        self.delete_at_update('DELETE', orig_delete_at, account, container, obj, request, device)
    req_timestamp = request.headers['X-Timestamp']

    if orig_timestamp < req_timestamp:
        # Delete the object as of the request timestamp.
        disk_file.delete(req_timestamp)
        # Notify the container that the object was deleted.
        self.container_update(
            'DELETE', account, container, obj, request,
            HeaderKeyDict({'x-timestamp': req_timestamp}),
            device)
    return response_class(request=request)

語句disk_file.delete(req_timestamp)實現了刪除指定對象，並實現了刪除比給定時間戳舊的任何對象文件；

來看方法disk_file.delete（）的實現：

class DiskFile(object)----def delete
def delete(self, timestamp):
    """
    Record a deletion of this object at the given timestamp.

    The timestamp is normalized, a writer is opened through self.create(),
    its extension is switched to '.ts', and a metadata dict holding only
    X-Timestamp is passed to put().  Removal of older object files is
    presumably done inside put() -- it is not visible here.

    :param timestamp: timestamp of the deletion
    """
    ts_metadata = {'X-Timestamp': normalize_timestamp(timestamp)}
    with self.create() as ts_writer:
        ts_writer._extension = '.ts'
        ts_writer.put(ts_metadata)

注：具體實現通過分析方法create()和方法put()就可得知和理解，這裏不多說了。

再來看方法container_update的實現：

def container_update(self, op, account, container, obj, request, headers_out, objdevice):
    """
    Notify the object's container(s) that the object changed.

    The container hosts, devices and partition are taken from the
    X-Container-Host, X-Container-Device and X-Container-Partition headers
    of the original request.  One async_update call is made per
    host/device pair; nothing is sent when the partition header is empty.

    :param op: operation name passed through to async_update
    :param account: account name
    :param container: container name
    :param obj: object name
    :param request: the original request; its headers and as_referer()
                    are read
    :param headers_out: mutable headers for the update; 'x-trans-id' and
                        'referer' are set on it
    :param objdevice: device name passed through to async_update
    """
    incoming = request.headers
    raw_hosts = incoming.get('X-Container-Host', '')
    raw_devices = incoming.get('X-Container-Device', '')
    contpartition = incoming.get('X-Container-Partition', '')

    conthosts = [item.strip() for item in raw_hosts.split(',')]
    contdevices = [item.strip() for item in raw_devices.split(',')]

    if len(conthosts) != len(contdevices):
        # This shouldn't happen unless there's a bug in the proxy,
        # but if there is, we want to know about it.
        message = _('ERROR Container update failed: different  '
                    'numbers of hosts and devices in request: '
                    '"%s" vs "%s"')
        self.logger.error(message % (raw_hosts, raw_devices))
        return

    headers_out['x-trans-id'] = incoming.get('x-trans-id', '-')
    headers_out['referer'] = request.as_referer()

    # Without a container partition there is nothing to update.
    if not contpartition:
        return

    # Send one update per container host/device pair.
    for conthost, contdevice in zip(conthosts, contdevices):
        self.async_update(op, account, container, obj, conthost,
                          contpartition, contdevice, headers_out,
                          objdevice)

def async_update(self, op, account, container, obj, host, partition, contdevice, headers_out, objdevice):
    """
    Send a container update, or save it for later when it cannot be sent.

    When host, partition and contdevice are all set, an HTTP request is
    sent to the container server and the method returns on a successful
    response.  In every other case (missing target, non-success status,
    exception or timeout) the update is handed to
    self._diskfile_mgr.pickle_async_update so it is saved on objdevice.

    :param op: operation, used as the HTTP method of the update
    :param account: account name
    :param container: container name
    :param obj: object name
    :param host: container server as 'ip:port'
    :param partition: container partition
    :param contdevice: container device
    :param headers_out: headers for the update; 'user-agent' is set on it
                        and 'x-timestamp' is read from it
    :param objdevice: object device on which a failed update is saved
    """
    headers_out['user-agent'] = 'obj-server %s' % os.getpid()
    # Full path of the object.
    full_path = '/%s/%s/%s' % (account, container, obj)
    if all([host, partition, contdevice]):
        # Bind ip and port before the try block: if host cannot be split
        # into two parts the unpacking below raises, and the except handler
        # must still be able to format its log message instead of failing
        # with an unbound-name error and skipping the save below.
        ip, port = host, None
        try:
            # The connection must be made within conn_timeout.
            with ConnectionTimeout(self.conn_timeout):
                # Split host into ip and port.
                ip, port = host.rsplit(':', 1)
                conn = http_connect(ip, port, contdevice, partition, op, full_path, headers_out)
            # The response must arrive within node_timeout.
            with Timeout(self.node_timeout):
                response = conn.getresponse()
                response.read()
                # A successful update needs no further work.
                if is_success(response.status):
                    return
                else:
                    self.logger.error(_(
                        'ERROR Container update failed '
                        '(saving for async update later): %(status)d '
                        'response from %(ip)s:%(port)s/%(dev)s'),
                        {'status': response.status, 'ip': ip, 'port': port,
                         'dev': contdevice})
        except (Exception, Timeout):
            self.logger.exception(_(
                'ERROR container update failed with '
                '%(ip)s:%(port)s/%(dev)s (saving for async update later)'),
                {'ip': ip, 'port': port, 'dev': contdevice})
    # The update was not delivered: save it so it can be sent later.
    data = {'op': op, 'account': account, 'container': container, 'obj': obj, 'headers': headers_out}
    timestamp = headers_out['x-timestamp']
    self._diskfile_mgr.pickle_async_update(objdevice, account, container, obj, data, timestamp)

def pickle_async_update(self, device, account, container, obj, data, timestamp):
    """
    Write a pending container update to disk as a pickle file.

    The file is written by write_pickle, run through the device's thread
    pool, to:
        <device_path>/<ASYNCDIR>/<hash[-3:]>/<hash>-<normalized timestamp>
    where hash is hash_path(account, container, obj), using
    <device_path>/tmp as the temporary directory.  The 'async_pendings'
    counter is incremented afterwards.

    :param device: device name
    :param account: account name
    :param container: container name
    :param obj: object name
    :param data: the update to pickle
    :param timestamp: timestamp used in the file name
    """
    dev_root = self.construct_dev_path(device)
    pending_root = os.path.join(dev_root, ASYNCDIR)
    # Hash of the object's account/container/object path.
    name_hash = hash_path(account, container, obj)
    run_in_thread = self.threadpools[device].run_in_thread
    pending_name = name_hash + '-' + normalize_timestamp(timestamp)
    destination = os.path.join(pending_root, name_hash[-3:], pending_name)
    scratch_dir = os.path.join(dev_root, 'tmp')
    # write_pickle writes to the temporary directory first and then moves
    # the file to its destination.
    run_in_thread(write_pickle, data, destination, scratch_dir)
    self.logger.increment('async_pendings')

def write_pickle(obj, dest, tmp=None, pickle_protocol=0):
    """
    Pickle obj into a temporary file, sync it to disk, then move it to dest.

    :param obj: object to pickle
    :param dest: final path of the pickle file
    :param tmp: directory for the temporary file; defaults to the
                directory of dest
    :param pickle_protocol: protocol passed to pickle.dump
    """
    staging_dir = os.path.dirname(dest) if tmp is None else tmp
    fd, staged_path = mkstemp(dir=staging_dir, suffix='.tmp')
    with os.fdopen(fd, 'wb') as stream:
        pickle.dump(obj, stream, pickle_protocol)
        # Flush the file object's buffer, then fsync the descriptor.
        stream.flush()
        os.fsync(fd)
        # The rename is issued before the with block closes the file.
        renamer(staged_path, dest)

Swift源碼分析----swift-proxy與swift-object(2)

Ceilometer項目源碼分析----ceilometer-agent-notification服務的初始化和啓動

Ceilometer項目源碼分析----ceilometer-alarm-evaluator服務的初始化和啓動

Swift源碼分析----swift-container-auditor

Swift源碼分析----swift-container-replicator

Swift源碼分析----swift-proxy實現請求req的轉發

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結